In [1]:
import numpy as np 
import pandas as pd 

**Creating a DataFrame**

In [2]:
# Column-oriented input: every keyword is a column label and every list
# holds that column's values, one entry per row.
data = dict(
    Name=["John", "Anna", "Peter", "Linda"],
    Age=[28, 34, 29, 42],
    City=["New York", "Paris", "Berlin", "London"],
    Salary=[65000, 70000, 62000, 85000],
)
# Build the frame from the mapping; the keys become the column labels.
df = pd.DataFrame(data)

In [3]:
# Display the frame built from the dict; a bare last expression renders as a table.
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [4]:
# Row-oriented input: each inner list is one record (name, age, city, salary).
data_list = [
    ['John', 28, 'New York', 65000],
    ['Anna', 34, 'Paris', 70000],
    ['Peter', 29, 'Berlin', 62000],
    ['Linda', 42, 'London', 85000]
]
# Plain lists carry no column labels, so they are supplied explicitly.
# Without `columns=` pandas would fall back to the integer labels 0..3.
columns = ["Name", "Age", "City", "Salary"]
# Build the frame once, with its labels; a label-less frame built first
# would be overwritten immediately and never used.
df2 = pd.DataFrame(data_list, columns=columns)
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


**Selection and Indexing of Columns**

In [5]:
# Show the full frame as the starting point for the selection examples below.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [6]:
# A single column label inside the brackets returns that column as a Series.
df2['Name']

0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object

In [7]:
# A list of labels inside the brackets returns a DataFrame with just those columns.
df2[['Name', 'City']]

Unnamed: 0,Name,City
0,John,New York
1,Anna,Paris
2,Peter,Berlin
3,Linda,London


**Creating a new column**

In [8]:
# Assigning to a label that does not exist yet adds it as a new column on df2;
# the list supplies one value per row.
df2["Designation"] = ["Doctor", "Eng.", "Doctor", "Eng."]

In [9]:
# Display df2 to confirm the new Designation column is present.
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Doctor
1,Anna,34,Paris,70000,Eng.
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Eng.


**Removing Columns**

In [11]:
# Without inplace=True, drop() leaves df2 untouched and only returns a new DataFrame that lacks the column.
# With inplace=True the column is removed from df2 itself, so the change persists in later cells.
# axis=1 tells drop() that "Designation" is a column label rather than a row label.
df2.drop("Designation", axis=1, inplace= True)

In [12]:
# Display df2 to confirm the Designation column was removed from df2 itself.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [13]:
# Drop several columns at once. The result is a new DataFrame that is only
# displayed here; it is not assigned, so df2 keeps all of its columns.
df2.drop(columns=["Name", "City"])

Unnamed: 0,Age,Salary
0,28,65000
1,34,70000
2,29,62000
3,42,85000


In [14]:
# df2 still has every column: the drop above was not in place and its result was not assigned.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [15]:
# Drop the row whose index label is 0. The result is a new DataFrame that is
# only displayed here; df2 itself is not modified.
df2.drop(index=0)

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


**Selecting Rows**

In [16]:
# Row 0 is still present in df2: the row drop above only returned a new frame.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [21]:
# .loc is an indexer, not a method: use square brackets, not parentheses.
# A single row label returns that row as a Series whose index is the column names.
df2.loc[0]

Name          John
Age             28
City      New York
Salary       65000
Name: 0, dtype: object

In [22]:
# A list of row labels returns those rows as a DataFrame.
df2.loc[[0, 1]]

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000


In [23]:
# .iloc selects by integer position rather than by label; position 3 is the fourth row.
df2.iloc[3]

Name       Linda
Age           42
City      London
Salary     85000
Name: 3, dtype: object

**Selecting Subsets of Rows and Columns**

In [25]:
# Select rows and columns in one .loc call: row labels first, column labels second.
# A single call avoids chained indexing (two separate lookups), and it reads
# from df2, the frame used by the other selection examples in this notebook.
df2.loc[[2, 3], ["Name", "Age"]]

Unnamed: 0,Name,Age
2,Peter,29
3,Linda,42


**Conditional Selection**

In [26]:
# Show the full frame as the starting point for the filtering examples below.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [27]:
# I only want to see those people whose age is above 30.

In [28]:
# Build a boolean mask (one True/False per row), then use it to keep only
# the rows where the mask is True.
age_above_30 = df2["Age"] > 30
df2[age_above_30]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
3,Linda,42,London,85000


In [33]:
# I only want to see the people whose age is above 30 and whose city is Paris.

In [36]:
# Build one boolean mask per condition, then combine them element-wise with &
# (Python's `and` does not work on Series) to keep rows that satisfy both.
over_30 = df2["Age"] > 30
in_paris = df2["City"] == "Paris"
df2[over_30 & in_paris]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
