In [1]:
import numpy as np
import pandas as pd

### Creating a DataFrame

In [9]:
# Column-oriented input: every keyword becomes a column label and its list
# supplies that column's values, one entry per row.
data = dict(
    Name=['John', 'Anna', 'Peter', 'Linda'],
    Age=[28, 34, 29, 42],
    City=['New York', 'Paris', 'Berlin', 'London'],
    Salary=[65000, 70000, 62000, 85000],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [15]:
# Row-oriented input: each inner list is one record (name, age, city, salary).
# No column labels are supplied, so pandas numbers the columns 0..3.
data_list = [
    ['John', 28, 'new york', 65000],
    ['Anna', 34, 'Paris', 70000],
    ['Peter', 29, 'Berlin', 62000],
    ['Linda', 42, 'London', 85000],
]
df2 = pd.DataFrame(data=data_list)
df2

Unnamed: 0,0,1,2,3
0,John,28,new york,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [16]:
columns = ['Name', 'Age', 'City', 'Salary']
# Supplying the labels through `index` names the ROWS, not the columns:
# the column headers stay at the default 0..3.
df3 = pd.DataFrame(data=data_list, index=columns)
df3

Unnamed: 0,0,1,2,3
Name,John,28,new york,65000
Age,Anna,34,Paris,70000
City,Peter,29,Berlin,62000
Salary,Linda,42,London,85000


In [17]:
# Rebuild df2, this time supplying the labels through `columns` so that they
# become the column headers.
df2 = pd.DataFrame(data=data_list, columns=columns)
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,new york,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [18]:
# Single brackets with one column label return that column as a Series.
df2['Name']

0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object

In [19]:
# A list of labels inside the brackets returns a DataFrame holding just those columns.
df2[['Name','City']]

Unnamed: 0,Name,City
0,John,new york
1,Anna,Paris
2,Peter,Berlin
3,Linda,London


### Creating a new Column

In [20]:
# Assigning a list to a label that does not exist yet adds it as a new column;
# the list supplies one value per row (four rows, four values).
df2['Designation'] = ['Doctor','Engineer','Doctor','Engineer']

In [21]:
# Display df2 to confirm that it now carries the 'Designation' column.
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,new york,65000,Doctor
1,Anna,34,Paris,70000,Engineer
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Engineer


### Removing a Column

In [22]:
# drop() removes row labels by default (axis=0); pass axis=1 to target a
# column instead. The call returns a new frame -- df2 itself is left unchanged.
df2.drop(labels='Designation', axis=1)

Unnamed: 0,Name,Age,City,Salary
0,John,28,new york,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [23]:
# df2 still contains 'Designation': the earlier drop() call returned a new
# frame rather than modifying df2.
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,new york,65000,Doctor
1,Anna,34,Paris,70000,Engineer
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Engineer


In [24]:
# drop() returns a new frame and leaves the original untouched, so bind the
# result back to df2 to make the removal stick. Reassignment is used instead of
# inplace=True, and errors='ignore' keeps this cell safe to re-run once the
# column has already been removed (a second run would otherwise raise KeyError).
df2 = df2.drop(columns='Designation', errors='ignore')

In [25]:
# Confirm that 'Designation' has now been removed from df2.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,new york,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [26]:
# Several columns can be dropped at once by passing a list of labels.
# The result is a new frame; df2 keeps all of its columns.
df2.drop(columns=['City', 'Salary'])

Unnamed: 0,Name,Age
0,John,28
1,Anna,34
2,Peter,29
3,Linda,42


### Deleting a Row

In [27]:
# Current state of df2 before the row-deletion example.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,new york,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [28]:
# Remove the row labelled 0 from df2, the frame this section is working with
# (the original cell dropped from `df` instead). axis=0 targets the row index;
# the call returns a new frame and does not modify df2.
df2.drop(0, axis=0)

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


### Selecting Rows

df2.loc[0]

In [32]:
# A list of row labels passed to .loc returns those rows as a DataFrame.
df2.loc[[0,1]]

Unnamed: 0,Name,Age,City,Salary
0,John,28,new york,65000
1,Anna,34,Paris,70000


In [34]:
# .iloc selects by integer position: position 3 is the fourth row, returned as a Series.
df2.iloc[3]

Name       Linda
Age           42
City      London
Salary     85000
Name: 3, dtype: object

### Selecting Subsets of Rows and Columns

In [37]:
# Select rows and columns in a single .loc call: row labels first, column
# labels second. One call avoids chained indexing (a .loc followed by [...]),
# which builds an intermediate frame and is unsafe if later used for assignment.
df2.loc[[0, 1], ["City", "Salary"]]

Unnamed: 0,City,Salary
0,new york,65000
1,Paris,70000


### Conditional Selection

In [38]:
# Reference view of df2 for the filtering examples that follow.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,new york,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [39]:
# Boolean mask: keep only the rows whose Age is greater than 30.
df2.loc[df2['Age'].gt(30)]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
3,Linda,42,London,85000


In [41]:
# Combine two boolean masks with & (element-wise AND): Age above 30 and
# City exactly equal to 'Paris' (the comparison is case-sensitive).
df2.loc[df2['Age'].gt(30) & df2['City'].eq('Paris')]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
