In [1]:
import numpy as np
import pandas as pd

In [6]:
## Build a DataFrame from a dict: each key becomes a column label and
## each list holds that column's values (one entry per row).
data = dict(
    Name=['John', 'Anna', 'Peter', 'Linda'],
    Age=[28, 34, 29, 42],
    City=['New York', 'Paris', 'Berlin', 'London'],
    Salary=[65000, 70000, 62000, 85000],
)
df = pd.DataFrame(data=data)
print(df)

    Name  Age      City  Salary
0   John   28  New York   65000
1   Anna   34     Paris   70000
2  Peter   29    Berlin   62000
3  Linda   42    London   85000


In [12]:
## Build the same table from a list of rows: each inner list is one record.
columns = ['Name', 'Age', 'City', 'Salary']

data_lst = [
    ['John', 28, 'New York', 65000],
    ['Anna', 34, 'Paris', 70000],
    ['Peter', 29, 'Berlin', 62000],
    ['Linda', 42, 'London', 85000],
]

## Row lists carry no labels, so the column names are passed explicitly.
df2 = pd.DataFrame(data=data_lst, columns=columns)
print(df2)

    Name  Age      City  Salary
0   John   28  New York   65000
1   Anna   34     Paris   70000
2  Peter   29    Berlin   62000
3  Linda   42    London   85000


In [13]:
## Selection and indexing of columns
## A single column label inside [] returns that column as a Series.

df2['Name']

0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object

In [14]:
## Same single-label selection, this time for the 'City' column.
df2['City']

0    New York
1       Paris
2      Berlin
3      London
Name: City, dtype: object

In [15]:
## A list of labels (note the double brackets) returns a DataFrame with just those columns.
df2[['Name', 'City']]

Unnamed: 0,Name,City
0,John,New York
1,Anna,Paris
2,Peter,Berlin
3,Linda,London


In [16]:
## Add a new column by assigning a list with one value per row.
df2['Designation'] = ['Doctor', 'Engineer', 'Doctor', 'Engineer']

In [17]:
## Display df2 to confirm that the 'Designation' column was added.
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Doctor
1,Anna,34,Paris,70000,Engineer
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Engineer


In [None]:
## Removing a column
df2.drop('Designation', axis=1) ## axis=1 is needed here: axis defaults to 0, and pandas would then try to drop a row labelled 'Designation'
## axis = 0 -> drop by row (index) label
## axis = 1 -> drop by column label

## drop() does not modify the original DataFrame;
## it returns a new DataFrame without the dropped labels,
## e.g. df3 = df2.drop('Designation', axis=1)
## Changes made to df3 will NOT be reflected in df2.

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [20]:
## df2 still contains 'Designation': drop() without inplace=True left it untouched.
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Doctor
1,Anna,34,Paris,70000,Engineer
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Engineer


In [21]:
df2.drop('Designation', axis=1, inplace=True)
## inplace=True modifies df2 directly instead of returning a new DataFrame,
## which is why this cell shows no output.

In [22]:
## 'Designation' is now gone from df2 itself.
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [23]:
df2.drop(0, axis = 0) ## returns a new DataFrame without the row labelled 0; df2 itself is not changed

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [25]:
## Selecting a row
df2.loc[0] ## .loc selects by index label; a single label returns that row as a Series

Name          John
Age             28
City      New York
Salary       65000
Name: 0, dtype: object

In [29]:
## Passing a list of labels to .loc returns those rows as a DataFrame.
df2.loc[[0 ,1]]

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000


In [30]:
df2.iloc[3] ## .iloc selects by integer position (0-based), not by index label

Name       Linda
Age           42
City      London
Salary     85000
Name: 3, dtype: object

In [31]:
## Selecting subsets of rows and columns
## Extract City and Salary for the rows labelled 0 and 1.
## A single .loc[rows, columns] call selects on both axes at once and avoids
## chained indexing (one lookup for the rows, then a second for the columns).
df2.loc[[0, 1], ['City', 'Salary']]

Unnamed: 0,City,Salary
0,New York,65000
1,Paris,70000


In [None]:
## Extract Name and Age for the rows labelled 2 and 3

df2.loc[[2, 3], ['Name', 'Age']] ## order inside .loc is always: rows first, then columns

Unnamed: 0,Name,Age
2,Peter,29
3,Linda,42


In [34]:
## Conditional selection (boolean filtering)
## Display the full DataFrame first for reference.

df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [36]:
## The comparison yields a boolean Series; indexing with it keeps only the rows where it is True.
df2[df2['Age'] > 30]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
3,Linda,42,London,85000


In [None]:
## Without parentheses the line below raises an error: `&` binds more tightly
## than `>` and `==`, so Python evaluates 30 & df2['City'] first.
# df2[df2['Age'] > 30 & df2['City']=='Paris']
df2[(df2['Age'] > 30) & (df2['City']=='Paris')]
## Note: when combining multiple conditions, always enclose each condition in ()

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
