In [1]:
import pandas as pd

# 1. Creating a dataframe

In [2]:
# Build the demo frame in a single constructor call; the dict's key order
# determines the column order (age, name, id, job).
# NOTE(review): "job" actually stores date strings -- it is relabelled "dob"
# in the renaming section -- and '31-13-1996' carries 13 in the position that
# is later split out as the month.
df = pd.DataFrame(
    {
        "age": [32, 26, 20, 24, 19, 31],
        "name": ["ram", "sita", "sab", "raj", "priya", "Tom"],
        "id": [2, 5, 8, 1, 25, 3],
        "job": [
            '01-01-1988', '02-11-1994', '04-10-2000',
            '31-13-1996', '02-06-2001', '07-03-1989',
        ],
    }
)

df

Unnamed: 0,age,name,id,job
0,32,ram,2,01-01-1988
1,26,sita,5,02-11-1994
2,20,sab,8,04-10-2000
3,24,raj,1,31-13-1996
4,19,priya,25,02-06-2001
5,31,Tom,3,07-03-1989


# 2. Head and tail

In [4]:
# Show the leading and trailing rows of df, each preceded by a caption string.
first_rows, last_rows = df.head(), df.tail()
display("top rows", first_rows, "bottom rows", last_rows)

'top rows'

Unnamed: 0,age,name,id,job
0,32,ram,2,01-01-1988
1,26,sita,5,02-11-1994
2,20,sab,8,04-10-2000
3,24,raj,1,31-13-1996
4,19,priya,25,02-06-2001


'bottom rows'

Unnamed: 0,age,name,id,job
1,26,sita,5,02-11-1994
2,20,sab,8,04-10-2000
3,24,raj,1,31-13-1996
4,19,priya,25,02-06-2001
5,31,Tom,3,07-03-1989


# 3. Selecting a contiguous portion of df 

In [6]:
# Positional (iloc) slicing: rows 1-3 and columns 1-2; the stop index is excluded.
print("showing 2 columns and 3 rows stating from the 2nd column 2nd row")

row_span, col_span = slice(1, 4), slice(1, 3)
df_new = df.iloc[row_span, col_span]
df_new

showing 2 columns and 3 rows stating from the 2nd column 2nd row


Unnamed: 0,name,id
1,sita,5
2,sab,8
3,raj,1


# 4. Selecting some columns

In [7]:
# Pick columns by label; the result follows the order of the requested list.
print("showing id and age")
wanted_columns = ["id", "age"]
df_new = df[wanted_columns]
df_new

showing id and age


Unnamed: 0,id,age
0,2,32
1,5,26
2,8,20
3,1,24
4,25,19
5,3,31


# 5. Renaming columns

In [10]:
# Two ways to relabel columns.

# (a) Selective: map only the labels that should change; df is modified in place.
partial_mapping = {"id": "col 1", "name": "col 2"}
df.rename(columns=partial_mapping, inplace=True)
display(df)

# (b) Wholesale: overwrite every label positionally, one entry per existing
# column (this is also where "job" becomes "dob").
df.columns = ["age", "name", "id", "dob"]
display(df)


Unnamed: 0,age,col 2,col 1,job
0,32,ram,2,01-01-1988
1,26,sita,5,02-11-1994
2,20,sab,8,04-10-2000
3,24,raj,1,31-13-1996
4,19,priya,25,02-06-2001
5,31,Tom,3,07-03-1989


Unnamed: 0,age,name,id,dob
0,32,ram,2,01-01-1988
1,26,sita,5,02-11-1994
2,20,sab,8,04-10-2000
3,24,raj,1,31-13-1996
4,19,priya,25,02-06-2001
5,31,Tom,3,07-03-1989


# 6. splitting a string column

In [12]:
# Break each "dob" string on "-" and store the three pieces as new text columns.
dob_parts = df["dob"].str.split("-", expand=True)
df[["day", "month", "year"]] = dob_parts
df

Unnamed: 0,age,name,id,dob,day,month,year
0,32,ram,2,01-01-1988,1,1,1988
1,26,sita,5,02-11-1994,2,11,1994
2,20,sab,8,04-10-2000,4,10,2000
3,24,raj,1,31-13-1996,31,13,1996
4,19,priya,25,02-06-2001,2,6,2001
5,31,Tom,3,07-03-1989,7,3,1989


# 7. Concatenating string columns

In [14]:
# Re-join the split pieces month-first, using "-" as the separator.
df["mod_date"] = df["month"].str.cat([df["day"], df["year"]], sep="-")
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date
0,32,ram,2,01-01-1988,1,1,1988,01-01-1988
1,26,sita,5,02-11-1994,2,11,1994,11-02-1994
2,20,sab,8,04-10-2000,4,10,2000,10-04-2000
3,24,raj,1,31-13-1996,31,13,1996,13-31-1996
4,19,priya,25,02-06-2001,2,6,2001,06-02-2001
5,31,Tom,3,07-03-1989,7,3,1989,03-07-1989


# 8. Filtering with multiple criteria

In [15]:
# Keep only rows that satisfy BOTH conditions: age under 25 and id of at least 3.
is_under_25 = df["age"].lt(25)
id_at_least_3 = df["id"].ge(3)
df_new = df[is_under_25 & id_at_least_3]

df_new

Unnamed: 0,age,name,id,dob,day,month,year,mod_date
2,20,sab,8,04-10-2000,4,10,2000,10-04-2000
4,19,priya,25,02-06-2001,2,6,2001,06-02-2001


# 9. One hot encoding

In [16]:
# One-hot encode "month" into indicator columns named "mo_<value>".
# drop_first=True omits the indicator of the first category, so rows in that
# category show False in every remaining "mo_" column.
encoding_options = dict(
    columns=["month"],
    prefix="mo",
    prefix_sep="_",
    drop_first=True,
)
df_new = pd.get_dummies(df, **encoding_options)
df_new

Unnamed: 0,age,name,id,dob,day,year,mod_date,mo_03,mo_06,mo_10,mo_11,mo_13
0,32,ram,2,01-01-1988,1,1988,01-01-1988,False,False,False,False,False
1,26,sita,5,02-11-1994,2,1994,11-02-1994,False,False,False,True,False
2,20,sab,8,04-10-2000,4,2000,10-04-2000,False,False,True,False,False
3,24,raj,1,31-13-1996,31,1996,13-31-1996,False,False,False,False,True
4,19,priya,25,02-06-2001,2,2001,06-02-2001,False,True,False,False,False
5,31,Tom,3,07-03-1989,7,1989,03-07-1989,True,False,False,False,False


# 10. Continuous variable to categorical

In [17]:
# Bucket the numeric "age" column into two labelled categories.
#
# right=False makes every bin closed on the left and open on the right:
#   [0, 30)  -> "20's"   (this bin also catches ages below 20, e.g. 19)
#   [30, 60) -> "30's"
# With pd.cut's default (right=True) the bins would be (0, 30] and (30, 60],
# which files an age of exactly 30 under "20's" -- an off-by-one at the
# boundary. Ages outside [0, 60) become NaN.
df["cat_age"] = pd.cut(df["age"],
                       bins=[0, 30, 60],
                       right=False,
                       labels=["20's", "30's"])

df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date,cat_age
0,32,ram,2,01-01-1988,1,1,1988,01-01-1988,30's
1,26,sita,5,02-11-1994,2,11,1994,11-02-1994,20's
2,20,sab,8,04-10-2000,4,10,2000,10-04-2000,20's
3,24,raj,1,31-13-1996,31,13,1996,13-31-1996,20's
4,19,priya,25,02-06-2001,2,6,2001,06-02-2001,20's
5,31,Tom,3,07-03-1989,7,3,1989,03-07-1989,30's


# 11. apply() method

In [18]:
# Upper-case every name by applying a function to each element of the column.
shouted_names = df["name"].apply(lambda person: person.upper())
df["name"] = shouted_names
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date,cat_age
0,32,RAM,2,01-01-1988,1,1,1988,01-01-1988,30's
1,26,SITA,5,02-11-1994,2,11,1994,11-02-1994,20's
2,20,SAB,8,04-10-2000,4,10,2000,10-04-2000,20's
3,24,RAJ,1,31-13-1996,31,13,1996,13-31-1996,20's
4,19,PRIYA,25,02-06-2001,2,6,2001,06-02-2001,20's
5,31,TOM,3,07-03-1989,7,3,1989,03-07-1989,30's


In [20]:
# Parity flag for "id": 0 when the value is even, 1 otherwise.
df["odd_even"] = df["id"].apply(lambda ident: 1 if ident % 2 else 0)
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date,cat_age,odd_even
0,32,RAM,2,01-01-1988,1,1,1988,01-01-1988,30's,0
1,26,SITA,5,02-11-1994,2,11,1994,11-02-1994,20's,1
2,20,SAB,8,04-10-2000,4,10,2000,10-04-2000,20's,0
3,24,RAJ,1,31-13-1996,31,13,1996,13-31-1996,20's,1
4,19,PRIYA,25,02-06-2001,2,6,2001,06-02-2001,20's,1
5,31,TOM,3,07-03-1989,7,3,1989,03-07-1989,30's,1
