In [1]:
import pandas as pd

## *1. creating a df*

In [2]:
# Build the demo frame in a single constructor call; the dict's insertion
# order fixes the column order (age, name, id, dob).
df = pd.DataFrame(
    {
        "age": [32, 26, 20, 24, 19, 31],
        "name": ["Joey", "Mark", "Raj", "Priya", "Sheldon", "Tom"],
        "id": [2, 5, 8, 1, 25, 3],
        # dates of birth are plain strings, not datetime values
        "dob": ["01-01-1988", "02-11-1994", "04-10-2000",
                "31-12-1996", "02-06-2001", "07-03-1989"],
    }
)
df

Unnamed: 0,age,name,id,dob
0,32,Joey,2,01-01-1988
1,26,Mark,5,02-11-1994
2,20,Raj,8,04-10-2000
3,24,Priya,1,31-12-1996
4,19,Sheldon,25,02-06-2001
5,31,Tom,3,07-03-1989


## *2. head-tail*

In [3]:
# Preview both ends of the frame: the first three rows, then the last three.
# sep="\n" puts the caption and the table on separate lines.
print("top rows", df.head(3), sep="\n")
print("bottom rows", df.tail(3), sep="\n")

top rows
   age  name  id         dob
0   32  Joey   2  01-01-1988
1   26  Mark   5  02-11-1994
2   20   Raj   8  04-10-2000
bottom rows
   age     name  id         dob
3   24    Priya   1  31-12-1996
4   19  Sheldon  25  02-06-2001
5   31      Tom   3  07-03-1989


## *3. selecting a contiguous portion of df* 

In [4]:
# Positional selection: rows 1..3 and columns 1..2 (0-based, slice ends are
# exclusive), i.e. a 3-row by 2-column window.
df_new = df.iloc[slice(1, 4), slice(1, 3)]
print("showing 2 columns and 3 rows starting from the 2nd column 2nd row")
df_new

showing 2 columns and 3 rows starting from the 2nd column 2nd row


Unnamed: 0,name,id
1,Mark,5
2,Raj,8
3,Priya,1


## *4. selecting some columns*

In [5]:
# Label-based selection: keep every row, and only the listed columns in the
# listed order.
print("showing id and age")
df_new = df.loc[:, ["id", "age"]]
df_new

showing id and age


Unnamed: 0,id,age
0,2,32
1,5,26
2,8,20
3,1,24
4,25,19
5,3,31


## *5. renaming columns*

In [6]:
# Rename selected columns through an old-name -> new-name mapping; columns not
# in the mapping keep their labels.  inplace=True mutates df directly.
df.rename(columns={"id": "col_1", "name": "col_2"}, inplace=True)  # one by one
print(df)

# Replace every column label in one assignment.  Labels are applied by
# position, so this list needs exactly one entry per existing column.
df.columns = ["age", "name", "id", "dob"]  # all at once
print(df)

   age    col_2  col_1         dob
0   32     Joey      2  01-01-1988
1   26     Mark      5  02-11-1994
2   20      Raj      8  04-10-2000
3   24    Priya      1  31-12-1996
4   19  Sheldon     25  02-06-2001
5   31      Tom      3  07-03-1989
   age     name  id         dob
0   32     Joey   2  01-01-1988
1   26     Mark   5  02-11-1994
2   20      Raj   8  04-10-2000
3   24    Priya   1  31-12-1996
4   19  Sheldon  25  02-06-2001
5   31      Tom   3  07-03-1989


## *6. splitting a string column*

In [7]:
# Split each dob string on "-"; expand=True returns the pieces as a frame with
# one column per piece, which is then stored under three new column names.
dob_parts = df["dob"].str.split("-", expand=True)
df[["day", "month", "year"]] = dob_parts
df

Unnamed: 0,age,name,id,dob,day,month,year
0,32,Joey,2,01-01-1988,1,1,1988
1,26,Mark,5,02-11-1994,2,11,1994
2,20,Raj,8,04-10-2000,4,10,2000
3,24,Priya,1,31-12-1996,31,12,1996
4,19,Sheldon,25,02-06-2001,2,6,2001
5,31,Tom,3,07-03-1989,7,3,1989


## *7. concatenating string columns*

In [8]:
# Join the string columns row by row as month-day-year, using "-" as the
# separator between the pieces.
df["mod_date"] = df["month"].str.cat([df["day"], df["year"]], sep="-")
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date
0,32,Joey,2,01-01-1988,1,1,1988,01-01-1988
1,26,Mark,5,02-11-1994,2,11,1994,11-02-1994
2,20,Raj,8,04-10-2000,4,10,2000,10-04-2000
3,24,Priya,1,31-12-1996,31,12,1996,12-31-1996
4,19,Sheldon,25,02-06-2001,2,6,2001,06-02-2001
5,31,Tom,3,07-03-1989,7,3,1989,03-07-1989


## *8. filtering with multiple criteria*

In [9]:
# Keep only the rows that satisfy both conditions: age below 25 AND id of at
# least 3.  The two boolean Series are combined element-wise with &.
df_new = df.loc[df["age"].lt(25) & df["id"].ge(3)]
df_new

Unnamed: 0,age,name,id,dob,day,month,year,mod_date
2,20,Raj,8,04-10-2000,4,10,2000,10-04-2000
4,19,Sheldon,25,02-06-2001,2,6,2001,06-02-2001


## *9. one hot encoding*

In [10]:
# One-hot encode the "month" column into indicator columns named "mo_<value>".
# drop_first=True omits the indicator for the first category, so a row with
# all indicators at zero belongs to that dropped category.
df_new = pd.get_dummies(
    data=df,
    columns=["month"],
    prefix="mo",
    prefix_sep="_",
    drop_first=True,
)
df_new

Unnamed: 0,age,name,id,dob,day,year,mod_date,mo_03,mo_06,mo_10,mo_11,mo_12
0,32,Joey,2,01-01-1988,1,1988,01-01-1988,0,0,0,0,0
1,26,Mark,5,02-11-1994,2,1994,11-02-1994,0,0,0,1,0
2,20,Raj,8,04-10-2000,4,2000,10-04-2000,0,0,1,0,0
3,24,Priya,1,31-12-1996,31,1996,12-31-1996,0,0,0,0,1
4,19,Sheldon,25,02-06-2001,2,2001,06-02-2001,0,1,0,0,0
5,31,Tom,3,07-03-1989,7,1989,03-07-1989,1,0,0,0,0


## *10. continuous variable to categorical*

In [11]:
# Bucket ages into two coarse labelled groups.
# right=False makes every bin include its left edge and exclude its right
# edge: [0, 30) -> "20's" and [30, 60) -> "30's".  With the default
# right-closed bins an age of exactly 30 was labelled "20's".
# The labels are loose: "20's" covers every age below 30 and "30's" covers
# 30 up to (not including) 60; ages outside [0, 60) become NaN.
# The Series is passed directly so the result stays aligned on df's index.
df["cat_age"] = pd.cut(df["age"],
                       bins = [0, 30, 60],
                       right = False,
                       labels = ["20's", "30's"])
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date,cat_age
0,32,Joey,2,01-01-1988,1,1,1988,01-01-1988,30's
1,26,Mark,5,02-11-1994,2,11,1994,11-02-1994,20's
2,20,Raj,8,04-10-2000,4,10,2000,10-04-2000,20's
3,24,Priya,1,31-12-1996,31,12,1996,12-31-1996,20's
4,19,Sheldon,25,02-06-2001,2,6,2001,06-02-2001,20's
5,31,Tom,3,07-03-1989,7,3,1989,03-07-1989,30's


## *11. apply( ) method*

In [12]:
# apply() calls the given function once per element; here every name is
# replaced by its upper-case form.
df["name"] = df["name"].apply(str.upper)
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date,cat_age
0,32,JOEY,2,01-01-1988,1,1,1988,01-01-1988,30's
1,26,MARK,5,02-11-1994,2,11,1994,11-02-1994,20's
2,20,RAJ,8,04-10-2000,4,10,2000,10-04-2000,20's
3,24,PRIYA,1,31-12-1996,31,12,1996,12-31-1996,20's
4,19,SHELDON,25,02-06-2001,2,6,2001,06-02-2001,20's
5,31,TOM,3,07-03-1989,7,3,1989,03-07-1989,30's


In [13]:
# Parity flag per id: 0 when the id is divisible by 2, otherwise 1.
df["even_odd"] = df["id"].apply(lambda id_value: int(id_value % 2 != 0))
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date,cat_age,even_odd
0,32,JOEY,2,01-01-1988,1,1,1988,01-01-1988,30's,0
1,26,MARK,5,02-11-1994,2,11,1994,11-02-1994,20's,1
2,20,RAJ,8,04-10-2000,4,10,2000,10-04-2000,20's,0
3,24,PRIYA,1,31-12-1996,31,12,1996,12-31-1996,20's,1
4,19,SHELDON,25,02-06-2001,2,6,2001,06-02-2001,20's,1
5,31,TOM,3,07-03-1989,7,3,1989,03-07-1989,30's,1
