### Simple Example with Pandas 

#### How to make own table ?? 

In [118]:
import pandas as pd

In [119]:
people = {
    "first" : ["Sakshi", "Tom" , "Harry"],
    "last" : [ "Narayanaswamy" , "mot" , "yrrah"],
    "email": ["sakshi@gmail.com", "Tom@gmail.com", "Harry@gmail.com"]
}

In [180]:
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
1,Tom,mot,Tom@gmail.com
2,Harry,yrrah,Harry@gmail.com


In [121]:
df['email']

0    sakshi@gmail.com
1       Tom@gmail.com
2     Harry@gmail.com
Name: email, dtype: object

In [122]:
df[['last','email']]

Unnamed: 0,last,email
0,Narayanaswamy,sakshi@gmail.com
1,mot,Tom@gmail.com
2,yrrah,Harry@gmail.com


#### ILOC

iloc helps to access rows by integer location

In [123]:
df.iloc[0]

first              Sakshi
last        Narayanaswamy
email    sakshi@gmail.com
Name: 0, dtype: object

In [124]:
df.iloc[[0, 1]]


Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
1,Tom,mot,Tom@gmail.com


In [125]:
df.iloc[[0, 1],2]

0    sakshi@gmail.com
1       Tom@gmail.com
Name: email, dtype: object

#### LOC

Labels can be used to access the columns

In [126]:
df.loc[[0, 1]]

Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
1,Tom,mot,Tom@gmail.com


In [127]:
df.loc[[0, 1],'email']

0    sakshi@gmail.com
1       Tom@gmail.com
Name: email, dtype: object

In [128]:
df.loc[[0, 2],['email','last']]

Unnamed: 0,email,last
0,sakshi@gmail.com,Narayanaswamy
2,Harry@gmail.com,yrrah


### How to Set/Reset Index? 

In [129]:
df.set_index('email')
df

Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
1,Tom,mot,Tom@gmail.com
2,Harry,yrrah,Harry@gmail.com


In [130]:
df.set_index('email', inplace = True)
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
sakshi@gmail.com,Sakshi,Narayanaswamy
Tom@gmail.com,Tom,mot
Harry@gmail.com,Harry,yrrah


In [131]:
df.index

Index(['sakshi@gmail.com', 'Tom@gmail.com', 'Harry@gmail.com'], dtype='object', name='email')

In [132]:
df.loc['sakshi@gmail.com']

first           Sakshi
last     Narayanaswamy
Name: sakshi@gmail.com, dtype: object

In [133]:
df.reset_index(inplace=True)
df

Unnamed: 0,email,first,last
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry@gmail.com,Harry,yrrah


### Filtering using pandas 

In [134]:
filt = (df['last']  == 'Narayanaswamy')

In [135]:
df[filt]

Unnamed: 0,email,first,last
0,sakshi@gmail.com,Sakshi,Narayanaswamy


In [136]:
df.loc[filt]

Unnamed: 0,email,first,last
0,sakshi@gmail.com,Sakshi,Narayanaswamy


In [137]:
df.loc[filt, 'email']

0    sakshi@gmail.com
Name: email, dtype: object

In [138]:
filt = (df['last']  == 'Narayanaswamy') & (df['first'] == 'Sakshi')
df[filt]

Unnamed: 0,email,first,last
0,sakshi@gmail.com,Sakshi,Narayanaswamy


In [139]:
filt = (df['last']  == 'mot') | (df['first'] == 'Sakshi')
df[filt]

Unnamed: 0,email,first,last
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot


In [140]:
filt = (df['last']  == 'mot') | (df['first'] == 'Sakshi')
df[~filt]

Unnamed: 0,email,first,last
2,Harry@gmail.com,Harry,yrrah


#### How to update Rows/Columns in Dataframe 

In [141]:
df.columns

Index(['email', 'first', 'last'], dtype='object')

In [142]:
df.columns = ['first_name', 'last_name', 'email']
df

Unnamed: 0,first_name,last_name,email
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry@gmail.com,Harry,yrrah


In [143]:
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry@gmail.com,Harry,yrrah


In [144]:
df.columns = [x.lower() for x in df.columns]
df.columns = df.columns.str.replace(' ', '_')
df

Unnamed: 0,first_name,last_name,email
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry@gmail.com,Harry,yrrah


In [145]:
df.rename(columns={'first_name':'first', 'last_name': 'last'}, inplace=True)
df

Unnamed: 0,first,last,email
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry@gmail.com,Harry,yrrah


In [146]:
df.loc[2]

first    Harry@gmail.com
last               Harry
email              yrrah
Name: 2, dtype: object

In [147]:
df.loc[2] = ['Harry','yraha','Harryraha@gmail.com']
df

Unnamed: 0,first,last,email
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry,yraha,Harryraha@gmail.com


In [148]:
df.at[2,'last'] = 'yrrah'
df

Unnamed: 0,first,last,email
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry,yrrah,Harryraha@gmail.com


In [149]:
filt = (df['email'] == 'Harryraha@gmail.com')
df[filt]['last']

2    yrrah
Name: last, dtype: object

In [150]:
df.loc[filt,'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,sakshi@gmail.com,Sakshi,Narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry,Smith,Harryraha@gmail.com


In [151]:
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,sakshi@gmail.com,Sakshi,narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry,Smith,harryraha@gmail.com


In [152]:
df['email'].apply(len)

0    13
1     3
2    19
Name: email, dtype: int64

In [153]:
def update_email(email):
    return email.upper()

In [154]:
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,sakshi@gmail.com,Sakshi,NARAYANASWAMY
1,Tom@gmail.com,Tom,MOT
2,Harry,Smith,HARRYRAHA@GMAIL.COM


In [155]:
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,sakshi@gmail.com,Sakshi,narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry,Smith,harryraha@gmail.com


In [156]:
df.apply(len, axis = 'columns')

0    3
1    3
2    3
dtype: int64

In [157]:
df.apply(pd.Series.min)

first                  Harry
last                  Sakshi
email    harryraha@gmail.com
dtype: object

In [158]:
df.apply(lambda x: x.min())

first                  Harry
last                  Sakshi
email    harryraha@gmail.com
dtype: object

In [159]:
df.applymap(len)

Unnamed: 0,first,last,email
0,16,6,13
1,13,3,3
2,5,5,19


In [160]:
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,sakshi@gmail.com,sakshi,narayanaswamy
1,tom@gmail.com,tom,mot
2,harry,smith,harryraha@gmail.com


In [161]:
df['first'].map({'Harry':'mary', 'Tom':'tommy'})

0     NaN
1     NaN
2    mary
Name: first, dtype: object

In [162]:
df['first'].replace({'Harry':'mary', 'Tom':'tommy'})


0    sakshi@gmail.com
1       Tom@gmail.com
2                mary
Name: first, dtype: object

In [163]:
df

Unnamed: 0,first,last,email
0,sakshi@gmail.com,Sakshi,narayanaswamy
1,Tom@gmail.com,Tom,mot
2,Harry,Smith,harryraha@gmail.com


#### Add/Remove Rows and Columns

In [164]:
df['first'] + ' ' + df['last']

0    sakshi@gmail.com Sakshi
1          Tom@gmail.com Tom
2                Harry Smith
dtype: object

In [165]:
df['full_name'] = df['first'] + ' ' + df['last']
df

Unnamed: 0,first,last,email,full_name
0,sakshi@gmail.com,Sakshi,narayanaswamy,sakshi@gmail.com Sakshi
1,Tom@gmail.com,Tom,mot,Tom@gmail.com Tom
2,Harry,Smith,harryraha@gmail.com,Harry Smith


In [166]:
df.drop(columns=['first', 'last'], inplace = True)
df

Unnamed: 0,email,full_name
0,narayanaswamy,sakshi@gmail.com Sakshi
1,mot,Tom@gmail.com Tom
2,harryraha@gmail.com,Harry Smith


In [167]:
df['full_name'].str.split(' ',expand=True)

Unnamed: 0,0,1
0,sakshi@gmail.com,Sakshi
1,Tom@gmail.com,Tom
2,Harry,Smith


In [168]:
df[['first','last']] = df['full_name'].str.split(' ',expand=True)
df

Unnamed: 0,email,full_name,first,last
0,narayanaswamy,sakshi@gmail.com Sakshi,sakshi@gmail.com,Sakshi
1,mot,Tom@gmail.com Tom,Tom@gmail.com,Tom
2,harryraha@gmail.com,Harry Smith,Harry,Smith


In [185]:
#df.append({'first': 'Tony'}, ignore_index=True) been depreciated
df.loc[len(df)] = {'first': 'Tony'}
df

Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
1,Tom,mot,Tom@gmail.com
2,Harry,yrrah,Harry@gmail.com
3,Tony,,
4,Tony,,
5,Tony,,


In [197]:
people1 = {
    "first" : ["Tony", "Steve"],
    "last" : [ "Stark" , "Rogers"],
    "email": ["IronMan@avenge.com", "Cap@avenge.com"]
}
df2 = pd.DataFrame(people1)

In [198]:
df = pd.concat([df, df2], ignore_index=True)
df

Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
1,Tom,mot,Tom@gmail.com
2,Harry,yrrah,Harry@gmail.com
3,Tony,,
4,Tony,,
5,Tony,Stark,IronMan@avenge.com
6,Steve,Rogers,Cap@avenge.com


In [204]:
df.drop(index=df[df['first'] == 'Tony'].index, inplace=True)
df

Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
1,Tom,mot,Tom@gmail.com
2,Harry,yrrah,Harry@gmail.com
6,Steve,Rogers,Cap@avenge.com


### How to Sort Data

In [205]:
df.sort_values(by='last')

Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
6,Steve,Rogers,Cap@avenge.com
1,Tom,mot,Tom@gmail.com
2,Harry,yrrah,Harry@gmail.com


In [206]:
df.sort_values(by='last',ascending = False)

Unnamed: 0,first,last,email
2,Harry,yrrah,Harry@gmail.com
1,Tom,mot,Tom@gmail.com
6,Steve,Rogers,Cap@avenge.com
0,Sakshi,Narayanaswamy,sakshi@gmail.com


In [207]:
df.sort_values(by=['last','first'],ascending = False)

Unnamed: 0,first,last,email
2,Harry,yrrah,Harry@gmail.com
1,Tom,mot,Tom@gmail.com
6,Steve,Rogers,Cap@avenge.com
0,Sakshi,Narayanaswamy,sakshi@gmail.com


In [208]:
df.sort_values(by=['last','first'],ascending = [False,True], )

Unnamed: 0,first,last,email
2,Harry,yrrah,Harry@gmail.com
1,Tom,mot,Tom@gmail.com
6,Steve,Rogers,Cap@avenge.com
0,Sakshi,Narayanaswamy,sakshi@gmail.com


In [209]:
df.sort_index()

Unnamed: 0,first,last,email
0,Sakshi,Narayanaswamy,sakshi@gmail.com
1,Tom,mot,Tom@gmail.com
2,Harry,yrrah,Harry@gmail.com
6,Steve,Rogers,Cap@avenge.com


In [210]:
df['last'].sort_values()

0    Narayanaswamy
6           Rogers
1              mot
2            yrrah
Name: last, dtype: object

### Handling Missing Values & Casting Data Types 

In [214]:
import numpy as np
people2  = {
    'first' : ['Corey','Jane', 'John', 'Chris', np.nan, None, 'NA'],
    'last' : ['Schafer','Doe', 'Doe', 'Schafer', np.nan, np.nan, 'Missing'],
    'email' : ['CoreyMSchafer@gmail.com','JaneDoe@email.com', 'JohnDoe@email.com', 'None', np.nan, None, 'NA'],
    'age' : ['33','55', '63', '36', None, None, 'Missing']
}

In [217]:
df = pd.DataFrame(people2)
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
4,,,,
5,,,,
6,,Missing,,Missing


In [218]:
df.dropna()

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
6,,Missing,,Missing


In [219]:
df.dropna(axis='index',how='all')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
6,,Missing,,Missing


In [220]:
df.dropna(axis='index',how='any', subset=['email'])

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
6,,Missing,,Missing


In [221]:
df.dropna(axis='index',how='any', subset=['last','email'])

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
6,,Missing,,Missing


In [222]:
df.replace('NA',np.nan,inplace=True)
df.replace('Missing',np.nan,inplace=True)
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,,
6,,,,


In [223]:
df.isna()

Unnamed: 0,first,last,email,age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,True,True,True,True
5,True,True,True,True
6,True,True,True,True


In [224]:
df.fillna('MISSING')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
4,MISSING,MISSING,MISSING,MISSING
5,MISSING,MISSING,MISSING,MISSING
6,MISSING,MISSING,MISSING,MISSING


In [225]:
df.fillna('0')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,,36
4,0,0,0,0
5,0,0,0,0
6,0,0,0,0


In [226]:
df.dtypes

first    object
last     object
email    object
age      object
dtype: object

In [228]:
df['age'] = df['age'].astype(float)
df.dtypes

first     object
last      object
email     object
age      float64
dtype: object