# pandas
pandas provides data structures and data-manipulation tools designed to make data cleaning and analysis fast and easy in Python. It has two core data structures: (1) Series and (2) DataFrame.
#### Data types: int64, float64, object, bool, datetime64, timedelta64, category

# 1.SERIES

In [1]:
import pandas as pd

In [2]:
# A Series built from a plain list gets a default integer index (0, 1, 2, ...).
data = list(range(1, 5))
s1 = pd.Series(data=data)
s1

0    1
1    2
2    3
3    4
dtype: int64

In [3]:
s1.values

array([1, 2, 3, 4])

In [4]:
s1.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
s1.index = ['a','b','c','d']

In [6]:
s1

a    1
b    2
c    3
d    4
dtype: int64

In [7]:
s1.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [8]:
# s1 is labelled 'a'..'d', so 0 is not one of its labels. Ask for the first
# element by position explicitly instead of relying on the integer-key fallback.
s1.iloc[0]

1

In [9]:
s1['a']

1

In [10]:
s1.iloc[2]

3

In [11]:
s1.loc['c']

3

In [12]:
s1['e'] = 5
s1

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [13]:
s1.loc['f'] = 6
s1

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64

In [14]:
s1.loc

<pandas.core.indexing._LocIndexer at 0x7fa42ab79b30>

In [15]:
1 in s1.values

True

In [16]:
7 in s1.values

False

In [17]:
'c' in s1.index

True

In [18]:
'x' in s1.index

False

In [19]:
# Positional slice: elements at positions 1 through 4 (the stop, 5, is excluded).
# .iloc makes it explicit that these integers are positions, not labels.
s1.iloc[1:5]

b    2
c    3
d    4
e    5
dtype: int64

In [20]:
# City -> state mapping; when a dict is passed to Series, its keys become the index.
data = dict(
    Jaipur='Rajasthan',
    Mumbai='Maharashtra',
    Kolkata='West Bengal',
    Bangaluru='Karnataka',
    Chandigarh='Punjab',
)

s2 = pd.Series(data=data)
s2

Jaipur          Rajasthan
Mumbai        Maharashtra
Kolkata       West Bengal
Bangaluru       Karnataka
Chandigarh         Punjab
dtype: object

In [21]:
s2.name = 'States'

In [22]:
s2.index.name = 'Capital'

In [23]:
s2

Capital
Jaipur          Rajasthan
Mumbai        Maharashtra
Kolkata       West Bengal
Bangaluru       Karnataka
Chandigarh         Punjab
Name: States, dtype: object

In [24]:
s2['Mumbai']

'Maharashtra'

In [25]:
s2[['Jaipur','Kolkata', 'Chandigarh']]

Capital
Jaipur          Rajasthan
Kolkata       West Bengal
Chandigarh         Punjab
Name: States, dtype: object

In [26]:
# Passing `index` together with a dict keeps only the listed keys, in the listed order.
capitals = [
    'Jaipur',
    'Kolkata',
    'Chandigarh',
]
s3 = pd.Series(data=data, index=capitals)
s3

Jaipur          Rajasthan
Kolkata       West Bengal
Chandigarh         Punjab
dtype: object

In [27]:
s3.isnull()

Jaipur        False
Kolkata       False
Chandigarh    False
dtype: bool

In [28]:
# 'Delhi' is not a key of `data`, so its entry in the resulting Series is NaN.
capitals2 = [
    'Jaipur',
    'Kolkata',
    'Chandigarh',
    'Delhi',
]
s4 = pd.Series(data=data, index=capitals2)
s4

Jaipur          Rajasthan
Kolkata       West Bengal
Chandigarh         Punjab
Delhi                 NaN
dtype: object

In [29]:
s4.isnull()

Jaipur        False
Kolkata       False
Chandigarh    False
Delhi          True
dtype: bool

In [30]:
s4.notnull()

Jaipur         True
Kolkata        True
Chandigarh     True
Delhi         False
dtype: bool

In [31]:
# Index labels do not have to be unique: all three values share the label 'Cricket'.
s = pd.Series(
    data=['India', 'Australia', 'England'],
    index=['Cricket'] * 3,
)
s

Cricket        India
Cricket    Australia
Cricket      England
dtype: object

In [32]:
s.loc['Cricket']

Cricket        India
Cricket    Australia
Cricket      England
dtype: object

In [33]:
# A None among strings: the resulting Series is displayed with the None kept in place.
colors = ['Blue', 'Red'] + [None]
pd.Series(data=colors)

0    Blue
1     Red
2    None
dtype: object

In [34]:
# A None among numbers: the resulting Series shows it as NaN with dtype float64.
numbers = [*range(1, 3), None]
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

# 2.DATAFRAME

In [35]:
# One Series per student; the Series index holds the field names 'Name' and 'Id'.
student1, student2, student3 = (
    pd.Series({'Name': student_name, 'Id': student_id})
    for student_id, student_name in enumerate(['Utkarsh', 'Amit', 'Ankit'], start=1)
)

In [36]:
df1 = pd.DataFrame([student1,student2,student3], index = (101,102,103))

In [37]:
df1

Unnamed: 0,Name,Id
101,Utkarsh,1
102,Amit,2
103,Ankit,3


In [38]:
# Column-oriented construction: each dict key is a column, `index` labels the rows.
data = dict(
    Name=['Utkarsh', 'Amit', 'Ankit'],
    ID=[1, 2, 3],
)
df2 = pd.DataFrame(data=data, index=[101, 102, 103])
df2

Unnamed: 0,Name,ID
101,Utkarsh,1
102,Amit,2
103,Ankit,3


In [39]:
df2.head()

Unnamed: 0,Name,ID
101,Utkarsh,1
102,Amit,2
103,Ankit,3


In [40]:
df3 = pd.DataFrame(data, index = [101,102,103], columns = ['Name','ID','Age'])
df3

Unnamed: 0,Name,ID,Age
101,Utkarsh,1,
102,Amit,2,
103,Ankit,3,


In [41]:
# Broadcast the scalar 25 to every row of the 'Age' column.
# Bracket assignment is used because attribute assignment (df3.Age = ...) only
# works for a column that already exists; otherwise it sets a plain Python
# attribute instead of creating a column.
df3['Age'] = 25

In [42]:
df3

Unnamed: 0,Name,ID,Age
101,Utkarsh,1,25
102,Amit,2,25
103,Ankit,3,25


In [43]:
df3.index

Int64Index([101, 102, 103], dtype='int64')

In [44]:
df3.values

array([['Utkarsh', 1, 25],
       ['Amit', 2, 25],
       ['Ankit', 3, 25]], dtype=object)

In [45]:
df3.columns

Index(['Name', 'ID', 'Age'], dtype='object')

In [46]:
df3['Name']

101    Utkarsh
102       Amit
103      Ankit
Name: Name, dtype: object

In [47]:
df3.Name

101    Utkarsh
102       Amit
103      Ankit
Name: Name, dtype: object

In [48]:
df3.Name

101    Utkarsh
102       Amit
103      Ankit
Name: Name, dtype: object

In [49]:
df3.iloc[0]

Name    Utkarsh
ID            1
Age          25
Name: 101, dtype: object

In [50]:
df3.loc[101]

Name    Utkarsh
ID            1
Age          25
Name: 101, dtype: object

In [51]:
df3.loc[104] = ['Akash',4,23]

In [52]:
df3

Unnamed: 0,Name,ID,Age
101,Utkarsh,1,25
102,Amit,2,25
103,Ankit,3,25
104,Akash,4,23


In [53]:
# Read one cell with a single .loc[row_label, column_label] call rather than
# chaining two separate indexing steps.
df3.loc[102, 'Name']

'Amit'

In [54]:
# Set one cell with a single .loc[row_label, column_label] assignment.
# The chained form df3['Age'][102] = 26 assigns through an intermediate object
# and makes pandas emit SettingWithCopyWarning; .loc writes to df3 directly.
df3.loc[102, 'Age'] = 26


In [55]:
df3

Unnamed: 0,Name,ID,Age
101,Utkarsh,1,25
102,Amit,2,26
103,Ankit,3,25
104,Akash,4,23


In [56]:
val = pd.Series([21,23,25,24], index = [101,102,103,104])
df3['Age'] = val
df3

Unnamed: 0,Name,ID,Age
101,Utkarsh,1,21
102,Amit,2,23
103,Ankit,3,25
104,Akash,4,24


In [57]:
val2 = pd.Series([6,5], index = [101,103])
df3['Height'] = val2
df3

Unnamed: 0,Name,ID,Age,Height
101,Utkarsh,1,21,6.0
102,Amit,2,23,
103,Ankit,3,25,5.0
104,Akash,4,24,


In [58]:
df3['Weight'] = df3.Age
df3

Unnamed: 0,Name,ID,Age,Height,Weight
101,Utkarsh,1,21,6.0,21
102,Amit,2,23,,23
103,Ankit,3,25,5.0,25
104,Akash,4,24,,24


In [59]:
df3.drop(103)

Unnamed: 0,Name,ID,Age,Height,Weight
101,Utkarsh,1,21,6.0,21
102,Amit,2,23,,23
104,Akash,4,24,,24


In [60]:
df3

Unnamed: 0,Name,ID,Age,Height,Weight
101,Utkarsh,1,21,6.0,21
102,Amit,2,23,,23
103,Ankit,3,25,5.0,25
104,Akash,4,24,,24


In [61]:
del df3['Weight']

In [62]:
df3

Unnamed: 0,Name,ID,Age,Height
101,Utkarsh,1,21,6.0
102,Amit,2,23,
103,Ankit,3,25,5.0
104,Akash,4,24,


In [63]:
df3.T

Unnamed: 0,101,102,103,104
Name,Utkarsh,Amit,Ankit,Akash
ID,1,2,3,4
Age,21,23,25,24
Height,6.0,,5.0,


In [64]:
df3.index.name = 'Students'
df3.columns.name = 'Details'
df3

Details,Name,ID,Age,Height
Students,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,Utkarsh,1,21,6.0
102,Amit,2,23,
103,Ankit,3,25,5.0
104,Akash,4,24,


In [65]:
index = df3.index

In [66]:
# Index objects are immutable, so item assignment raises TypeError.
# The error is the point of this cell; catch it and print the message so the
# notebook can still be run top to bottom without stopping here.
try:
    index[1] = 'A'
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: Index does not support mutable operations

In [67]:
df4 = df3.reindex([201,202,203,204])
df4

Details,Name,ID,Age,Height
Students,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
201,,,,
202,,,,
203,,,,
204,,,,


In [68]:
df3.dropna()

Details,Name,ID,Age,Height
Students,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,Utkarsh,1,21,6.0
103,Ankit,3,25,5.0


In [69]:
df3.fillna(0)

Details,Name,ID,Age,Height
Students,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,Utkarsh,1,21,6.0
102,Amit,2,23,0.0
103,Ankit,3,25,5.0
104,Akash,4,24,0.0


In [70]:
df3.set_index(['Name'])

Details,ID,Age,Height
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Utkarsh,1,21,6.0
Amit,2,23,
Ankit,3,25,5.0
Akash,4,24,


In [71]:
df3.reset_index()

Details,Students,Name,ID,Age,Height
0,101,Utkarsh,1,21,6.0
1,102,Amit,2,23,
2,103,Ankit,3,25,5.0
3,104,Akash,4,24,


## Index a dataframe

In [72]:
df3['Name']

Students
101    Utkarsh
102       Amit
103      Ankit
104      Akash
Name: Name, dtype: object

In [73]:
df3[['Name','Age']]

Details,Name,Age
Students,Unnamed: 1_level_1,Unnamed: 2_level_1
101,Utkarsh,21
102,Amit,23
103,Ankit,25
104,Akash,24


In [74]:
df3.loc[102]

Details
Name      Amit
ID           2
Age         23
Height     NaN
Name: 102, dtype: object

In [75]:
df3.loc[[101,103]]

Details,Name,ID,Age,Height
Students,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,Utkarsh,1,21,6.0
103,Ankit,3,25,5.0


In [76]:
df3.loc[[101,103],['Name','Age']]

Details,Name,Age
Students,Unnamed: 1_level_1,Unnamed: 2_level_1
101,Utkarsh,21
103,Ankit,25


In [77]:
df3.iloc[3]

Details
Name      Akash
ID            4
Age          24
Height      NaN
Name: 104, dtype: object

In [78]:
df3.loc[:,['Name','Age']]

Details,Name,Age
Students,Unnamed: 1_level_1,Unnamed: 2_level_1
101,Utkarsh,21
102,Amit,23
103,Ankit,25
104,Akash,24


In [79]:
df3.sort_index()

Details,Name,ID,Age,Height
Students,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,Utkarsh,1,21,6.0
102,Amit,2,23,
103,Ankit,3,25,5.0
104,Akash,4,24,


In [80]:
df3.sort_values(by = 'Name')

Details,Name,ID,Age,Height
Students,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
104,Akash,4,24,
102,Amit,2,23,
103,Ankit,3,25,5.0
101,Utkarsh,1,21,6.0


In [81]:
df3['Name'].unique()

array(['Utkarsh', 'Amit', 'Ankit', 'Akash'], dtype=object)

## Descriptive Statistics

In [82]:
import numpy as np
# Three integer columns of ten consecutive values, starting at 11, 21 and 31.
data = {
    column: np.arange(first, first + 10)
    for column, first in zip(('A', 'B', 'C'), (11, 21, 31))
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B,C
0,11,21,31
1,12,22,32
2,13,23,33
3,14,24,34
4,15,25,35
5,16,26,36
6,17,27,37
7,18,28,38
8,19,29,39
9,20,30,40


In [83]:
df.sum()

A    155
B    255
C    355
dtype: int64

In [84]:
df.sum(axis=1)

0    63
1    66
2    69
3    72
4    75
5    78
6    81
7    84
8    87
9    90
dtype: int64

In [85]:
df.mean()

A    15.5
B    25.5
C    35.5
dtype: float64

In [86]:
df.mean(axis=1)

0    21.0
1    22.0
2    23.0
3    24.0
4    25.0
5    26.0
6    27.0
7    28.0
8    29.0
9    30.0
dtype: float64

In [87]:
df.mean(skipna=True)

A    15.5
B    25.5
C    35.5
dtype: float64

In [88]:
df.min()

A    11
B    21
C    31
dtype: int64

In [89]:
df.max()

A    20
B    30
C    40
dtype: int64

In [90]:
df.var()

A    9.166667
B    9.166667
C    9.166667
dtype: float64

In [91]:
df.std()

A    3.02765
B    3.02765
C    3.02765
dtype: float64

In [92]:
df.median()

A    15.5
B    25.5
C    35.5
dtype: float64

In [93]:
df.describe()

Unnamed: 0,A,B,C
count,10.0,10.0,10.0
mean,15.5,25.5,35.5
std,3.02765,3.02765,3.02765
min,11.0,21.0,31.0
25%,13.25,23.25,33.25
50%,15.5,25.5,35.5
75%,17.75,27.75,37.75
max,20.0,30.0,40.0


In [94]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A,10.0,15.5,3.02765,11.0,13.25,15.5,17.75,20.0
B,10.0,25.5,3.02765,21.0,23.25,25.5,27.75,30.0
C,10.0,35.5,3.02765,31.0,33.25,35.5,37.75,40.0


In [95]:
df2

Unnamed: 0,Name,ID
101,Utkarsh,1
102,Amit,2
103,Ankit,3


In [96]:
# Drop both columns, leaving a frame that has only its index.
# `columns=` already names the axis, so `axis=1` is not needed, and rebinding
# the result avoids mutating the frame in place.
df2 = df2.drop(columns=['ID', 'Name'])

In [97]:
df2

101
102
103


# Python | Pandas Merging, Joining, and Concatenating


In [121]:
# pandas under its usual alias
import pandas as pd

# First set of employee records, one list per column
data1 = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
}

# Second set of employee records with the same four columns
data2 = {
    'Name': ['Abhi', 'Ayushi', 'Dhiraj', 'Hitesh'],
    'Age': [17, 14, 12, 52],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Btech', 'B.A', 'Bcom', 'B.hons'],
}

# The two frames get disjoint row labels: 0-3 and 4-7
df = pd.DataFrame(data=data1, index=list(range(4)))
df1 = pd.DataFrame(data=data2, index=list(range(4, 8)))

# Print both frames with a blank line between them
print(df, df1, sep=" \n\n ")

     Name  Age    Address Qualification
0     Jai   27     Nagpur           Msc
1  Princi   24     Kanpur            MA
2  Gaurav   22  Allahabad           MCA
3    Anuj   32    Kannuaj           Phd 

      Name  Age    Address Qualification
4    Abhi   17     Nagpur         Btech
5  Ayushi   14     Kanpur           B.A
6  Dhiraj   12  Allahabad          Bcom
7  Hitesh   52    Kannuaj        B.hons


In [122]:
# using a .concat() method
frames = [df, df1]
 
res1 = pd.concat(frames)
res1

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Nagpur,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannuaj,Phd
4,Abhi,17,Nagpur,Btech
5,Ayushi,14,Kanpur,B.A
6,Dhiraj,12,Allahabad,Bcom
7,Hitesh,52,Kannuaj,B.hons


In [125]:
# Employee records that also carry a 'Mobile No' column
data1 = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
    'Mobile No': [97, 91, 58, 76],
}

# Employee records that carry a 'Salary' column instead
data2 = {
    'Name': ['Gaurav', 'Anuj', 'Dhiraj', 'Hitesh'],
    'Age': [22, 32, 12, 52],
    'Address': ['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'],
    'Qualification': ['MCA', 'Phd', 'Bcom', 'B.hons'],
    'Salary': [1000, 2000, 3000, 4000],
}

# Row labels overlap on 2 and 3 only
df = pd.DataFrame(data=data1, index=list(range(4)))
df1 = pd.DataFrame(data=data2, index=[2, 3, 6, 7])

# Print both frames with a blank line between them
print(df, df1, sep=" \n\n ")

     Name  Age    Address Qualification  Mobile No
0     Jai   27     Nagpur           Msc         97
1  Princi   24     Kanpur            MA         91
2  Gaurav   22  Allahabad           MCA         58
3    Anuj   32    Kannuaj           Phd         76 

      Name  Age    Address Qualification  Salary
2  Gaurav   22  Allahabad           MCA    1000
3    Anuj   32    Kannuaj           Phd    2000
6  Dhiraj   12  Allahabad          Bcom    3000
7  Hitesh   52    Kannuaj        B.hons    4000


In [126]:
# Concatenate the two frames side by side (axis=1), aligning rows on the index.
# join='inner' keeps only the index labels that appear in both frames.
res2 = pd.concat([df, df1], axis=1, join='inner')
 
res2

Unnamed: 0,Name,Age,Address,Qualification,Mobile No,Name.1,Age.1,Address.1,Qualification.1,Salary
2,Gaurav,22,Allahabad,MCA,58,Gaurav,22,Allahabad,MCA,1000
3,Anuj,32,Kannuaj,Phd,76,Anuj,32,Kannuaj,Phd,2000


In [127]:
# Concatenate side by side (axis=1) without a `join` argument, so pandas uses
# its default outer join: the result covers the union of both indexes, and
# cells that have no source row are NaN.
# Note: this rebinds res2, so the earlier inner-join result is no longer reachable.
res2 = pd.concat([df, df1], axis=1, sort=False)
 
res2

Unnamed: 0,Name,Age,Address,Qualification,Mobile No,Name.1,Age.1,Address.1,Qualification.1,Salary
0,Jai,27.0,Nagpur,Msc,97.0,,,,,
1,Princi,24.0,Kanpur,MA,91.0,,,,,
2,Gaurav,22.0,Allahabad,MCA,58.0,Gaurav,22.0,Allahabad,MCA,1000.0
3,Anuj,32.0,Kannuaj,Phd,76.0,Anuj,32.0,Kannuaj,Phd,2000.0
6,,,,,,Dhiraj,12.0,Allahabad,Bcom,3000.0
7,,,,,,Hitesh,52.0,Kannuaj,B.hons,4000.0


In [128]:
# pandas under its usual alias
import pandas as pd

# First set of employee records, one list per column
data1 = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
}

# Second set of employee records with the same four columns
data2 = {
    'Name': ['Abhi', 'Ayushi', 'Dhiraj', 'Hitesh'],
    'Age': [17, 14, 12, 52],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Btech', 'B.A', 'Bcom', 'B.hons'],
}

# The two frames get disjoint row labels: 0-3 and 4-7
df = pd.DataFrame(data=data1, index=list(range(4)))
df1 = pd.DataFrame(data=data2, index=list(range(4, 8)))

# Print both frames with a blank line between them
print(df, df1, sep=" \n\n ")

     Name  Age    Address Qualification
0     Jai   27     Nagpur           Msc
1  Princi   24     Kanpur            MA
2  Gaurav   22  Allahabad           MCA
3    Anuj   32    Kannuaj           Phd 

      Name  Age    Address Qualification
4    Abhi   17     Nagpur         Btech
5  Ayushi   14     Kanpur           B.A
6  Dhiraj   12  Allahabad          Bcom
7  Hitesh   52    Kannuaj        B.hons


In [129]:
# Stack df1 underneath df, keeping each frame's own row labels.
# pd.concat is used because DataFrame.append was deprecated and then removed
# in pandas 2.0; for this call the result is the same.
res = pd.concat([df, df1])
res


Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Nagpur,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannuaj,Phd
4,Abhi,17,Nagpur,Btech
5,Ayushi,14,Kanpur,B.A
6,Dhiraj,12,Allahabad,Bcom
7,Hitesh,52,Kannuaj,B.hons


In [130]:
# pandas under its usual alias
import pandas as pd

# Left table: name and age for each key
data1 = {
    'key': ['K0', 'K1', 'K2', 'K3'],
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
}

# Right table: address and qualification for the same keys
data2 = {
    'key': ['K0', 'K1', 'K2', 'K3'],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Btech', 'B.A', 'Bcom', 'B.hons'],
}

# No explicit index: both frames get the default 0..3 row labels
df = pd.DataFrame(data=data1)
df1 = pd.DataFrame(data=data2)

# Print both frames with a blank line between them
print(df, df1, sep=" \n\n ")

  key    Name  Age
0  K0     Jai   27
1  K1  Princi   24
2  K2  Gaurav   22
3  K3    Anuj   32 

   key    Address Qualification
0  K0     Nagpur         Btech
1  K1     Kanpur           B.A
2  K2  Allahabad          Bcom
3  K3    Kannuaj        B.hons


In [131]:
# using .merge() function
res = pd.merge(df, df1, on='key')
 
res

Unnamed: 0,key,Name,Age,Address,Qualification
0,K0,Jai,27,Nagpur,Btech
1,K1,Princi,24,Kanpur,B.A
2,K2,Gaurav,22,Allahabad,Bcom
3,K3,Anuj,32,Kannuaj,B.hons


In [132]:
# pandas under its usual alias
import pandas as pd

# Left table with two key columns
data1 = {
    'key': ['K0', 'K1', 'K2', 'K3'],
    'key1': ['K0', 'K1', 'K0', 'K1'],
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
}

# Right table with the same two key columns; here key1 is always 'K0'
data2 = {
    'key': ['K0', 'K1', 'K2', 'K3'],
    'key1': ['K0', 'K0', 'K0', 'K0'],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Btech', 'B.A', 'Bcom', 'B.hons'],
}

# No explicit index: both frames get the default 0..3 row labels
df = pd.DataFrame(data=data1)
df1 = pd.DataFrame(data=data2)

# Print both frames with a blank line between them
print(df, df1, sep=" \n\n ")

  key key1    Name  Age
0  K0   K0     Jai   27
1  K1   K1  Princi   24
2  K2   K0  Gaurav   22
3  K3   K1    Anuj   32 

   key key1    Address Qualification
0  K0   K0     Nagpur         Btech
1  K1   K0     Kanpur           B.A
2  K2   K0  Allahabad          Bcom
3  K3   K0    Kannuaj        B.hons


In [133]:
# using keys from left frame
res = pd.merge(df, df1, how='left', on=['key', 'key1'])
 
res

Unnamed: 0,key,key1,Name,Age,Address,Qualification
0,K0,K0,Jai,27,Nagpur,Btech
1,K1,K1,Princi,24,,
2,K2,K0,Gaurav,22,Allahabad,Bcom
3,K3,K1,Anuj,32,,


In [134]:
# using keys from right frame
res1 = pd.merge(df, df1, how='right', on=['key', 'key1'])
 
res1

Unnamed: 0,key,key1,Name,Age,Address,Qualification
0,K0,K0,Jai,27.0,Nagpur,Btech
1,K1,K0,,,Kanpur,B.A
2,K2,K0,Gaurav,22.0,Allahabad,Bcom
3,K3,K0,,,Kannuaj,B.hons


In [135]:
# getting intersection of keys
res3 = pd.merge(df, df1, how='inner', on=['key', 'key1'])
 
res3

Unnamed: 0,key,key1,Name,Age,Address,Qualification
0,K0,K0,Jai,27,Nagpur,Btech
1,K2,K0,Gaurav,22,Allahabad,Bcom


In [136]:
# pandas under its usual alias
import pandas as pd

# Left table: name and age
data1 = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
}

# Right table: address and qualification
data2 = {
    'Address': ['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'],
    'Qualification': ['MCA', 'Phd', 'Bcom', 'B.hons'],
}

# String row labels; 'K1' exists only on the left and 'K4' only on the right
df = pd.DataFrame(data=data1, index=['K0', 'K1', 'K2', 'K3'])
df1 = pd.DataFrame(data=data2, index=['K0', 'K2', 'K3', 'K4'])

# Print both frames with a blank line between them
print(df, df1, sep=" \n\n ")

      Name  Age
K0     Jai   27
K1  Princi   24
K2  Gaurav   22
K3    Anuj   32 

       Address Qualification
K0  Allahabad           MCA
K2    Kannuaj           Phd
K3  Allahabad          Bcom
K4    Kannuaj        B.hons


In [137]:
# joining dataframe
res = df.join(df1)
 
res

Unnamed: 0,Name,Age,Address,Qualification
K0,Jai,27,Allahabad,MCA
K1,Princi,24,,
K2,Gaurav,22,Kannuaj,Phd
K3,Anuj,32,Allahabad,Bcom


In [138]:
# getting union
res1 = df.join(df1, how='outer')
 
res1

Unnamed: 0,Name,Age,Address,Qualification
K0,Jai,27.0,Allahabad,MCA
K1,Princi,24.0,,
K2,Gaurav,22.0,Kannuaj,Phd
K3,Anuj,32.0,Allahabad,Bcom
K4,,,Kannuaj,B.hons


# Python | Pandas.pivot_table()

In [139]:
import pandas as pd
import numpy as np

# Source frame for the pivot examples, written row by row:
# A is a name, B an education level, C a number.
df = pd.DataFrame(
    [
        ('John', 'Masters', 27),
        ('Boby', 'Graduate', 23),
        ('Mina', 'Graduate', 21),
        ('Peter', 'Masters', 23),
        ('Nicky', 'Graduate', 24),
    ],
    columns=['A', 'B', 'C'],
)

df

Unnamed: 0,A,B,C
0,John,Masters,27
1,Boby,Graduate,23
2,Mina,Graduate,21
3,Peter,Masters,23
4,Nicky,Graduate,24


In [140]:
# Minimal pivot table: a DataFrame plus the column(s) to use as the row index.
# No aggfunc is passed, so pandas applies its default aggregation (the mean)
# to the remaining column, C.
table = pd.pivot_table(df, index =['A', 'B'])
  
table

Unnamed: 0_level_0,Unnamed: 1_level_0,C
A,B,Unnamed: 2_level_1
Boby,Graduate,23
John,Masters,27
Mina,Graduate,21
Nicky,Graduate,24
Peter,Masters,23


In [141]:
# Spreadsheet-style pivot table: B and C form the row index and B's values are
# spread across the columns.
# The values come from column A, which holds strings, so "sum" concatenates the
# strings in each group; combinations with no matching row are NaN.
# The string alias "sum" is passed instead of np.sum because newer pandas
# versions warn when a numpy callable is given as the aggregation function.
table = pd.pivot_table(df, values='A', index=['B', 'C'],
                       columns=['B'], aggfunc='sum')

table

Unnamed: 0_level_0,B,Graduate,Masters
B,C,Unnamed: 2_level_1,Unnamed: 3_level_1
Graduate,21,Mina,
Graduate,23,Boby,
Graduate,24,Nicky,
Masters,23,,Peter
Masters,27,,John
