# Pandas

Pandas is an open-source, BSD-licensed Python library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. Python with Pandas is used in a wide range of academic and commercial domains, including finance, economics, statistics, and analytics.

# Key Features of Pandas

1. Tools for loading data from different file formats
2. Handling of missing values
3. Reshaping and pivoting of data sets
4. Merging and joining of data sets
5. Fast and efficient tools for manipulating and analyzing data

# Types of Data Structures :
    Pandas deals with two types of data structures
    1. Series
    2. DataFrame

In [13]:
import numpy as np                      # NumPy, under its conventional alias np
import pandas as pd                     # pandas, under its conventional alias pd

# Pandas -- Series

In [2]:
# Create an empty Series. The dtype is given explicitly: without it pandas 1.x
# emits a DeprecationWarning about the default dtype of an empty Series, and
# pandas 2.0+ returns dtype object instead of float64.
pd.Series(dtype='float64')

  """Entry point for launching an IPython kernel.


Series([], dtype: float64)

In [3]:
# Build a Series from a plain Python list. No index is supplied,
# so pandas labels the values 0, 1, 2.
data = [1, 2, 3]
s = pd.Series(data=data)
print(s)

0    1
1    2
2    3
dtype: int64


In [4]:
# Build a Series from a tuple and label its values explicitly:
# 100, 200 and 500 are used as the index.
data = (1, 2, 2)
s = pd.Series(data=data, index=[100, 200, 500])
print(s)

100    1
200    2
500    2
dtype: int64


In [5]:
# Build a Series from a NumPy array of strings, labelled 100..103.
data = np.array(['a', 'b', 'c', 'd'])
s = pd.Series(data=data, index=[100, 101, 102, 103])
print(s)

100    a
101    b
102    c
103    d
dtype: object


# Creating a Series from a Dictionary

In [6]:
# A dictionary maps keys to values. Passed to pd.Series, the keys
# ('a', 'b', 'c') become the index and 1, 2, 3 become the values.
data = {'a': 1, 'b': 2, 'c': 3}
a = pd.Series(data=data)
print(a)

a    1
b    2
c    3
dtype: int64


In [7]:
# Same dictionary as before, but the values are stored as floats
# because dtype='float' is requested.
data = {'a': 1, 'b': 2, 'c': 3}
a = pd.Series(data=data, dtype='float')
print(a)

a    1.0
b    2.0
c    3.0
dtype: float64


In [8]:
# Same dictionary again, this time stored as complex numbers
# because dtype='complex' is requested.
data = {'a': 1, 'b': 2, 'c': 3}
a = pd.Series(data=data, dtype='complex')
print(a)

a    1.000000+0.000000j
b    2.000000+0.000000j
c    3.000000+0.000000j
dtype: complex128


# Creating Series From a Scalar

In [10]:
import numpy as np
import pandas as pd

# A scalar is repeated for every label in the index; dtype='float'
# stores the value 10 as 10.0.
series = pd.Series(data=10, index=[0, 1, 2, 3, 4], dtype='float')
print(series)

0    10.0
1    10.0
2    10.0
3    10.0
4    10.0
dtype: float64


In [11]:
import pandas as pd
import numpy as np

# Four-element Series with an explicit 0..3 index, used by the
# element-access examples that follow.
s1 = pd.Series(data=[12, 10, 15, 25], index=[0, 1, 2, 3])
print(s1)

0    12
1    10
2    15
3    25
dtype: int64


In [12]:
# Retrieve the first three elements of the Series.
# .iloc selects by position, so the result does not depend on the index labels.
s1.iloc[:3]

0    12
1    10
2    15
dtype: int64

In [13]:
# Retrieve the last three elements of the Series.
# .iloc selects by position, so the result does not depend on the index labels.
s1.iloc[-3:]

1    10
2    15
3    25
dtype: int64

In [19]:
# Retrieve the first element of the Series.
# s1[0] looks up the *label* 0, which is the first element only because the
# index happens to start at 0; .iloc[0] always returns the first position.
s1.iloc[0]

12

In [17]:
# Retrieve the last element of the Series as a one-element Series
# (the slice keeps the index label). Use s1.iloc[-1] to get the bare value.
s1.iloc[-1:]

3    25
dtype: int64

# DataFrames

In [20]:
# A DataFrame created with no arguments has no rows and no columns.
a = pd.DataFrame()
print(a)


Empty DataFrame
Columns: []
Index: []


# DataFrame From a List

In [21]:
# Each inner list becomes one row; rows and columns get default
# integer labels.
a = [
    [1, 2, 7, 5, 4],
    [4, 5, 6, 8, 5],
]
df = pd.DataFrame(data=a)
df

Unnamed: 0,0,1,2,3,4
0,1,2,7,5,4
1,4,5,6,8,5


In [22]:
import pandas as pd

# Each inner list is one row; the columns argument names the two fields.
data = [
    ['Alex', 10],
    ['Bob', 12],
    ['Clarke', 13],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age'])
print(df)

     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13


In [23]:
import pandas as pd

data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
# Cast only the numeric column to float. Passing dtype=float to the constructor
# asks pandas to cast every column, including the 'Name' strings: older
# releases silently fell back to object for that column (with a deprecation
# warning) and newer releases raise instead.
df = pd.DataFrame(data, columns=['Name', 'Age']).astype({'Age': float})
df

Unnamed: 0,Name,Age
0,Alex,10.0
1,Bob,12.0
2,Clarke,13.0


# DataFrame From a List of Dictionaries

In [24]:
import pandas as pd

# Each dictionary is one row and its keys are the column names.
# The first row has no 'c' key, so that cell is left missing (NaN).
data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [25]:
import pandas as pd

# Same rows as the previous example, but labelled 'first' and 'second'
# instead of the default 0 and 1.
data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]
df = pd.DataFrame(data=data, index=['first', 'second'])
df

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [26]:
import numpy as np
import pandas as pd

# A dictionary of equal-length lists: each key is a column name and
# each list holds that column's values.
data = {
    'Name': ['Ram', 'John', 'Roman'],
    'Marks': [70, 80, 99],
}
df = pd.DataFrame(data=data)
print(df)

    Name  Marks
0    Ram     70
1   John     80
2  Roman     99


# Selection of a Column in a DataFrame

In [25]:
import numpy as np
import pandas as pd

# Two-column frame used to demonstrate column selection.
data = {
    'Name': ['Ram', 'John', 'Roman'],
    'Age': [40, 18, 19],
}
df = pd.DataFrame(data=data)

# The whole DataFrame.
print(df)
print('---------------------------------------------------------------------------------------------')

# Bracket notation: the 'Name' column as a Series.
print(df['Name'])
print('----------------------------------------------------------------------------------------------')

# Attribute notation: the same 'Name' column.
print(df.Name)

    Name  Age
0    Ram   40
1   John   18
2  Roman   19
---------------------------------------------------------------------------------------------
0      Ram
1     John
2    Roman
Name: Name, dtype: object
----------------------------------------------------------------------------------------------
0      Ram
1     John
2    Roman
Name: Name, dtype: object


# Addition of Column in DataFrame

In [1]:
import numpy as np
import pandas as pd

# Marks per student in two subjects.
data = {
    'Name': ['Ram', 'John', 'Roman'],
    'English': [80, 98, 99],
    'Maths': [65, 100, 95],
}
df = pd.DataFrame(data=data)

# Frame before the new column is added.
print(df)
print('------------------------------------------------------------------------------')

# Assigning to a label that does not exist yet appends a column;
# here it holds the element-wise sum of the two subject columns.
english_marks = df['English']
maths_marks = df['Maths']
df['Total Marks'] = english_marks + maths_marks
print(df)

    Name  English  Maths
0    Ram       80     65
1   John       98    100
2  Roman       99     95
------------------------------------------------------------------------------
    Name  English  Maths  Total Marks
0    Ram       80     65          145
1   John       98    100          198
2  Roman       99     95          194


# Drop of Column in DataFrame

In [6]:
import numpy as np
import pandas as pd

# Marks per student in two subjects.
data = {
    'Name': ['Ram', 'John', 'Roman'],
    'English': [80, 98, 99],
    'Maths': [65, 100, 95],
}
df = pd.DataFrame(data=data)

# Frame before the column is removed.
print(df)
print('-------------------------------------------------------------------------')

# Remove the 'English' column and rebind df to the result.
df = df.drop(columns='English')
print('After dropping column:')
print(df)

    Name  English  Maths
0    Ram       80     65
1   John       98    100
2  Roman       99     95
-------------------------------------------------------------------------
After dropping column:
    Name  Maths
0    Ram     65
1   John    100
2  Roman     95


# Attributes of DataFrame

In [7]:
import numpy as np
import pandas as pd

# Three rows, two columns.
data = {
    'Name': ['Ram', 'John', 'Roman'],
    'Age': [40, 18, 19],
}
df = pd.DataFrame(data=data)

# The frame itself.
print(df)
print('------------------------------------------------------------------------------------')

# shape is a (rows, columns) tuple.
print(df.shape)

    Name  Age
0    Ram   40
1   John   18
2  Roman   19
------------------------------------------------------------------------------------
(3, 2)


In [11]:
# Print the data type stored in each column of df.
print(df.dtypes)

Name    object
Age      int64
dtype: object


In [12]:
# Show the column labels of df as an Index object.
df.columns

Index(['Name', 'Age'], dtype='object')

# Working with text data

In [14]:
# Series of strings used by the text-handling examples.
s = pd.Series(data=['Tom', 'Jerry', 'Spike'])
s

0      Tom
1    Jerry
2    Spike
dtype: object

# Converting Strings to Lower / Upper Case

In [21]:
# Lower-case copy of every string in the Series.
print(s.str.lower())

print('-------------------------------------------------------------')

# Upper-case copy of every string in the Series.
print(s.str.upper())

0      tom
1    jerry
2    spike
dtype: object
-------------------------------------------------------------
0      TOM
1    JERRY
2    SPIKE
dtype: object


In [22]:
import numpy as np
import pandas as pd

# Recreate the Series of strings for the examples below.
s = pd.Series(data=['Tom', 'Jerry', 'Spike'])
s

0      Tom
1    Jerry
2    Spike
dtype: object

In [23]:
# Length of each string in the Series.
s.str.len()

0    3
1    5
2    5
dtype: int64

In [24]:
# For each element: True if it contains the pattern 'e', False otherwise.
s.str.contains('e')

0    False
1     True
2     True
dtype: bool

In [89]:
# Series of short phrases built from a Python list of strings.
data = [
    'Cuite pie',
    'Sweetie Pie',
    'Hottie',
    'Sexy',
]
s = pd.Series(data=data)
s

0      Cuite pie
1    Sweetie Pie
2         Hottie
3           Sexy
dtype: object

In [92]:
# Number of items in the Python list `data`.
len(data)

4

In [34]:
# Length of each string in the Series (spaces count as characters).
s.str.len()

0     9
1    11
2     6
3     4
dtype: int64

In [94]:
# One indicator column per distinct string; a 1 marks the row holding that value.
s.str.get_dummies()

Unnamed: 0,Cuite pie,Hottie,Sexy,Sweetie Pie
0,1,0,0,0
1,0,0,0,1
2,0,1,0,0
3,0,0,1,0


In [98]:
# Number of occurrences of the pattern 'z' in each element.
s.str.count('z')

0    0
1    0
2    0
3    0
dtype: int64

# swapcase() method with text data

In [26]:
# swapcase() flips the case of every character in each element.
series = pd.Series(data=['tom', 'John', 'RomaN'])

# Original values.
print('Series without swapcase :', series, sep='\n')
print('-------------------------------------------------------------------------------------------------------')

# Values after the case of each character is swapped.
print('Series with swapcase', series.str.swapcase(), sep='\n')

Series without swapcase :
0      tom
1     John
2    RomaN
dtype: object
-------------------------------------------------------------------------------------------------------
Series with swapcase
0      TOM
1     jOHN
2    rOMAn
dtype: object


# islower() method with text data

In [27]:
# islower() checks, for each string in the Series, whether all of its
# characters are lower case. It returns a Boolean per element.
series = pd.Series(data=['tom', 'John', 'RomaN'])

# Original values.
print('Before using method :', series, sep='\n')
print('-------------------------------------------------------------------------------------------------------')

# Boolean result per element.
print('After using the method', series.str.islower(), sep='\n')

Before using method :
0      tom
1     John
2    RomaN
dtype: object
-------------------------------------------------------------------------------------------------------
After using the method
0     True
1    False
2    False
dtype: bool


# isupper() method with text data

In [28]:
# isupper() checks, for each string in the Series, whether all of its
# characters are upper case. It returns a Boolean per element.
series = pd.Series(data=['tom', 'John', 'RomaN'])

# Original values.
print('Before using method :', series, sep='\n')
print('-------------------------------------------------------------------------------------------------------')

# Boolean result per element.
print('After using the method', series.str.isupper(), sep='\n')

Before using method :
0      tom
1     John
2    RomaN
dtype: object
-------------------------------------------------------------------------------------------------------
After using the method
0    False
1    False
2    False
dtype: bool


# isnumeric method with text data

In [29]:
# isnumeric() checks, for each string in the Series, whether all of its
# characters are numeric. It returns a Boolean per element.
series = pd.Series(data=['123', 'John', 'RomaN45'])

# Original values.
print('Before using method :', series, sep='\n')
print('-------------------------------------------------------------------------------------------------------')

# Boolean result per element.
print('After using the method', series.str.isnumeric(), sep='\n')

Before using method :
0        123
1       John
2    RomaN45
dtype: object
-------------------------------------------------------------------------------------------------------
After using the method
0     True
1    False
2    False
dtype: bool


# get_dummies method with text data

In [30]:
# get_dummies() one-hot encodes the Series: one indicator column per
# distinct string.
series = pd.Series(data=['tom', 'John', 'RomaN'])

# Original values.
print('Before using method :', series, sep='\n')
print('-------------------------------------------------------------------------------------------------------')

# One-hot encoded frame.
print('After using the method', series.str.get_dummies(), sep='\n')

Before using method :
0      tom
1     John
2    RomaN
dtype: object
-------------------------------------------------------------------------------------------------------
After using the method
   John  RomaN  tom
0     0      0    1
1     1      0    0
2     0      1    0


# count method with text data

In [32]:
# count() returns how many times the pattern appears in each element.
series = pd.Series(data=['tom', 'John', 'RomaN'])

# Original values.
print('Before using method :', series, sep='\n')
print('-------------------------------------------------------------------------------------------------------')

# Occurrences of 'm' per element.
print('After using the method', series.str.count('m'), sep='\n')

Before using method :
0      tom
1     John
2    RomaN
dtype: object
-------------------------------------------------------------------------------------------------------
After using the method
0    1
1    0
2    1
dtype: int64


# startswith method with text data

In [34]:
# startswith() returns True for each element that starts with the pattern.
series = pd.Series(data=['tom', 'John', 'RomaN'])

# Original values.
print('Before using method :', series, sep='\n')
print('-------------------------------------------------------------------------------------------------------')

# Whether each element starts with 'm'.
print('After using the method', series.str.startswith('m'), sep='\n')

Before using method :
0      tom
1     John
2    RomaN
dtype: object
-------------------------------------------------------------------------------------------------------
After using the method
0    False
1    False
2    False
dtype: bool


# endswith method with text data

In [36]:
# endswith() returns True for each element that ends with the pattern.
series = pd.Series(data=['tom', 'John', 'RomaN'])

# Original values.
print('Before using method :', series, sep='\n')
print('-------------------------------------------------------------------------------------------------------')

# Whether each element ends with 'm'.
print('After using the method', series.str.endswith('m'), sep='\n')

Before using method :
0      tom
1     John
2    RomaN
dtype: object
-------------------------------------------------------------------------------------------------------
After using the method
0     True
1    False
2    False
dtype: bool


# Merging/Joining

In [100]:
import pandas as pd

# Two frames with the same columns (id, Name, subject_id), used as the
# inputs of the merge examples.
left = pd.DataFrame(
    {
        'id': [1, 2, 3, 4, 5],
        'Name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
        'subject_id': ['sub1', 'sub2', 'sub4', 'sub6', 'sub5'],
    }
)
right = pd.DataFrame(
    {
        'id': [1, 2, 3, 4, 5],
        'Name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
        'subject_id': ['sub2', 'sub4', 'sub3', 'sub6', 'sub5'],
    }
)
print(left)
print(right)

   id    Name subject_id
0   1    Alex       sub1
1   2     Amy       sub2
2   3   Allen       sub4
3   4   Alice       sub6
4   5  Ayoung       sub5
   id   Name subject_id
0   1  Billy       sub2
1   2  Brian       sub4
2   3   Bran       sub3
3   4  Bryce       sub6
4   5  Betty       sub5


In [None]:
# pd.merge needs the two frames to combine; called with no arguments it
# raises TypeError. Join the `left` and `right` frames defined earlier on
# their shared 'subject_id' column (an inner join by default).
pd.merge(left, right, on='subject_id')

In [56]:
import pandas as pd

# Rebuild the two input frames.
left = pd.DataFrame(
    {
        'id': [1, 2, 3, 4, 5],
        'Name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
        'subject_id': ['sub1', 'sub2', 'sub4', 'sub6', 'sub5'],
    }
)
right = pd.DataFrame(
    {
        'id': [1, 2, 3, 4, 5],
        'Name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
        'subject_id': ['sub2', 'sub4', 'sub3', 'sub6', 'sub5'],
    }
)

# Join on the shared 'id' column. The other column names exist in both
# frames, so the result carries them with _x (left) and _y (right) suffixes.
pd.merge(left=left, right=right, on='id')

Unnamed: 0,id,Name_x,subject_id_x,Name_y,subject_id_y
0,1,Alex,sub1,Billy,sub2
1,2,Amy,sub2,Brian,sub4
2,3,Allen,sub4,Bran,sub3
3,4,Alice,sub6,Bryce,sub6
4,5,Ayoung,sub5,Betty,sub5
