In [None]:
# Pandas - the name comes from "panel data": multidimensional data involving measurements over time
# created in 2008 by Wes McKinney
# Features - Series & DataFrame objects, automatic data alignment, slicing, indexing, subsetting, missing-data handling, group-by functionality
# Features - merging & joining, hierarchical labeling of axes, time-series functionality, reshaping & robust input/output tools

In [None]:
# Pandas vs NumPy
# Pandas - rule of thumb: the better fit for larger datasets (>500k rows); works well with tabular data, arbitrary matrix data & time-series data
# NumPy - rule of thumb: the better fit for smaller datasets (<500k rows)

# Series Object in Pandas

In [5]:
import pandas as pd
# Build a Series from a plain Python list; with no index given, pandas
# labels the values 0, 1, 2, 3.
data = [5, 2, 3, 4]
series1 = pd.Series(data=data)
print(series1)

# Confirm that the object really is a pandas Series.
print('\n', type(series1))

# Pass explicit labels to replace the default 0-3 index with a-d.
series2 = pd.Series(data=data, index=list('abcd'))
print('\n', series2)


0    5
1    2
2    3
3    4
dtype: int64

 <class 'pandas.core.series.Series'>

 a    5
b    2
c    3
d    4
dtype: int64


# How to create a data frame

In [7]:
import pandas as pd
# A flat list becomes a single-column DataFrame; the column is labeled 0.
data = [1, 2, 3, 4]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0
0,1
1,2
2,3
3,4


In [8]:
# Create a DataFrame from a dictionary: each key becomes a column label
# and each list supplies that column's values.
dictionary = dict(
    fruits=['apples', 'banana', 'mangoes'],
    count=[10, 20, 15],
)
df = pd.DataFrame(data=dictionary)
df

Unnamed: 0,fruits,count
0,apples,10
1,banana,20
2,mangoes,15


In [12]:
# Create a DataFrame from a Series: the Series index ('a', 'b') is kept as
# the row labels and the values form a single column.
series = pd.Series(data=[6, 12], index=['a', 'b'])
df = pd.DataFrame(data=series)
df

Unnamed: 0,0
a,6
b,12


In [13]:
#creating a data frame using numpy array

import numpy as np
# A NumPy array holds a single dtype, so mixing numbers and names in one
# array turns every element (the salaries included) into a string.
numpyarray = np.array([[50000,60000], ['John','James']])
# Cast the salary row back to integers so the column is numeric and supports
# arithmetic (sum, mean, comparisons) instead of being stored as text.
df = pd.DataFrame({'name': numpyarray[1], 'salary': numpyarray[0].astype(int)})
df

Unnamed: 0,name,salary
0,John,50000
1,James,60000


# Perform merge operations

In [14]:
import pandas as pd
# First frame for the merge examples: one row per player with points and game title.
player = ['Player1', 'Player2', 'Player3']
point = [8, 9, 6]
title = ['Game1', 'Game2', 'Game3']
df1 = pd.DataFrame(data=dict(Player=player, Points=point, Title=title))
df1

Unnamed: 0,Player,Points,Title
0,Player1,8,Game1
1,Player2,9,Game2
2,Player3,6,Game3


In [15]:
# Second frame for the merge examples: it shares the 'Player' and 'Title'
# column names with df1, and only the Player1 row has the same values.
player = ['Player1', 'Player5', 'Player6']
power = ['Punch', 'kick', 'Elbow']
title = ['Game1', 'Game5', 'Game6']
df2 = pd.DataFrame(data=dict(Player=player, Power=power, Title=title))
df2

Unnamed: 0,Player,Power,Title
0,Player1,Punch,Game1
1,Player5,kick,Game5
2,Player6,Elbow,Game6


In [16]:
# Inner merge: keep only the rows whose 'Player' value appears in both frames.
# 'Title' exists in both inputs but is not a join key here, so it comes back
# as Title_x (from df1) and Title_y (from df2).
pd.merge(df1, df2, how='inner', on='Player')

Unnamed: 0,Player,Points,Title_x,Power,Title_y
0,Player1,8,Game1,Punch,Game1


In [17]:
# With no `on` argument, the merge uses every column name the two frames
# share (Player and Title) as the key, and defaults to an inner merge.
pd.merge(df1, df2)

Unnamed: 0,Player,Points,Title,Power
0,Player1,8,Game1,Punch


In [18]:
# Left merge: every row of df1 is kept. Player2 and Player3 have no match
# in df2, so the columns coming from df2 are NaN for those rows.
pd.merge(df1, df2, how='left', on='Player')

Unnamed: 0,Player,Points,Title_x,Power,Title_y
0,Player1,8,Game1,Punch,Game1
1,Player2,9,Game2,,
2,Player3,6,Game3,,


In [19]:
# Right merge: every row of df2 is kept. Player5 and Player6 have no match
# in df1, so the df1 columns are NaN for them (Points is shown as a float
# because of those missing values).
pd.merge(df1, df2, how='right', on='Player')

Unnamed: 0,Player,Points,Title_x,Power,Title_y
0,Player1,8.0,Game1,Punch,Game1
1,Player5,,,kick,Game5
2,Player6,,,Elbow,Game6


In [20]:
# Outer merge: the union of both frames' players. Rows found on only one
# side get NaN in the columns that come from the other side.
pd.merge(df1, df2, how='outer', on='Player')

Unnamed: 0,Player,Points,Title_x,Power,Title_y
0,Player1,8.0,Game1,Punch,Game1
1,Player2,9.0,Game2,,
2,Player3,6.0,Game3,,
3,Player5,,,kick,Game5
4,Player6,,,Elbow,Game6


# Perform join statements

In [22]:
# Left-hand frame for the join examples, with explicit row labels L1-L3.
player = ['Player1', 'Player2', 'Player3']
point = [8, 9, 6]
title = ['Game1', 'Game2', 'Game3']
df3 = pd.DataFrame(
    data=dict(Player=player, Points=point, Title=title),
    index=['L1', 'L2', 'L3'],
)
df3

Unnamed: 0,Player,Points,Title
L1,Player1,8,Game1
L2,Player2,9,Game2
L3,Player3,6,Game3


In [23]:
# Right-hand frame for the join examples. Its row labels (L2-L4) only partly
# overlap with L1-L3, and its column names (Players, Titles) differ from
# Player / Title.
player = ['Player1', 'Player5', 'Player6']
power = ['Punch', 'Kick', 'Elbow']
title = ['Game1', 'Game5', 'Game6']
df4 = pd.DataFrame(
    data=dict(Players=player, Power=power, Titles=title),
    index=['L2', 'L3', 'L4'],
)
df4

Unnamed: 0,Players,Power,Titles
L2,Player1,Punch,Game1
L3,Player5,Kick,Game5
L4,Player6,Elbow,Game6


In [24]:
# Inner join: rows are matched on their index labels, and only labels present
# in both frames (L2 and L3) are kept.
df3.join(other=df4, how='inner')

Unnamed: 0,Player,Points,Title,Players,Power,Titles
L2,Player2,9,Game2,Player1,Punch,Game1
L3,Player3,6,Game3,Player5,Kick,Game5


In [25]:
# Left join: every index label of df3 is kept. L1 has no counterpart in df4,
# so the df4 columns are NaN on that row.
df3.join(other=df4, how='left')

Unnamed: 0,Player,Points,Title,Players,Power,Titles
L1,Player1,8,Game1,,,
L2,Player2,9,Game2,Player1,Punch,Game1
L3,Player3,6,Game3,Player5,Kick,Game5


In [26]:
# Outer join: the union of both frames' index labels (L1-L4). A label missing
# from one frame gets NaN in that frame's columns.
df3.join(other=df4, how='outer')

Unnamed: 0,Player,Points,Title,Players,Power,Titles
L1,Player1,8.0,Game1,,,
L2,Player2,9.0,Game2,Player1,Punch,Game1
L3,Player3,6.0,Game3,Player5,Kick,Game5
L4,,,,Player6,Elbow,Game6


# Steps to concatenate 2 DFs using Pandas

In [27]:
# Stack df4's rows beneath df3's. The result has the union of both frames'
# columns, with NaN wherever a frame lacks a column. Index labels are kept
# as-is, so L2 and L3 each appear twice.
pd.concat(objs=[df3, df4])

Unnamed: 0,Player,Points,Title,Players,Power,Titles
L1,Player1,8.0,Game1,,,
L2,Player2,9.0,Game2,,,
L3,Player3,6.0,Game3,,,
L2,,,,Player1,Punch,Game1
L3,,,,Player5,Kick,Game5
L4,,,,Player6,Elbow,Game6
