Pandas is an open-source Python library that provides data structures and functions designed to make working with structured data (like tables) easy and efficient.

Pandas Syntax

In [None]:
import numpy as np
import pandas as pd

Pandas Series and DataFrames

In [12]:
# Sample inputs reused by the Series examples in the cells that follow.
labels = ['a', 'b', 'c']
data = [10, 20, 30]
arr = np.array(data)  # the same values held in a NumPy array

# Pair each label with its value. dict(zip(...)) replaces the manual loop
# and avoids leaving loop variables behind in the notebook namespace.
d = dict(zip(labels, data))

d

{'a': 10, 'b': 20, 'c': 30}

In [None]:
 # Build a Series whose values carry explicit string labels, then print it.
 s = pd.Series(data=[10, 20, 50], index=list('abc'))
 print(s)

a    10
b    20
c    50
dtype: int64


In [13]:
# No index is supplied, so the printed Series is labelled 0, 1, 2.
s = pd.Series(data)
print(s)

0    10
1    20
2    30
dtype: int64


In [18]:
# Values come from the NumPy array, index labels from `labels`.
s = pd.Series(data=arr, index=labels)
print(s)

a    10
b    20
c    30
dtype: int64


In [21]:
# Column-oriented input: each key is a column name, each list its values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['NY', 'LA', 'Chicago'],
)

df = pd.DataFrame(data=data)
print(df)


      Name  Age     City
0    Alice   25       NY
1      Bob   30       LA
2  Charlie   35  Chicago


Grabbing Values

In [22]:
# Two Series keyed by country name. Their label sets only partly overlap:
# 'Japan' appears only in ser1 and 'AUS' only in ser2.
countryNames = ['India', 'USA', 'Canada', 'Russia', 'Japan']
rankings = [1, 2, 3, 4, 5]

ser1 = pd.Series(data=rankings, index=countryNames)
ser2 = pd.Series(
    data=[1, 2, 3, 5, 4],
    index=['India', 'USA', 'Canada', 'AUS', 'Russia'],
)

In [23]:
# A bare last expression is rendered as the cell output: rankings labelled by country.
ser1

Unnamed: 0,0
India,1
USA,2
Canada,3
Russia,4
Japan,5


In [24]:
# Second Series: it has 'AUS' and lacks 'Japan' compared with ser1.
ser2

Unnamed: 0,0
India,1
USA,2
Canada,3
AUS,5
Russia,4


In [25]:
# Look up one value by its index label; the result is a scalar, not a Series.
ser1['India']

np.int64(1)

Performing operations on series

In [26]:
# Arithmetic between Series lines the operands up by index label, so labels
# found in only one of them ('AUS', 'Japan') show up as NaN in every result.
# One print call with a newline separator emits the four results with a
# dashed rule between them.
print(
    ser1 + ser2,
    '-' * 25,
    ser1 - ser2,
    '-' * 25,
    ser1 * ser2,
    '-' * 25,
    ser1 / ser2,
    sep='\n',
)

AUS       NaN
Canada    6.0
India     2.0
Japan     NaN
Russia    8.0
USA       4.0
dtype: float64
-------------------------
AUS       NaN
Canada    0.0
India     0.0
Japan     NaN
Russia    0.0
USA       0.0
dtype: float64
-------------------------
AUS        NaN
Canada     9.0
India      1.0
Japan      NaN
Russia    16.0
USA        4.0
dtype: float64
-------------------------
AUS       NaN
Canada    1.0
India     1.0
Japan     NaN
Russia    1.0
USA       1.0
dtype: float64


Pandas DataFrame

In [27]:
# Seed NumPy's global random generator so the random DataFrame built below
# shows the same values on every run.
np.random.seed(101)

In [28]:
# Build a labelled 5 x 4 DataFrame filled with values from np.random.randn.
df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=list('ABCDE'),    # row labels
    columns=list('WXYZ'),   # column labels
)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [30]:
# Inspect the class of the object returned by pd.DataFrame(...).
type(df)

Creating a DataFrame from a dictionary:

In [31]:
# Each key becomes a column name and each list supplies that column's rows.
d = dict(A=[4, 5, 6], B=[5, 8, 1], C=[1, 2, 3])

pd.DataFrame(data=d)

Unnamed: 0,A,B,C
0,4,5,1
1,5,8,2
2,6,1,3


Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.


Indexing and Selection

In [33]:
# Square brackets with a column label return that single column, indexed by the row labels.
df['W'] # column selection


Unnamed: 0,W
A,2.70685
B,0.651118
C,-2.018168
D,0.188695
E,0.190794


In [34]:
# .loc selects by label: one row label returns that row, indexed by the column names.
df.loc['A'] # row selection

Unnamed: 0,A
W,2.70685
X,0.628133
Y,0.907969
Z,0.503826


In [35]:
# Row label first, column label second: returns the single value at (B, X).
df.loc['B', 'X']

np.float64(-0.31931804459303326)

In [36]:
# A list of row labels plus a list of column labels returns the matching sub-table.
df.loc[
    ['A', 'B'],
    ['X', 'Y']
]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077


In [37]:
# .iloc selects by integer position: rows 1 and 2, which carry the labels B and C here.
df.iloc[[1,2]]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001


New Column and Row addition

In [45]:
# Assigning to a column label that does not exist yet creates the column;
# here each row gets the sum of its W, X, Y and Z values.
df['new_col'] = df['W'] + df['X'] + df['Y'] + df['Z']
df

Unnamed: 0,W,X,Y,Z,new_col
A,2.70685,0.628133,0.907969,0.503826,4.746778
B,0.651118,-0.319318,-0.848077,0.605965,0.089688
C,-2.018168,0.740122,0.528813,-0.589001,-1.338233
D,0.188695,-0.758872,-0.933237,0.955057,-0.548357
E,0.190794,1.978757,2.605967,0.683509,5.459028


Deleting / Drop Column

In [46]:
# Remove the column again. drop() returns a new DataFrame, so the result is
# assigned back to df rather than mutating it with inplace=True; `columns=`
# states the axis by name instead of the numeric axis=1.
df = df.drop(columns='new_col')
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


Deleting / Drop Row

In [44]:
# With no axis given, the label is looked up in the row index. The result is
# not assigned, so df itself still contains row E afterwards.
df.drop('E')

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


Conditional Selection

In [47]:
# The comparison is applied to every cell and yields a same-shaped DataFrame of True/False.
df>0

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [48]:
# Indexing with a boolean DataFrame keeps the shape; cells where the condition
# is False come back as missing values.
df[df>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [49]:
# A boolean Series used as the indexer filters rows: only rows with W > 0 remain (row C is dropped).
df[df['W'] > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [50]:
# Column X for the rows where W > 0. A single .loc call takes the row mask and
# the column label together, which avoids chained indexing (df[mask]['X']).
df.loc[df['W'] > 0, 'X']


Unnamed: 0,X
A,0.628133
B,-0.319318
D,-0.758872
E,1.978757


Multiple Condition

In [51]:

# Combine row conditions with & (element-wise AND). Each comparison needs its
# own parentheses because & binds more tightly than > in Python.
(df['W'] > 0 ) & (df['Y'] > 1 )

Unnamed: 0,0
A,False
B,False
C,False
D,False
E,True


Reset the index to Default:

In [52]:
# Show df with its current A-E row labels before resetting the index.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [53]:
# The old row labels move into a regular column named 'index' and rows are
# renumbered from 0. The result is not assigned, so df keeps its A-E index.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


Set the Index

In [54]:
# One state code per row, upper-cased and split on whitespace into a list.
new_index = 'ca ny wy or co'.upper().split()

# Attach the codes as a regular column. The former mid-cell bare `new_index`
# expression was removed: only a cell's last expression is displayed.
df['States'] = new_index
df

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO
