In [1]:
import numpy as np
import pandas as pd

In [2]:
#Map each key to a two-element list, then echo the mapping to stdout
dictionary = dict(a=[0, 4], b=[6, 3], c=[9, 11])
print(dictionary)

{'a': [0, 4], 'b': [6, 3], 'c': [9, 11]}


In [3]:
#Dict keys become the index labels; each list is stored whole as one object-dtype element
pd.Series(data=dictionary)

a     [0, 4]
b     [6, 3]
c    [9, 11]
dtype: object

In [4]:
#Country names labelled 'a'..'f'
s1 = pd.Series(
    data=['USA', 'Canada', 'Germany', 'Japan', 'France', 'UK'],
    index=list('abcdef'),
)

In [5]:
#12x2 array of standard-normal draws; no seed is set in the cells above, so the values change per run
np.random.randn(12,2)

array([[ 0.82657727, -0.46583558],
       [-0.11648451,  1.28964129],
       [ 1.1131661 , -1.1845035 ],
       [-1.75941139,  0.98905913],
       [ 1.07273918,  0.80932663],
       [ 0.26033196,  1.18544979],
       [-0.74020457, -0.91441259],
       [ 1.61738014, -0.25066508],
       [ 0.22126997, -0.04561192],
       [ 0.90866263,  0.73184428],
       [ 0.44440055, -1.10346286],
       [-0.22593281, -0.14762206]])

In [6]:
s1

a        USA
b     Canada
c    Germany
d      Japan
e     France
f         UK
dtype: object

In [7]:
#First three entries by position; .iloc states the positional intent explicitly,
#because s1 is indexed by the labels 'a'..'f' rather than by integers
s1.iloc[0:3]

a        USA
b     Canada
c    Germany
dtype: object

In [8]:
#Label-based lookup of the entry stored under 'a'
s1.loc['a']

'USA'

In [9]:
#Integer values keyed by country name
s2 = pd.Series(
    data=[1, 2, 3, 4],
    index=['USA', 'Canada', 'Germany', 'Japan'],
)

In [10]:
s2

USA        1
Canada     2
Germany    3
Japan      4
dtype: int64

In [11]:
#Second country-keyed Series; it shares only 'Canada' and 'USA' with s2
s3 = pd.Series(
    data=[1, 2, 3, 4],
    index=['France', 'Australia', 'Canada', 'USA'],
)

In [12]:
s3

France       1
Australia    2
Canada       3
USA          4
dtype: int64

In [13]:
#Addition aligns on index labels; a label missing from either Series yields NaN in the result
s2 + s3

Australia    NaN
Canada       5.0
France       NaN
Germany      NaN
Japan        NaN
USA          5.0
dtype: float64

## Data Frames

In [14]:
#Seeded local generator so the 5x5 table of integers in [1, 25] is identical on every run.
#The recorded outputs below came from an unseeded draw and will differ until the notebook is re-executed.
df = pd.DataFrame(
    np.random.default_rng(42).integers(1, 26, size=(5, 5)),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['V', 'W', 'X', 'Y', 'Z'],
)

In [15]:
df

Unnamed: 0,V,W,X,Y,Z
A,18,5,9,21,5
B,18,12,15,8,24
C,4,21,13,16,21
D,21,4,17,2,15
E,12,19,6,13,25


In [16]:
df.loc['E',['Y','W','Z']]

Y    13
W    19
Z    25
Name: E, dtype: int64

In [17]:
df.loc[['E','D']]

Unnamed: 0,V,W,X,Y,Z
E,12,19,6,13,25
D,21,4,17,2,15


In [18]:
df.loc[:,'V']

A    18
B    18
C     4
D    21
E    12
Name: V, dtype: int64

In [19]:
df.columns

Index(['V', 'W', 'X', 'Y', 'Z'], dtype='object')

In [20]:
df.iloc[-1,0:3]

V    12
W    19
X     6
Name: E, dtype: int64

#### Dropping a column/row

In [21]:
#Returns a copy without row 'E'; df itself is left unchanged
df.drop(index='E')

Unnamed: 0,V,W,X,Y,Z
A,18,5,9,21,5
B,18,12,15,8,24
C,4,21,13,16,21
D,21,4,17,2,15


In [22]:
#Returns a copy without column 'Y'; df itself is left unchanged
df.drop(columns='Y')

Unnamed: 0,V,W,X,Z
A,18,5,9,5
B,18,12,15,24
C,4,21,13,21
D,21,4,17,15
E,12,19,6,25


In [23]:
df.dropna()

Unnamed: 0,V,W,X,Y,Z
A,18,5,9,21,5
B,18,12,15,8,24
C,4,21,13,16,21
D,21,4,17,2,15
E,12,19,6,13,25


In [24]:
df.size

25

In [25]:
df.shape

(5, 5)

#### Adding a column/row

In [26]:
df['K'] = [1,2,3,4,5]

In [27]:
df

Unnamed: 0,V,W,X,Y,Z,K
A,18,5,9,21,5,1
B,18,12,15,8,24,2
C,4,21,13,16,21,3
D,21,4,17,2,15,4
E,12,19,6,13,25,5


In [28]:
#Doubles column K; note that re-running this cell doubles it again (the cell is not idempotent)
df['K'] = df['K']+df['K']

In [29]:
df

Unnamed: 0,V,W,X,Y,Z,K
A,18,5,9,21,5,2
B,18,12,15,8,24,4
C,4,21,13,16,21,6
D,21,4,17,2,15,8
E,12,19,6,13,25,10


In [30]:
#New column L holds the row-wise sum of W and Y; print emits the plain-text table
df['L'] = df.loc[:, 'W'] + df.loc[:, 'Y']
print(df)

    V   W   X   Y   Z   K   L
A  18   5   9  21   5   2  26
B  18  12  15   8  24   4  20
C   4  21  13  16  21   6  37
D  21   4  17   2  15   8   6
E  12  19   6  13  25  10  32


### Conditions

In [31]:
df[df['V']>15]

Unnamed: 0,V,W,X,Y,Z,K,L
A,18,5,9,21,5,2,26
B,18,12,15,8,24,4,20
D,21,4,17,2,15,8,6


In [32]:
#Boolean Series over the columns: True where row 'E' holds a value greater than 10
boole = df.loc['E']>10

In [33]:
#Convert the DataFrame's values to a NumPy array
ndf =df.to_numpy()

In [34]:
#A boolean DataFrame mask keeps the values where it is True and fills everything else with NaN;
#columns that are absent from the mask (X, Y, Z, K, L) therefore come back entirely NaN
df[df[['V','W']]>5]

Unnamed: 0,V,W,X,Y,Z,K,L
A,18.0,,,,,,
B,18.0,12.0,,,,,
C,,21.0,,,,,
D,21.0,,,,,,
E,12.0,19.0,,,,,


### Selecting other columns after applying the condition to another column 

In [35]:
#Rows where W > 10, column V only. A single .loc call selects rows and columns together,
#avoiding chained indexing (df[mask][col]), which builds an intermediate frame
df.loc[df['W'] > 10, 'V']

B    18
C     4
E    12
Name: V, dtype: int64

In [36]:
#Rows where W > 10, columns V and W. One .loc call replaces the chained df[mask][cols] lookup
df.loc[df['W'] > 10, ['V', 'W']]

Unnamed: 0,V,W
B,18,12
C,4,21
E,12,19


In [37]:
#Applying multiple conditions

In [38]:
#Use the & operator in place of the standard 'and',
#and the | operator in place of the standard 'or'.
#Each condition must be wrapped in its own pair of parentheses.

In [39]:
df

Unnamed: 0,V,W,X,Y,Z,K,L
A,18,5,9,21,5,2,26
B,18,12,15,8,24,4,20
C,4,21,13,16,21,6,37
D,21,4,17,2,15,8,6
E,12,19,6,13,25,10,32


In [40]:
#Rows where both W > 5 and Y > 5, restricted to columns W and Y.
#The combined mask and the column list go into one .loc call instead of chained indexing
df.loc[(df['W'] > 5) & (df['Y'] > 5), ['W', 'Y']]

Unnamed: 0,W,Y
B,12,8
C,21,16
E,19,13


In [41]:
df[(df['W']>7) & (df['V']<15)]

Unnamed: 0,V,W,X,Y,Z,K,L
C,4,21,13,16,21,6,37
E,12,19,6,13,25,10,32


In [42]:
df[(df['W']>7) & (df['X']<20)]

Unnamed: 0,V,W,X,Y,Z,K,L
B,18,12,15,8,24,4,20
C,4,21,13,16,21,6,37
E,12,19,6,13,25,10,32


### Resetting and setting new index

In [43]:
#To reset the index, use the 'reset_index' method. By default it returns a new DataFrame;
#with 'inplace=True' it modifies the existing DataFrame instead

In [44]:
df.reset_index()

Unnamed: 0,index,V,W,X,Y,Z,K,L
0,A,18,5,9,21,5,2,26
1,B,18,12,15,8,24,4,20
2,C,4,21,13,16,21,6,37
3,D,21,4,17,2,15,8,6
4,E,12,19,6,13,25,10,32


In [45]:
df.reset_index(drop=True) #drop=True discards the old index instead of inserting it as a new column

Unnamed: 0,V,W,X,Y,Z,K,L
0,18,5,9,21,5,2,26
1,18,12,15,8,24,4,20
2,4,21,13,16,21,6,37
3,21,4,17,2,15,8,6
4,12,19,6,13,25,10,32


In [46]:
#To set a new index, I can use the method 'set_index'

In [47]:
#I am building the list with the split method instead of typing each name as a separate
#quoted, comma-separated item, because it is quicker to write this way

In [48]:
#Whitespace-split a single string into the list of five names
Names = 'Anil Ashok Priya Sam Rahim'.split()

In [49]:
Names

['Anil', 'Ashok', 'Priya', 'Sam', 'Rahim']

In [50]:
#Step 1 is to put this list as a column
df['Names']=Names

In [51]:
df

Unnamed: 0,V,W,X,Y,Z,K,L,Names
A,18,5,9,21,5,2,26,Anil
B,18,12,15,8,24,4,20,Ashok
C,4,21,13,16,21,6,37,Priya
D,21,4,17,2,15,8,6,Sam
E,12,19,6,13,25,10,32,Rahim


In [52]:
#Promote the 'Names' column to the row index; rebinding df replaces the inplace=True mutation
df = df.set_index('Names')

In [53]:
#Remove columns L, Z and K; columns= names the axis explicitly and rebinding df replaces inplace=True
df = df.drop(columns=['L', 'Z', 'K'])

In [54]:
df

Unnamed: 0_level_0,V,W,X,Y
Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Anil,18,5,9,21
Ashok,18,12,15,8
Priya,4,21,13,16
Sam,21,4,17,2
Rahim,12,19,6,13


In [55]:
#I can also change the column names using the set_axis method and mentioning that it is axis =1

In [56]:
df.set_axis([1,2,3,4],axis=1)

Unnamed: 0_level_0,1,2,3,4
Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Anil,18,5,9,21
Ashok,18,12,15,8
Priya,4,21,13,16
Sam,21,4,17,2
Rahim,12,19,6,13


In [57]:
#I can also change the row names using the same trick. Here, I have specified axis=0

In [58]:
df.set_axis([1,2,3,4,5],axis=0)

Unnamed: 0,V,W,X,Y
1,18,5,9,21
2,18,12,15,8
3,4,21,13,16
4,21,4,17,2
5,12,19,6,13


### Making a dataframe from a dictionary

In [60]:
#Three-row frame built from a column-name -> values mapping, with an explicit 0..2 index
data = pd.DataFrame(
    data={
        'A': [14, 56, 89],
        'B': [67, 81, 93],
        'Names': ['Mike', 'Ulesseys', 'Pulsar'],
    },
    index=[0, 1, 2],
)

In [61]:
data

Unnamed: 0,A,B,Names
0,14,67,Mike
1,56,81,Ulesseys
2,89,93,Pulsar


In [62]:
new_names = 'Sam Harmon Kyle'.split()

In [63]:
new_names

['Sam', 'Harmon', 'Kyle']

In [68]:
#Same columns as `data`, continuing the index at 3..5; the Names column comes from new_names
data2 = pd.DataFrame(
    data={
        'A': [78, 28, 10],
        'B': [93, 23, 22],
        'Names': new_names,
    },
    index=[3, 4, 5],
)

In [69]:
data2

Unnamed: 0,A,B,Names
3,78,93,Sam
4,28,23,Harmon
5,10,22,Kyle


In [75]:
#Stack data2 underneath data (row-wise concatenation, the default axis)
data_full = pd.concat(objs=[data, data2], axis=0)

In [76]:
data_full

Unnamed: 0,A,B,Names
0,14,67,Mike
1,56,81,Ulesseys
2,89,93,Pulsar
3,78,93,Sam
4,28,23,Harmon
5,10,22,Kyle


In [77]:
#Side-by-side concatenation: rows are aligned on index labels, so the disjoint indexes leave NaN gaps
pd.concat(objs=[data, data2], axis=1)

Unnamed: 0,A,B,Names,A.1,B.1,Names.1
0,14.0,67.0,Mike,,,
1,56.0,81.0,Ulesseys,,,
2,89.0,93.0,Pulsar,,,
3,,,,78.0,93.0,Sam
4,,,,28.0,23.0,Harmon
5,,,,10.0,22.0,Kyle


In [173]:
#Seed NumPy's global generator so the random 'B' column is reproducible.
#np.random.seed returns None, so its result is not bound to a name.
np.random.seed(46)
data3 = pd.DataFrame(
    data={
        'A': [46, 48],
        'B': np.random.randint(13, 40, 2),  #two integers drawn from [13, 40)
        'Names': ['Ilk', 'Lark'],
    },
    index=[6, 7],
)

In [174]:
data3

Unnamed: 0,A,B,Names
6,46,18,Ilk
7,48,21,Lark


In [175]:
pd.concat([data,data2,data3])

Unnamed: 0,A,B,Names
0,14,67,Mike
1,56,81,Ulesseys
2,89,93,Pulsar
3,78,93,Sam
4,28,23,Harmon
5,10,22,Kyle
6,46,18,Ilk
7,48,21,Lark


In [176]:
pd.concat([data,data2,data3],axis=1)

Unnamed: 0,A,B,Names,A.1,B.1,Names.1,A.2,B.2,Names.2
0,14.0,67.0,Mike,,,,,,
1,56.0,81.0,Ulesseys,,,,,,
2,89.0,93.0,Pulsar,,,,,,
3,,,,78.0,93.0,Sam,,,
4,,,,28.0,23.0,Harmon,,,
5,,,,10.0,22.0,Kyle,,,
6,,,,,,,46.0,18.0,Ilk
7,,,,,,,48.0,21.0,Lark


### Merging

In [178]:
#Left-hand table for the merge examples: integer keys 1..3 with a class letter each
A = pd.DataFrame(data=dict(Key=[1, 2, 3], Class=list('ABC')))

In [179]:
A

Unnamed: 0,Key,Class
0,1,A
1,2,B
2,3,C


In [180]:
#Right-hand table for the merge examples; it shares keys 1 and 3 with A, while key 8 is unmatched
B = pd.DataFrame(data=dict(Key=[1, 8, 3], Section=[1, 7, 9]))

In [181]:
B

Unnamed: 0,Key,Section
0,1,1
1,8,7
2,3,9


In [182]:
#Inner merge on the shared 'Key' column: only keys present in both A and B survive
pd.merge(left=A, right=B, on='Key', how='inner')

Unnamed: 0,Key,Class,Section
0,1,A,1
1,3,C,9


In [183]:
#Outer merge on 'Key': keeps every key from either table, with NaN where a side has no match
pd.merge(left=A, right=B, on='Key', how='outer')

Unnamed: 0,Key,Class,Section
0,1,A,1.0
1,2,B,
2,3,C,9.0
3,8,,7.0


In [184]:
#Right merge on 'Key': keeps every row of B, with NaN in A's columns where A has no match
pd.merge(left=A, right=B, on='Key', how='right')

Unnamed: 0,Key,Class,Section
0,1,A,1
1,8,,7
2,3,C,9


In [185]:
#Left merge on 'Key': keeps every row of A, with NaN in B's columns where B has no match
pd.merge(left=A, right=B, on='Key', how='left')

Unnamed: 0,Key,Class,Section
0,1,A,1.0
1,2,B,
2,3,C,9.0


### Joining the DataFrames

In [189]:
#Left table for the join examples, indexed by the labels X, Y, Z
left = pd.DataFrame(
    data={'A': [17, 18, 19], 'B': ['Dockworth', 'Naper', 'Eetenlough']},
    index=list('XYZ'),
)

In [190]:
left

Unnamed: 0,A,B
X,17,Dockworth
Y,18,Naper
Z,19,Eetenlough


In [192]:
#Right table for the join examples; it shares index labels X and Y with `left`, while H is unmatched
right = pd.DataFrame(
    data={'City': ['Anchem', 'Nazta Faria', 'Hunlope']},
    index=list('XYH'),
)

In [193]:
right

Unnamed: 0,City
X,Anchem
Y,Nazta Faria
H,Hunlope


In [194]:
#Index-based join that keeps every row of `left` (how='left' is the default)
left.join(other=right, how='left')

Unnamed: 0,A,B,City
X,17,Dockworth,Anchem
Y,18,Naper,Nazta Faria
Z,19,Eetenlough,


In [198]:
#Index-based join that keeps the union of both indexes, with NaN where a side has no matching label
left.join(other=right, how='outer')

Unnamed: 0,A,B,City
H,,,Hunlope
X,17.0,Dockworth,Anchem
Y,18.0,Naper,Nazta Faria
Z,19.0,Eetenlough,
