In [1]:
import numpy as np
import pandas as pd

In [2]:
# Six country names, each addressed by a single-letter string label.
s1 = pd.Series(
    data=['USA', 'Canada', 'Germany', 'Japan', 'France', 'UK'],
    index=['a', 'b', 'c', 'd', 'e', 'f'],
)

In [3]:
s1

a        USA
b     Canada
c    Germany
d      Japan
e     France
f         UK
dtype: object

In [4]:
# First three entries by position. The index holds the strings 'a'..'f', so
# .iloc is used to make it explicit that 0:3 are positions, not labels.
s1.iloc[0:3]

a        USA
b     Canada
c    Germany
dtype: object

In [5]:
# Look up a single value by its index label.
s1.loc['a']

'USA'

In [6]:
# Integer values keyed by country name; the dict keys become the index labels.
s2 = pd.Series({'USA': 1, 'Canada': 2, 'Germany': 3, 'Japan': 4})

In [7]:
s2

USA        1
Canada     2
Germany    3
Japan      4
dtype: int64

In [8]:
# Second integer series; it shares only the 'Canada' and 'USA' labels with s2.
s3 = pd.Series(
    data=[1, 2, 3, 4],
    index=['France', 'Australia', 'Canada', 'USA'],
)

In [9]:
s3

France       1
Australia    2
Canada       3
USA          4
dtype: int64

In [10]:
# Addition aligns the two series on their index labels, not on position.
# Labels present in only one series (Australia, France, Germany, Japan) have
# no partner value and come out as NaN, which is why the result is float64.
s2 + s3

Australia    NaN
Canada       5.0
France       NaN
Germany      NaN
Japan        NaN
USA          5.0
dtype: float64

## Data Frames

In [11]:
# 5x5 frame of random integers in 1..25 (the upper bound 26 is exclusive),
# with rows labelled A-E and columns labelled V-Z.
# NOTE: the random generator is not seeded, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randint(1, 26, size=(5, 5)),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['V', 'W', 'X', 'Y', 'Z'],
)

In [12]:
df

Unnamed: 0,V,W,X,Y,Z
A,5,11,9,19,15
B,25,3,20,11,18
C,14,4,16,12,16
D,10,10,15,6,24
E,19,25,19,9,15


In [13]:
# Label-based selection: row 'E', restricted to three columns. The result is a
# Series whose entries follow the order requested (Y, W, Z), not the frame's order.
df.loc['E',['Y','W','Z']]

Y     9
W    25
Z    15
Name: E, dtype: int64

In [14]:
# Column labels of df, returned as a pandas Index object.
df.columns

Index(['V', 'W', 'X', 'Y', 'Z'], dtype='object')

In [15]:
# Position-based selection: the last row, first three columns.
df.iloc[-1, :3]

V    19
W    25
X    19
Name: E, dtype: int64

#### Dropping a column/row

In [16]:
# Returns a copy of df without row 'E'; df itself is left unchanged.
df.drop(index='E')

Unnamed: 0,V,W,X,Y,Z
A,5,11,9,19,15
B,25,3,20,11,18
C,14,4,16,12,16
D,10,10,15,6,24


In [17]:
# Returns a copy of df without column 'Y'; df itself is left unchanged.
df.drop(columns='Y')

Unnamed: 0,V,W,X,Z
A,5,11,9,15
B,25,3,20,18
C,14,4,16,16
D,10,10,15,24
E,19,25,19,15


In [18]:
# Drop every row that contains at least one missing value (the defaults,
# spelled out). df has no missing values here, so all five rows come back.
df.dropna(axis=0, how='any')

Unnamed: 0,V,W,X,Y,Z
A,5,11,9,19,15
B,25,3,20,11,18
C,14,4,16,12,16
D,10,10,15,6,24
E,19,25,19,9,15


In [19]:
# Total number of cells in the frame (rows x columns).
df.size

25

In [20]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(5, 5)

#### Adding a column/row

In [21]:
# Add a new column 'K' holding 1..5, one value per row in row order.
df['K'] = list(range(1, 6))

In [22]:
df

Unnamed: 0,V,W,X,Y,Z,K
A,5,11,9,19,15,1
B,25,3,20,11,18,2
C,14,4,16,12,16,3
D,10,10,15,6,24,4
E,19,25,19,9,15,5


In [23]:
# Overwrite 'K' with twice its current value.
# NOTE: this cell is not idempotent; every re-run doubles the column again.
df['K'] = df['K'] * 2

In [24]:
df

Unnamed: 0,V,W,X,Y,Z,K
A,5,11,9,19,15,2
B,25,3,20,11,18,4
C,14,4,16,12,16,6
D,10,10,15,6,24,8
E,19,25,19,9,15,10


In [25]:
# Derive a new column 'L' as the row-wise sum of columns 'W' and 'Y'.
df['L'] = df['W']+df['Y']
# print() shows the plain-text form of the frame rather than the notebook's
# rich table display.
print(df)

    V   W   X   Y   Z   K   L
A   5  11   9  19  15   2  30
B  25   3  20  11  18   4  14
C  14   4  16  12  16   6  16
D  10  10  15   6  24   8  16
E  19  25  19   9  15  10  34


### Conditions

In [26]:
# Keep only the rows whose 'V' value exceeds 15 (boolean-mask row selection).
df.loc[df['V'] > 15]

Unnamed: 0,V,W,X,Y,Z,K,L
B,25,3,20,11,18,4,14
E,19,25,19,9,15,10,34


In [36]:
# Boolean Series over row 'E': True for each column whose value exceeds 10.
# NOTE: boole is not referenced by any later cell shown in this notebook.
boole = df.loc['E']>10

In [40]:
# NumPy array holding df's values (row and column labels are not carried over).
# NOTE: ndf is not referenced by any later cell shown in this notebook.
ndf =df.to_numpy()

In [55]:
# Mask the frame with a boolean DataFrame instead of a boolean Series.
# The shape of df is preserved: cells where the condition is True keep their
# value, and every other cell becomes NaN. That includes all columns absent
# from the mask, and it is why the surviving values display as floats.
df.where(df[['V', 'W']] > 5)

Unnamed: 0,V,W,X,Y,Z,K,L
A,,11.0,,,,,
B,25.0,,,,,,
C,14.0,,,,,,
D,10.0,10.0,,,,,
E,19.0,25.0,,,,,


### Selecting other columns after applying the condition to another column 

In [83]:
# Column 'V' for the rows where W > 10.
# A single .loc[rows, column] lookup replaces the chained df[mask]['V'] form,
# which builds an intermediate frame first and is unsafe as an assignment target.
df.loc[df['W'] > 10, 'V']

A     5
E    19
Name: V, dtype: int64

In [60]:
# Columns 'V' and 'W' for the rows where W > 10.
# A single .loc[rows, columns] lookup replaces the chained df[mask][[...]] form,
# which builds an intermediate frame first and is unsafe as an assignment target.
df.loc[df['W'] > 10, ['V', 'W']]

Unnamed: 0,V,W
A,5,11
E,19,25


In [61]:
#Applying multiple conditions

In [70]:
#You have to use the & operator in place of the standard 'and',
#and the | operator in place of the standard 'or'.
#EACH condition must be wrapped in its OWN parentheses, because & and | bind
#more tightly than comparison operators such as > and <

In [71]:
# Rows that satisfy both conditions: W greater than 7 AND V less than 15.
df.loc[(df['W'] > 7) & (df['V'] < 15)]

Unnamed: 0,V,W,X,Y,Z,K,L
A,5,11,9,19,15,2,30
D,10,10,15,6,24,8,16


In [78]:
# Rows that satisfy both conditions: W greater than 7 AND X less than 20.
df.loc[(df['W'] > 7) & (df['X'] < 20)]

Unnamed: 0,V,W,X,Y,Z,K,L
A,5,11,9,19,15,2,30
D,10,10,15,6,24,8,16
E,19,25,19,9,15,10,34


### Resetting and setting new index

In [84]:
#To reset the index, I can use the method 'reset_index'. By default it returns a new
#DataFrame and leaves df untouched; if I choose 'inplace = True', it modifies df itself

In [87]:
# Replace the row labels with 0..n-1 and keep the old labels as a regular
# column named 'index' (drop=False is the default, spelled out here).
df.reset_index(drop=False)

Unnamed: 0,index,V,W,X,Y,Z,K,L
0,A,5,11,9,19,15,2,30
1,B,25,3,20,11,18,4,14
2,C,14,4,16,12,16,6,16
3,D,10,10,15,6,24,8,16
4,E,19,25,19,9,15,10,34


In [89]:
# With drop=True the old index is discarded instead of being kept as a new column.
df.reset_index(drop=True)

Unnamed: 0,V,W,X,Y,Z,K,L
0,5,11,9,19,15,2,30
1,25,3,20,11,18,4,14
2,14,4,16,12,16,6,16
3,10,10,15,6,24,8,16
4,19,25,19,9,15,10,34


In [90]:
#To set a new index, I can use the method 'set_index'

In [92]:
#I am building the list with the split method instead of typing each name in quotes
#separated by commas, because it is quicker to write this way

In [102]:
# Split one whitespace-separated string into a list of five names.
Names = 'Anil Ashok Priya Sam Rahim'.split()

In [103]:
Names

['Anil', 'Ashok', 'Priya', 'Sam', 'Rahim']

In [107]:
# Step 1: store the list as a regular column called 'Names'. It is assigned
# to the rows in order, so it needs exactly one entry per row of df.
df['Names']=Names

In [108]:
df

Unnamed: 0,V,W,X,Y,Z,K,L,Names
A,5,11,9,19,15,2,30,Anil
B,25,3,20,11,18,4,14,Ashok
C,14,4,16,12,16,6,16,Priya
D,10,10,15,6,24,8,16,Sam
E,19,25,19,9,15,10,34,Rahim


In [113]:
# Promote the 'Names' column to be the row index. df is rebound to the frame
# that set_index returns rather than mutated via inplace=True, which keeps the
# data flow explicit and chainable.
# NOTE: re-running this cell fails with KeyError, because 'Names' is no longer
# a column once it has become the index.
df = df.set_index('Names')

In [117]:
# Remove the columns 'L', 'Z' and 'K'. df is rebound to the frame that drop
# returns rather than mutated via inplace=True.
# NOTE: re-running this cell fails with KeyError, because the columns are
# already gone after the first run.
df = df.drop(columns=['L', 'Z', 'K'])

In [118]:
df

Unnamed: 0_level_0,V,W,X,Y
Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Anil,5,11,9,19
Ashok,25,3,20,11
Priya,14,4,16,12
Sam,10,10,15,6
Rahim,19,25,19,9


In [120]:
#I can also change the column names using the set_axis method and mentioning that it is axis =1

In [121]:
# Returns a copy of df whose column labels are replaced by 1..4; the new list
# must have one label per existing column.
df.set_axis([1, 2, 3, 4], axis='columns')

Unnamed: 0_level_0,1,2,3,4
Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Anil,5,11,9,19
Ashok,25,3,20,11
Priya,14,4,16,12
Sam,10,10,15,6
Rahim,19,25,19,9


In [124]:
#I can also change the row names using the same trick. Here, I have specified axis=0

In [125]:
# Returns a copy of df whose row labels are replaced by 1..5; the new list
# must have one label per existing row.
df.set_axis([1, 2, 3, 4, 5], axis='index')

Unnamed: 0,V,W,X,Y
1,5,11,9,19
2,25,3,20,11
3,14,4,16,12
4,10,10,15,6
5,19,25,19,9
