In [4]:
import numpy as np
import pandas as pd

In [5]:
# Build a Series from the integers 1 through 9. No index is supplied, so
# pandas labels the elements 0, 1, 2, ...
data = list(range(1, 10))
s1 = pd.Series(data)
s1

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
dtype: int64

In [6]:
# Confirm that pd.Series(...) produced a pandas Series object.
type(s1)

pandas.core.series.Series

In [7]:
# .values exposes the stored data as a NumPy array, without the index.
s1.values

array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

In [8]:
# The automatically generated index: a RangeIndex starting at 0.
s1.index

RangeIndex(start=0, stop=9, step=1)

In [9]:
# A shorter Series (1 through 5) used for the indexing examples that follow.
data = list(range(1, 6))
s2 = pd.Series(data)
s2

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [10]:
# Replace the default integer labels with the letters a-e.
s2.index = list('abcde')
s2

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [13]:
# Positional access (4th element). Use .iloc for positions: plain s2[3] on a
# string-labelled Series depends on a positional fallback that pandas has deprecated.
s2.iloc[3]

4

In [14]:
s2['c']  # label-based lookup: the index label goes inside the square brackets

3

In [15]:
# Assigning to an existing label overwrites the value stored at that label.
s2['c'] = 5
s2

a    1
b    2
c    5
d    4
e    5
dtype: int64

In [16]:
s2['g'] = 5   # 'g' is not an existing label, so the assignment appends a new entry
s2

a    1
b    2
c    5
d    4
e    5
g    5
dtype: int64

In [17]:
# Check whether the value 1 occurs among the Series' values.
1 in s1.values

True

In [18]:
# Check whether 'c' is one of the index labels.
'c' in s2.index

True

In [23]:
# start:stop:step slice: every second element from position 1 up to, but not including, position 7.
s1[1:7:2]

1    2
3    4
5    6
dtype: int64

In [24]:
# Build a Series from a dict: the keys (cities) become the index labels and
# the values (states) become the data.
data = dict(
    jaipur='rajasthan',
    mumbai='maharashtra',
    kolkata='west bengal',
    bengluru='karnataka',
    chandigarh='punjab',
)
s2 = pd.Series(data)
s2

jaipur          rajasthan
mumbai        maharashtra
kolkata       west bengal
bengluru        karnataka
chandigarh         punjab
dtype: object

In [25]:
# Name the Series and its index; both names appear in the printed output.
s2.name = 'capitals and states'
s2.index.name = 'capitals'
s2

capitals
jaipur          rajasthan
mumbai        maharashtra
kolkata       west bengal
bengluru        karnataka
chandigarh         punjab
Name: capitals and states, dtype: object

In [26]:
# A single label returns the value stored at that label.
s2['jaipur']

'rajasthan'

In [27]:
# A list of labels returns a Series with those entries, in the order requested.
s2[['jaipur', 'kolkata', 'mumbai']]

capitals
jaipur       rajasthan
kolkata    west bengal
mumbai     maharashtra
Name: capitals and states, dtype: object

In [28]:
# A one-element list still returns a Series rather than a single value.
s2[['jaipur']]

capitals
jaipur    rajasthan
Name: capitals and states, dtype: object

In [30]:
# Build s3 from s2 using a new list of labels. 'delhi' is not a label in s2, so
# it gets NaN ("not a number"), which pandas uses to mark a missing value.
capitals = ['jaipur', 'kolkata', 'mumbai', 'delhi']
s3 = pd.Series(s2,index = capitals)
s3

jaipur       rajasthan
kolkata    west bengal
mumbai     maharashtra
delhi              NaN
Name: capitals and states, dtype: object

In [31]:
# True where the value is missing (NaN).
s3.isnull()

jaipur     False
kolkata    False
mumbai     False
delhi       True
Name: capitals and states, dtype: bool

In [32]:
# The opposite of isnull(): True where a value is present.
s3.notnull()

jaipur      True
kolkata     True
mumbai      True
delhi      False
Name: capitals and states, dtype: bool

In [33]:
# Flag values that repeat an earlier value in the Series; none do here.
s3.duplicated()

jaipur     False
kolkata    False
mumbai     False
delhi      False
Name: capitals and states, dtype: bool

In [34]:
# Summing the boolean flags counts the duplicates (each True counts as 1).
s3.duplicated().sum()

0

In [35]:
# Index labels need not be unique: all three entries share the label 'cricket'.
s2 = pd.Series(
    data=['india', 'australia', 'england'],
    index=['cricket'] * 3,
)
s2

cricket        india
cricket    australia
cricket      england
dtype: object

In [36]:
# With a repeated label, a label lookup returns every matching entry as a Series.
s2['cricket']

cricket        india
cricket    australia
cricket      england
dtype: object

# Data Frames

In [37]:
# First way to build a DataFrame: start with one Series per student (one per row).
student1 = pd.Series(dict(name='pranav', id='1'))
student2 = pd.Series(dict(name='yashraj', id='2'))
student3 = pd.Series(dict(name='diptesh', id='3'))

In [38]:
# Stack the three student Series as rows: their keys become the columns and
# the supplied index values become the row labels.
df1 = pd.DataFrame([student1,student2,student3], index = [101,102,103])
df1

Unnamed: 0,name,id
101,pranav,1
102,yashraj,2
103,diptesh,3


In [44]:
# Second way to build a DataFrame: a dict mapping each column name to its list of values.
data_1 = {'name' : ['pranav','vipul', 'yasha'], 'Id' : [1,2,3]}
# Pass data_1 itself; the earlier `data` variable holds unrelated values.
df2 = pd.DataFrame(data_1, index=[101,102,103])
df2

Unnamed: 0,name,Id
101,pranav,1
102,vipul,2
103,yasha,3


In [45]:
# Third way to build a DataFrame: also pass columns= to choose and order the columns.
# Requested columns with no matching key in data_1 are filled with NaN: 'age' is
# absent, and 'id' does not match the key 'Id' because the match is case-sensitive.
df3 = pd.DataFrame(data = data_1, index = [101,102,103], columns = ['name', 'id', 'age'])
df3

Unnamed: 0,name,id,age
101,pranav,,
102,vipul,,
103,yasha,,


In [46]:
# Row labels of the DataFrame.
df3.index

Int64Index([101, 102, 103], dtype='int64')

In [47]:
# Cell contents as a 2-D NumPy array, without row or column labels.
df3.values

array([['pranav', nan, nan],
       ['vipul', nan, nan],
       ['yasha', nan, nan]], dtype=object)

In [50]:
# Column labels of the DataFrame.
df3.columns

Index(['name', 'id', 'age'], dtype='object')

In [51]:
# Fill the 'age' column. Bracket assignment always targets a column, whereas
# attribute assignment (df3.age = ...) only works when the column already exists.
df3['age'] = [23, 54, 13]

In [52]:
# Attribute-style access reads the 'age' column as a Series.
df3.age

101    23
102    54
103    13
Name: age, dtype: int64

In [54]:
# Fill the 'id' column. Bracket assignment always targets a column, whereas
# attribute assignment (df3.id = ...) only works when the column already exists.
df3['id'] = [1, 2, 3]

In [55]:
# Display the frame after the 'id' and 'age' columns have been assigned.
df3

Unnamed: 0,name,id,age
101,pranav,1,23
102,vipul,2,54
103,yasha,3,13


In [56]:
# .iloc selects by integer position: position 0 is the first row.
df3.iloc[0]

name    pranav
id           1
age         23
Name: 101, dtype: object

In [57]:
# .loc selects by index label: 101 is the label of the first row.
df3.loc[101]

name    pranav
id           1
age         23
Name: 101, dtype: object

In [58]:
# A slice inside [] selects rows by position: rows 1 and 2 (the stop is exclusive).
df3[1:3]

Unnamed: 0,name,id,age
102,vipul,2,54
103,yasha,3,13


In [60]:
# An open-ended slice selects every row.
df3[:]

Unnamed: 0,name,id,age
101,pranav,1,23
102,vipul,2,54
103,yasha,3,13


In [61]:
# How to access a specific value: give .loc the row label and the column
# name together instead of chaining two separate lookups.
df3.loc[101, 'name']

'pranav'

In [64]:
# Single-step lookup of one cell: row label 102, column 'name'.
df3.loc[102, 'name']

'vipul'

In [65]:
# Column first, then row label: df3['name'] is a Series, which is then indexed by the label 101.
df3['name'][101]

'pranav'

In [66]:
# .loc with a list of row labels and a list of column names returns that sub-table.
df3.loc[[101,102], ['name', 'age']]

Unnamed: 0,name,age
101,pranav,23
102,vipul,54


In [67]:
# First row by integer position, returned as a Series indexed by the column names.
df3.iloc[0]

name    pranav
id           1
age         23
Name: 101, dtype: object

In [68]:
# Selecting a single column by name returns it as a Series.
df3['name']

101    pranav
102     vipul
103     yasha
Name: name, dtype: object

In [69]:
# The whole table is a DataFrame.
type(df3)

pandas.core.frame.DataFrame

In [70]:
# A single column taken from the table is a Series.
type(df3['name'])

pandas.core.series.Series

In [71]:
# Take the rows at positions 0 and 1, then pick the 'name' column from that result.
df3.iloc[[0,1]]['name']

101    pranav
102     vipul
Name: name, dtype: object

In [72]:
# Label slices with .loc include the end label, so :103 keeps row 103.
df3.loc[:103,['name','age']]

Unnamed: 0,name,age
101,pranav,23
102,vipul,54
103,yasha,13


In [73]:
# First 3 rows.
df3.head(3)

Unnamed: 0,name,id,age
101,pranav,1,23
102,vipul,2,54
103,yasha,3,13


In [74]:
# Last 2 rows.
df3.tail(2)

Unnamed: 0,name,id,age
102,vipul,2,54
103,yasha,3,13


In [76]:
# Sort the rows by index label, largest first.
df3.sort_index(ascending = False)

Unnamed: 0,name,id,age
103,yasha,3,13
102,vipul,2,54
101,pranav,1,23


In [79]:
# Sort the rows by the 'age' column, smallest first.
df3.sort_values(by  = 'age')

Unnamed: 0,name,id,age
103,yasha,3,13
101,pranav,1,23
102,vipul,2,54


In [103]:
# Stack two copies vertically; the original row labels are kept, so 101-103 appear twice.
pd.concat([df3,df3])

Unnamed: 0,name,id,age
101,pranav,1,23
102,vipul,2,54
103,yasha,3,13
101,pranav,1,23
102,vipul,2,54
103,yasha,3,13


In [104]:
# axis=1 places the frames side by side, matching rows by index label.
pd.concat([df3,df3], axis = 1)

Unnamed: 0,name,id,age,name.1,id.1,age.1
101,pranav,1,23,pranav,1,23
102,vipul,2,54,vipul,2,54
103,yasha,3,13,yasha,3,13


In [98]:
# Boolean mask: keep only the rows where age is greater than 20.
df3[df3.age > 20]

Unnamed: 0,name,id,age
101,pranav,1,23
102,vipul,2,54


In [99]:
# Names of the rows where age is greater than 20, selected in one .loc call
# (row mask and column together) rather than by chaining two lookups.
df3.loc[df3['age'] > 20, 'name']

101    pranav
102     vipul
Name: name, dtype: object

In [101]:
# The same age filter written as a query string.
df3.query('age>20')

Unnamed: 0,name,id,age
101,pranav,1,23
102,vipul,2,54


In [81]:
# Distinct values in the 'name' column, returned as an array.
df3['name'].unique()

array(['pranav', 'vipul', 'yasha'], dtype=object)

In [82]:
# Number of distinct values in the 'name' column.
df3['name'].nunique()

3

In [83]:
# dropna() returns the rows without missing values; nothing is missing in df3 here, so all rows remain.
df3.dropna()

Unnamed: 0,name,id,age
101,pranav,1,23
102,vipul,2,54
103,yasha,3,13


In [84]:
# fillna(5) replaces missing values with 5; nothing is missing in df3 here, so the result matches df3.
df3.fillna(5)

Unnamed: 0,name,id,age
101,pranav,1,23
102,vipul,2,54
103,yasha,3,13


In [96]:
# Summary of the index, each column's non-null count and dtype, and memory usage.
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 101 to 103
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    3 non-null      object
 1   id      3 non-null      int64 
 2   age     3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 204.0+ bytes


In [85]:
# Small numeric table for the aggregation examples: three columns of ten
# consecutive integers (A: 11-20, B: 21-30, C: 31-40).
# numpy is imported as np in the notebook's top import cell.
data = {'A' : np.arange(11,21),
        'B' : np.arange(21,31),
        'C' : np.arange(31,41)}
df = pd.DataFrame(data)
df

Unnamed: 0,A,B,C
0,11,21,31
1,12,22,32
2,13,23,33
3,14,24,34
4,15,25,35
5,16,26,36
6,17,27,37
7,18,28,38
8,19,29,39
9,20,30,40


In [86]:
# Total of each column (the default, axis=0).
df.sum()

A    155
B    255
C    355
dtype: int64

In [87]:
# axis=1 adds across the columns, giving one total per row;
# the default axis=0 gives one total per column instead.
df.sum(axis = 1)

0    63
1    66
2    69
3    72
4    75
5    78
6    81
7    84
8    87
9    90
dtype: int64

In [88]:
# Mean of each column.
df.mean()

A    15.5
B    25.5
C    35.5
dtype: float64

In [89]:
# Mean of each row (axis=1 averages across the columns).
df.mean(axis = 1)

0    21.0
1    22.0
2    23.0
3    24.0
4    25.0
5    26.0
6    27.0
7    28.0
8    29.0
9    30.0
dtype: float64

In [90]:
# Minimum of each column; pass axis=1 to get the minimum of each row instead.
df.min()

A    11
B    21
C    31
dtype: int32

In [91]:
# Sample variance of each column (pandas divides by n-1 by default).
df.var()

A    9.166667
B    9.166667
C    9.166667
dtype: float64

In [92]:
# Sample standard deviation of each column (the square root of df.var()).
df.std()

A    3.02765
B    3.02765
C    3.02765
dtype: float64

In [93]:
# Median of each column.
df.median()

A    15.5
B    25.5
C    35.5
dtype: float64

In [94]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,A,B,C
count,10.0,10.0,10.0
mean,15.5,25.5,35.5
std,3.02765,3.02765,3.02765
min,11.0,21.0,31.0
25%,13.25,23.25,33.25
50%,15.5,25.5,35.5
75%,17.75,27.75,37.75
max,20.0,30.0,40.0


In [105]:
# Pairwise correlation between columns; every entry is 1.0 here because the
# columns differ from one another only by a constant offset.
df.corr()

Unnamed: 0,A,B,C
A,1.0,1.0,1.0
B,1.0,1.0,1.0
C,1.0,1.0,1.0
