In [2]:
import numpy as np
import pandas as pd

In [3]:
s = pd.Series([2,3,4,5,'ab'], index = ['a','b','c','d','e']) # key becomes index
print(s)

a     2
b     3
c     4
d     5
e    ab
dtype: object


In [4]:
s = pd.Series({'a':2,'b':3,'d':5})
print(s)

a    2
b    3
d    5
dtype: int64


In [5]:
#broadcasting
s = pd.Series(5,index = ['a','b','c','d','e'])
print(s)

a    5
b    5
c    5
d    5
e    5
dtype: int64


In [6]:
#here 'a' , 'b' .. is explicit indexing
#it also have 0,1,2.. implicit indexing 
s = pd.Series({'a':2,'b':3,'d':5,'e':6,'f':8})
print(s['a'])
print(s[1])

2
3


In [7]:
#in implicit indexing it not takes second number inclusively
s[0:3]

a    2
b    3
d    5
dtype: int64

In [8]:
#in implicit indexing it takes second number inclusively
s['a':'e']

a    2
b    3
d    5
e    6
dtype: int64

In [9]:
s = pd.Series([1,2,3,4,5],index = [2,3,4,5,6])
print(s)

2    1
3    2
4    3
5    4
6    5
dtype: int64


In [10]:
# By default it takes explicit index
s[2]

1

In [11]:
s.iloc[3] # on which index 3 is present

4

In [12]:
s.loc[2] # on 2nd index which element is present

1

In [13]:
#support fancy indexing
s[[2,4,5]]

2    1
4    3
5    4
dtype: int64

In [14]:
s

2    1
3    2
4    3
5    4
6    5
dtype: int64

In [15]:
s>3

2    False
3    False
4    False
5     True
6     True
dtype: bool

In [16]:
#masking for getting the value
s[s>3]

5    4
6    5
dtype: int64

In [17]:
# here we cant use 'and' bcoz its logical operator
#and here '&' is bitwise operator
s[(s>2) & (s<4)]

4    3
dtype: int64

In [18]:
#shows all index
s.index

Int64Index([2, 3, 4, 5, 6], dtype='int64')

In [19]:
#DataFrame

In [20]:
df = pd.DataFrame(np.random.randint(0,10,(3,3)))
df

Unnamed: 0,0,1,2
0,4,9,3
1,5,1,5
2,9,5,2


In [21]:
df = pd.DataFrame(np.random.randint(0,10,(3,3)), index = ['a','b','c'], columns=['ab','bc','ac'])
df

Unnamed: 0,ab,bc,ac
a,2,0,3
b,9,1,3
c,0,5,5


In [22]:
df = pd.DataFrame([[2,3,4],[5,6],[1]])
df

Unnamed: 0,0,1,2
0,2,3.0,4.0
1,5,6.0,
2,1,,


In [23]:
df.columns

RangeIndex(start=0, stop=3, step=1)

In [24]:
df.index

RangeIndex(start=0, stop=3, step=1)

In [25]:
population_dict = {'Bhopal':38332521,'Indore':26448193,'Gandhinagar':19651127,'Ahmedabad':19552860,'Goa':2135}
population = pd.Series(population_dict)

area_dict = {'Bhopal123':48121,'Indore':87845,'Gandhinagar':45416,'Ahmedabad':798456,'Goa':481}
area = pd.Series(area_dict)

data = pd.DataFrame({'pop':population,'area':area})

In [26]:
data

Unnamed: 0,pop,area
Ahmedabad,19552860.0,798456.0
Bhopal,38332521.0,
Bhopal123,,48121.0
Gandhinagar,19651127.0,45416.0
Goa,2135.0,481.0
Indore,26448193.0,87845.0


In [27]:
#creating a new collumn
data['density'] = data['pop']/data['area']
data

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955


In [28]:
#data['pop'] is no accessing row , it accesses collumn
data['pop']

Ahmedabad      19552860.0
Bhopal         38332521.0
Bhopal123             NaN
Gandhinagar    19651127.0
Goa                2135.0
Indore         26448193.0
Name: pop, dtype: float64

In [29]:
#gives only values respective col and row
data.values

array([[1.95528600e+07, 7.98456000e+05, 2.44883375e+01],
       [3.83325210e+07,            nan,            nan],
       [           nan, 4.81210000e+04,            nan],
       [1.96511270e+07, 4.54160000e+04, 4.32691717e+02],
       [2.13500000e+03, 4.81000000e+02, 4.43866944e+00],
       [2.64481930e+07, 8.78450000e+04, 3.01077955e+02]])

In [30]:
data.iloc[:,1]

Ahmedabad      798456.0
Bhopal              NaN
Bhopal123       48121.0
Gandhinagar     45416.0
Goa               481.0
Indore          87845.0
Name: area, dtype: float64

In [31]:
data.loc['Bhopal':'Gandhinagar']

Unnamed: 0,pop,area,density
Bhopal,38332521.0,,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717


In [32]:
data['pop']

Ahmedabad      19552860.0
Bhopal         38332521.0
Bhopal123             NaN
Gandhinagar    19651127.0
Goa                2135.0
Indore         26448193.0
Name: pop, dtype: float64

In [33]:
data.pop

<bound method DataFrame.pop of                     pop      area     density
Ahmedabad    19552860.0  798456.0   24.488337
Bhopal       38332521.0       NaN         NaN
Bhopal123           NaN   48121.0         NaN
Gandhinagar  19651127.0   45416.0  432.691717
Goa              2135.0     481.0    4.438669
Indore       26448193.0   87845.0  301.077955>

In [34]:
data

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955


In [35]:
data[data['density']>100]

Unnamed: 0,pop,area,density
Gandhinagar,19651127.0,45416.0,432.691717
Indore,26448193.0,87845.0,301.077955


In [36]:
data[data['area']>545231]

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337


In [37]:
data.loc[data.density>100,['pop','area']]

Unnamed: 0,pop,area
Gandhinagar,19651127.0,45416.0
Indore,26448193.0,87845.0


In [38]:
data.loc['japan',['pop','area']] = 200,300
data

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955
japan,200.0,300.0,


In [39]:
data.loc['japan',['pop','area','density']] = 200,300,200/300
data

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955
japan,200.0,300.0,0.666667


In [40]:
data.loc['Bhopal',['area']] = 200
data

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,200.0,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955
japan,200.0,300.0,0.666667


In [41]:
data.drop('japan')
#here its removed
# its because it gives new dataframe

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,200.0,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955


In [42]:
data #but here its available

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,200.0,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955
japan,200.0,300.0,0.666667


In [43]:
data.drop('japan',inplace=True)

In [44]:
data #permentally removed

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,200.0,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955


In [45]:
tempdata = data

In [46]:
#removing collumn
tempdata.drop('density',axis=1,inplace=True)

In [47]:
tempdata

Unnamed: 0,pop,area
Ahmedabad,19552860.0,798456.0
Bhopal,38332521.0,200.0
Bhopal123,,48121.0
Gandhinagar,19651127.0,45416.0
Goa,2135.0,481.0
Indore,26448193.0,87845.0


In [48]:
data = pd.Series([2,4,6,8])
data

0    2
1    4
2    6
3    8
dtype: int64

In [49]:
np.power(2,data)

0      4
1     16
2     64
3    256
dtype: int64

In [50]:
np.random.seed(False) # to freeze the random values
df1 = pd.DataFrame(np.random.randint(0,10,(3,3)),columns=list('ABC'))
df1

Unnamed: 0,A,B,C
0,5,0,3
1,3,7,9
2,3,5,2


In [51]:
df2 = pd.DataFrame(np.random.randint(0,10,(3,3)),columns=list('BAD'))
df2

Unnamed: 0,B,A,D
0,4,7,6
1,8,8,1
2,6,7,7


In [52]:
#order doesnt matter it adds value in same column
df1+df2

Unnamed: 0,A,B,C,D
0,12,4,,
1,11,15,,
2,10,11,,


In [53]:
#fills the missing values as 0
df1.add(df2, fill_value=0)

Unnamed: 0,A,B,C,D
0,12,4,3.0,6.0
1,11,15,9.0,1.0
2,10,11,2.0,7.0


In [54]:
df1.mean() #gives means column wise

A    3.666667
B    4.000000
C    4.666667
dtype: float64

In [55]:
df1.add(df2, fill_value=df1.mean())

Unnamed: 0,A,B,C,D
0,12,4,6.666667,9.666667
1,11,15,13.0,5.0
2,10,11,6.666667,11.666667


In [56]:
print(df1)
df1.stack()

   A  B  C
0  5  0  3
1  3  7  9
2  3  5  2


0  A    5
   B    0
   C    3
1  A    3
   B    7
   C    9
2  A    3
   B    5
   C    2
dtype: int32

In [57]:
df1.stack().mean() # global mean

4.111111111111111

In [58]:
population_dict = {'Bhopal':38332521,'Indore':26448193,'Gandhinagar':19651127,'Ahmedabad':19552860,'Goa':2135}
population = pd.Series(population_dict)

area_dict = {'Bhopal123':48121,'Indore':87845,'Gandhinagar':45416,'Ahmedabad':798456,'Goa':481}
area = pd.Series(area_dict)

data = pd.DataFrame({'pop':population,'area':area})
data

Unnamed: 0,pop,area
Ahmedabad,19552860.0,798456.0
Bhopal,38332521.0,
Bhopal123,,48121.0
Gandhinagar,19651127.0,45416.0
Goa,2135.0,481.0
Indore,26448193.0,87845.0


In [59]:
data['density'] = data['pop']/data['area']
data

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955


In [60]:
data.index

Index(['Ahmedabad', 'Bhopal', 'Bhopal123', 'Gandhinagar', 'Goa', 'Indore'], dtype='object')

In [61]:
data.columns

Index(['pop', 'area', 'density'], dtype='object')

In [63]:
data

Unnamed: 0,pop,area,density
Ahmedabad,19552860.0,798456.0,24.488337
Bhopal,38332521.0,,
Bhopal123,,48121.0,
Gandhinagar,19651127.0,45416.0,432.691717
Goa,2135.0,481.0,4.438669
Indore,26448193.0,87845.0,301.077955


In [62]:
area.index & population.index # both having some values (not NaN)


  area.index & population.index # both having some values (not NaN)


Index(['Indore', 'Gandhinagar', 'Ahmedabad', 'Goa'], dtype='object')