In [3]:
import pandas as pd
import numpy as np

In [4]:
idx = pd.Index([10, 20, 30])

In [5]:
idx

Int64Index([10, 20, 30], dtype='int64')

In [6]:
idx = pd.Index([1, 3.14])

In [7]:
idx

Float64Index([1.0, 3.14], dtype='float64')

In [8]:
idx = pd.Index(['element 1', 'element 2'])

In [9]:
idx

Index(['element 1', 'element 2'], dtype='object')

In [10]:
idx = pd.Index([2, 4, 6, 8, 10])
idx

Int64Index([2, 4, 6, 8, 10], dtype='int64')

In [11]:
idx[0]

2

In [12]:
idx[1:4]

Int64Index([4, 6, 8], dtype='int64')

In [13]:
idx[[1, 3, 4]]

Int64Index([4, 8, 10], dtype='int64')

In [14]:
idx = pd.Index(['London', 'Paris', 'New York', 'Tokyo'])
idx[idx != 'Tokyo']

Index(['London', 'Paris', 'New York'], dtype='object')

In [15]:
try:
    idx[0] = 100
except TypeError as ex:
    print('TypeError: ', ex)

TypeError:  Index does not support mutable operations


In [16]:
idx_1 = pd.Index(['a', 'b', 'c'])
idx_2 = pd.Index(['c', 'd', 'e'])


In [17]:
idx_1 & idx_2


  idx_1 & idx_2


Index(['c'], dtype='object')

In [18]:
idx_1.intersection(idx_2)

Index(['c'], dtype='object')

In [19]:
idx_1 | idx_2

  idx_1 | idx_2


Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [20]:
idx_1.union(idx_2)

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [21]:
idx_2.union(idx_1)


Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [22]:
pd.Index([1, 2, 3]).union(pd.Index([1.1, 2.2, 3.0]))

Float64Index([1.0, 1.1, 2.0, 2.2, 3.0], dtype='float64')

In [23]:
pd.Index(range(2, 10, 2))

RangeIndex(start=2, stop=10, step=2)

In [24]:
idx = pd.RangeIndex(2, 10, 2)
idx

RangeIndex(start=2, stop=10, step=2)

In [25]:
idx

RangeIndex(start=2, stop=10, step=2)

In [26]:
idx[1:4]

RangeIndex(start=4, stop=10, step=2)

In [27]:
idx[::-1]

RangeIndex(start=8, stop=0, step=-2)

In [28]:
idx_1 = pd.RangeIndex(0, 5)
list(idx_1)

[0, 1, 2, 3, 4]

In [29]:
idx_2 = pd.RangeIndex(4, 8)
list(idx_2)

[4, 5, 6, 7]

In [30]:
idx_1.intersection(idx_2)

RangeIndex(start=4, stop=5, step=1)

In [31]:
list(idx_1.union(idx_2))


[0, 1, 2, 3, 4, 5, 6, 7]

In [32]:
list(pd.RangeIndex(1, 10, 2).union(pd.RangeIndex(1, 10, 3)))

[1, 3, 4, 5, 7, 9]

In [33]:
idx_1 = pd.Index(['a', 'b', 'c'])
idx_2 = pd.RangeIndex(0, 10, 2)


In [34]:
'b' in idx_1

True

In [35]:
8 in idx_2

True

In [36]:
10 in idx_2

False

In [37]:
idx = pd.Index([1, 1, 2, 2, 3, 3])

In [38]:
idx

Int64Index([1, 1, 2, 2, 3, 3], dtype='int64')

In [39]:
s = pd.Series([10, 20, 30], index=['a', 'b', 'c'])

In [40]:
s

a    10
b    20
c    30
dtype: int64

In [41]:
s['a']

10

In [42]:
s['c'] = 100

In [43]:
s

a     10
b     20
c    100
dtype: int64

In [44]:
s['d'] = 500

In [45]:
s

a     10
b     20
c    100
d    500
dtype: int64

In [46]:
capitals = {
    'USA': 'Washington D.C.',
    'Canada': 'Ottawa',
    'UK': 'London',
    'France': 'Paris'
}

In [47]:
s = pd.Series(capitals)

In [48]:
s

USA       Washington D.C.
Canada             Ottawa
UK                 London
France              Paris
dtype: object

In [49]:
s.index

Index(['USA', 'Canada', 'UK', 'France'], dtype='object')

In [50]:
s.values

array(['Washington D.C.', 'Ottawa', 'London', 'Paris'], dtype=object)

In [51]:
type(s.values)

numpy.ndarray

In [52]:
s.items()

<zip at 0x2c40451cbc0>

In [53]:
list(s.items())

[('USA', 'Washington D.C.'),
 ('Canada', 'Ottawa'),
 ('UK', 'London'),
 ('France', 'Paris')]

In [54]:
areas = pd.Series(
    ['USA', 'Topeka', 'France', 'Lyon', 'UK', 'Glasgow'],
    index=['country', 'city', 'country', 'city', 'country', 'city']
)

In [55]:
areas

country        USA
city        Topeka
country     France
city          Lyon
country         UK
city       Glasgow
dtype: object

In [56]:
areas['city']

city     Topeka
city       Lyon
city    Glasgow
dtype: object

In [57]:
areas[5]

'Glasgow'

In [58]:
areas[1] = 'Rom'

In [59]:
areas

country        USA
city           Rom
country     France
city          Lyon
country         UK
city       Glasgow
dtype: object

In [60]:
s = pd.Series([10, 20, 30, 40, 50], index=list('abcde'))

In [61]:
s['a':'d']

a    10
b    20
c    30
d    40
dtype: int64

In [62]:
s[:3]

a    10
b    20
c    30
dtype: int64

In [63]:
s[['a', 'c', 'd']]

a    10
c    30
d    40
dtype: int64

In [64]:
s = pd.Series([100, 200, 300], index=[10, 20, 30])

In [65]:
s

10    100
20    200
30    300
dtype: int64

In [66]:
s[10]

100

In [67]:
s[100]

KeyError: 100

In [68]:
s[0]

KeyError: 0

In [69]:
try:
    s[0]
except KeyError as ex:
    print('KeyError: ', ex)

KeyError:  0


In [70]:
s[0:3]

10    100
20    200
30    300
dtype: int64

In [71]:
s.iloc[0]

100

In [72]:
s.loc[10]

100

In [73]:
s.iloc[0:4]

10    100
20    200
30    300
dtype: int64

In [74]:
s.loc[10:30]

10    100
20    200
30    300
dtype: int64

In [75]:
s

10    100
20    200
30    300
dtype: int64

In [76]:
s.name = 'test'

In [77]:
s

10    100
20    200
30    300
Name: test, dtype: int64

In [78]:
areas = pd.Series(
    ['USA', 'Topeka', 'France', 'Lyon', 'UK', 'Glasgow'],
    index=['country', 'city', 'country', 'city', 'country', 'city'],
    name='Areas'
)
areas

country        USA
city        Topeka
country     France
city          Lyon
country         UK
city       Glasgow
Name: Areas, dtype: object

In [79]:
areas[areas != 'Glasgow']

country       USA
city       Topeka
country    France
city         Lyon
country        UK
Name: Areas, dtype: object

In [80]:
s = pd.Series([10, 20, 30], index=list('abc'), name='test')

In [81]:
s

a    10
b    20
c    30
Name: test, dtype: int64

In [82]:
new = s.drop(['a', 'c'])

In [83]:
new

b    20
Name: test, dtype: int64

In [84]:
s

a    10
b    20
c    30
Name: test, dtype: int64

In [85]:
s.index

Index(['a', 'b', 'c'], dtype='object')

In [86]:
s.index[[0, 2]]

Index(['a', 'c'], dtype='object')

In [87]:
s.drop(s.index[[0, 2]])

b    20
Name: test, dtype: int64

In [88]:
s

a    10
b    20
c    30
Name: test, dtype: int64

In [89]:
columns = pd.Index(
    [
        'The Bronx',
        'Brooklyn',
        'Manhattan',
        'Queens',
        'Staten Island'
    ]
)
counties = pd.Series(
    ['Bronx', 'Kings', 'New York', 'Queens', 'Richmond'],
    index=columns,
    name='county'
)
populations = pd.Series(
    [1_418_207, 2_559_903, 1_628_706, 2_253_858, 476_143],
    index=columns,
    name='population'
)
gdp = pd.Series(
    [42.695, 91.559, 600.244, 93.310, 14.514],
    index=columns,
    name='gdp'
)
areas = pd.Series(
    [42.10, 70.82, 22.83, 108.53, 58.37],
    index=columns,
    name='area'
)

In [90]:
new_york = pd.DataFrame([counties, populations, gdp, areas])
new_york

Unnamed: 0,The Bronx,Brooklyn,Manhattan,Queens,Staten Island
county,Bronx,Kings,New York,Queens,Richmond
population,1418207,2559903,1628706,2253858,476143
gdp,42.695,91.559,600.244,93.31,14.514
area,42.1,70.82,22.83,108.53,58.37


In [91]:
new_york.transpose()

Unnamed: 0,county,population,gdp,area
The Bronx,Bronx,1418207,42.695,42.1
Brooklyn,Kings,2559903,91.559,70.82
Manhattan,New York,1628706,600.244,22.83
Queens,Queens,2253858,93.31,108.53
Staten Island,Richmond,476143,14.514,58.37


In [92]:
d = {
    'county': counties,
    'population': populations,
    'gdp': gdp,
    'area': areas
}
new_york = pd.DataFrame(d)
new_york

Unnamed: 0,county,population,gdp,area
The Bronx,Bronx,1418207,42.695,42.1
Brooklyn,Kings,2559903,91.559,70.82
Manhattan,New York,1628706,600.244,22.83
Queens,Queens,2253858,93.31,108.53
Staten Island,Richmond,476143,14.514,58.37


In [93]:
new_york.transpose()

Unnamed: 0,The Bronx,Brooklyn,Manhattan,Queens,Staten Island
county,Bronx,Kings,New York,Queens,Richmond
population,1418207,2559903,1628706,2253858,476143
gdp,42.695,91.559,600.244,93.31,14.514
area,42.1,70.82,22.83,108.53,58.37


In [94]:
counties = {
    'The Bronx': 'Bronx',
    'Brooklyn': 'Kings',
    'Manhattan': 'New York',
    'Queens': 'Queens',
    'Staten Island': 'Richmond'
}
populations = {
    # note how the keys are not necessarily in the same order
    'Manhattan': 1_628_706,
    'Queens': 2_253_858,
    'Staten Island': 476_143,
    'The Bronx': 1_418_207,
    'Brooklyn': 2_559_903
}
gdp = {
    'The Bronx': 42.695,
    'Brooklyn': 91.559,
    'Manhattan': 600.244,
    'Queens': 93.310,
    'Staten Island': 14.514
}
areas = {
    'The Bronx': 2.10,
    'Brooklyn': 70.82,
    'Manhattan': 22.83,
    'Queens': 108.53,
    'Staten Island': 58.37
}

d = {
    'county': counties,
    'population': populations,
    'gpd': gdp,
    'area': areas
}

In [95]:
new_york = pd.DataFrame(d)
new_york

Unnamed: 0,county,population,gpd,area
The Bronx,Bronx,1418207,42.695,2.1
Brooklyn,Kings,2559903,91.559,70.82
Manhattan,New York,1628706,600.244,22.83
Queens,Queens,2253858,93.31,108.53
Staten Island,Richmond,476143,14.514,58.37


In [96]:
import numpy as np
import pandas as pd

new_york = pd.DataFrame([counties, populations, gdp, areas])
new_york

Unnamed: 0,The Bronx,Brooklyn,Manhattan,Queens,Staten Island
0,Bronx,Kings,New York,Queens,Richmond
1,1418207,2559903,1628706,2253858,476143
2,42.695,91.559,600.244,93.31,14.514
3,2.1,70.82,22.83,108.53,58.37


In [97]:
new_york.rename(index={0: 'county', 1: 'population', 2: 'gdp', 3: 'area'})

Unnamed: 0,The Bronx,Brooklyn,Manhattan,Queens,Staten Island
county,Bronx,Kings,New York,Queens,Richmond
population,1418207,2559903,1628706,2253858,476143
gdp,42.695,91.559,600.244,93.31,14.514
area,2.1,70.82,22.83,108.53,58.37


In [98]:
new_york

Unnamed: 0,The Bronx,Brooklyn,Manhattan,Queens,Staten Island
0,Bronx,Kings,New York,Queens,Richmond
1,1418207,2559903,1628706,2253858,476143
2,42.695,91.559,600.244,93.31,14.514
3,2.1,70.82,22.83,108.53,58.37


In [100]:
new_york.rename(index={0: 'county', 1: 'population', 2: 'gdp', 3: 'area'}).transpose()


Unnamed: 0,county,population,gdp,area
The Bronx,Bronx,1418207,42.695,2.1
Brooklyn,Kings,2559903,91.559,70.82
Manhattan,New York,1628706,600.244,22.83
Queens,Queens,2253858,93.31,108.53
Staten Island,Richmond,476143,14.514,58.37


In [101]:
new_york.transpose()

Unnamed: 0,0,1,2,3
The Bronx,Bronx,1418207,42.695,2.1
Brooklyn,Kings,2559903,91.559,70.82
Manhattan,New York,1628706,600.244,22.83
Queens,Queens,2253858,93.31,108.53
Staten Island,Richmond,476143,14.514,58.37


In [103]:
new_york = new_york.rename(
    columns={0: 'county', 1: 'population', 2: 'gdp', 3: 'area'}
)
new_york

Unnamed: 0,The Bronx,Brooklyn,Manhattan,Queens,Staten Island
0,Bronx,Kings,New York,Queens,Richmond
1,1418207,2559903,1628706,2253858,476143
2,42.695,91.559,600.244,93.31,14.514
3,2.1,70.82,22.83,108.53,58.37


In [104]:
burroughs = ['The Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island']
counties = ['Bronx', 'Kings', 'New York', 'Queens', 'Richmond']
populations = [1_418_207, 2_559_903, 1_628_706, 2_253_858, 476_143]
gdp = [42.695, 91.559, 600.244, 93.310, 14.514]
areas = [42.10, 70.82, 22.83, 108.53, 58.37]

In [105]:
data = [burroughs, counties, populations, gdp, areas]

In [106]:
data

[['The Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island'],
 ['Bronx', 'Kings', 'New York', 'Queens', 'Richmond'],
 [1418207, 2559903, 1628706, 2253858, 476143],
 [42.695, 91.559, 600.244, 93.31, 14.514],
 [42.1, 70.82, 22.83, 108.53, 58.37]]

In [117]:
new_york = pd.DataFrame(
    data,
    index=['BURROUGHS', 'COUNTY', 'POPULATION', 'GDP', 'AREA']
)

In [118]:
new_york

Unnamed: 0,0,1,2,3,4
BURROUGHS,The Bronx,Brooklyn,Manhattan,Queens,Staten Island
COUNTY,Bronx,Kings,New York,Queens,Richmond
POPULATION,1418207,2559903,1628706,2253858,476143
GDP,42.695,91.559,600.244,93.31,14.514
AREA,42.1,70.82,22.83,108.53,58.37


In [120]:
new_york = new_york.transpose()
new_york

Unnamed: 0,BURROUGHS,COUNTY,POPULATION,GDP,AREA
0,The Bronx,Bronx,1418207,42.695,42.1
1,Brooklyn,Kings,2559903,91.559,70.82
2,Manhattan,New York,1628706,600.244,22.83
3,Queens,Queens,2253858,93.31,108.53
4,Staten Island,Richmond,476143,14.514,58.37


In [121]:
new_york.set_index('BURROUGHS')

Unnamed: 0_level_0,COUNTY,POPULATION,GDP,AREA
BURROUGHS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
The Bronx,Bronx,1418207,42.695,42.1
Brooklyn,Kings,2559903,91.559,70.82
Manhattan,New York,1628706,600.244,22.83
Queens,Queens,2253858,93.31,108.53
Staten Island,Richmond,476143,14.514,58.37


In [122]:
new_york.set_index('COUNTY')

Unnamed: 0_level_0,BURROUGHS,POPULATION,GDP,AREA
COUNTY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bronx,The Bronx,1418207,42.695,42.1
Kings,Brooklyn,2559903,91.559,70.82
New York,Manhattan,1628706,600.244,22.83
Queens,Queens,2253858,93.31,108.53
Richmond,Staten Island,476143,14.514,58.37


In [123]:
new_york.columns

Index(['BURROUGHS', 'COUNTY', 'POPULATION', 'GDP', 'AREA'], dtype='object')

In [124]:
new_york.index.name = None
new_york

Unnamed: 0,BURROUGHS,COUNTY,POPULATION,GDP,AREA
0,The Bronx,Bronx,1418207,42.695,42.1
1,Brooklyn,Kings,2559903,91.559,70.82
2,Manhattan,New York,1628706,600.244,22.83
3,Queens,Queens,2253858,93.31,108.53
4,Staten Island,Richmond,476143,14.514,58.37


In [125]:
new_york.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   BURROUGHS   5 non-null      object
 1   COUNTY      5 non-null      object
 2   POPULATION  5 non-null      object
 3   GDP         5 non-null      object
 4   AREA        5 non-null      object
dtypes: object(5)
memory usage: 328.0+ bytes


In [127]:
new_york

Unnamed: 0,BURROUGHS,COUNTY,POPULATION,GDP,AREA
0,The Bronx,Bronx,1418207,42.695,42.1
1,Brooklyn,Kings,2559903,91.559,70.82
2,Manhattan,New York,1628706,600.244,22.83
3,Queens,Queens,2253858,93.31,108.53
4,Staten Island,Richmond,476143,14.514,58.37


In [128]:
new_york

Unnamed: 0,BURROUGHS,COUNTY,POPULATION,GDP,AREA
0,The Bronx,Bronx,1418207,42.695,42.1
1,Brooklyn,Kings,2559903,91.559,70.82
2,Manhattan,New York,1628706,600.244,22.83
3,Queens,Queens,2253858,93.31,108.53
4,Staten Island,Richmond,476143,14.514,58.37


In [129]:
new_df = new_york.drop(columns='COUNTY')

In [130]:
new_df

Unnamed: 0,BURROUGHS,POPULATION,GDP,AREA
0,The Bronx,1418207,42.695,42.1
1,Brooklyn,2559903,91.559,70.82
2,Manhattan,1628706,600.244,22.83
3,Queens,2253858,93.31,108.53
4,Staten Island,476143,14.514,58.37


In [134]:
new_york.set_index('BURROUGHS')
new_df = new_york.drop(
    index=[1, 3]
)

In [136]:
new_df

Unnamed: 0,BURROUGHS,COUNTY,POPULATION,GDP,AREA
0,The Bronx,Bronx,1418207,42.695,42.1
2,Manhattan,New York,1628706,600.244,22.83
4,Staten Island,Richmond,476143,14.514,58.37


In [138]:
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list('ABC'),
    columns=list('abc')
)

In [139]:
df

Unnamed: 0,a,b,c
A,0,1,2
B,3,4,5
C,6,7,8


In [140]:
df.rename(
    columns={'a': 'aa', 'b': 'bb', 'c': 'cc'},
    index={'A': 'AA', 'B': 'BB', 'C': 'CC'}
)

Unnamed: 0,aa,bb,cc
AA,0,1,2
BB,3,4,5
CC,6,7,8


In [141]:
df

Unnamed: 0,a,b,c
A,0,1,2
B,3,4,5
C,6,7,8


In [142]:
df.rename(
    columns={'a': 'AA'}
)

Unnamed: 0,AA,b,c
A,0,1,2
B,3,4,5
C,6,7,8


In [146]:
arr = np.arange(1, 10).reshape(3, 3)
df = pd.DataFrame(
    arr,
    index=['r1', 'r2', 'r3'],
    columns=['c1', 'c2', 'c3']
)

In [147]:
df

Unnamed: 0,c1,c2,c3
r1,1,2,3
r2,4,5,6
r3,7,8,9


In [148]:
df.index

Index(['r1', 'r2', 'r3'], dtype='object')

In [149]:
df['c2']

r1    2
r2    5
r3    8
Name: c2, dtype: int32

In [152]:
try:
    df[0]
except  KeyError as ex:
    print('KeyError:', ex)

KeyError: 0


In [153]:
df['c2'][1]

5

In [156]:
df['c2']['r1']

2

In [157]:
df.values[1, 2]

6

In [158]:
df.iloc[1, 2]

6

In [159]:
df

Unnamed: 0,c1,c2,c3
r1,1,2,3
r2,4,5,6
r3,7,8,9


In [160]:
df.loc['r2', 'c3']

6

In [161]:
print(df)
df.loc['r1':'r2', 'c2':'c3']

    c1  c2  c3
r1   1   2   3
r2   4   5   6
r3   7   8   9


Unnamed: 0,c2,c3
r1,2,3
r2,5,6


In [162]:
print(df)
df.iloc[0:1, 1:2]

    c1  c2  c3
r1   1   2   3
r2   4   5   6
r3   7   8   9


Unnamed: 0,c2
r1,2


In [163]:
df.iloc[:, 1:2]

Unnamed: 0,c2
r1,2
r2,5
r3,8


In [164]:
df.iloc[0:2, :]

Unnamed: 0,c1,c2,c3
r1,1,2,3
r2,4,5,6


In [165]:
df.iloc[0:2]

Unnamed: 0,c1,c2,c3
r1,1,2,3
r2,4,5,6


In [166]:
df.loc[:, ['c1', 'c3']]

Unnamed: 0,c1,c3
r1,1,3
r2,4,6
r3,7,9


In [169]:
df.iloc[:, [0, 2]]

Unnamed: 0,c1,c3
r1,1,3
r2,4,6
r3,7,9


In [170]:
print(df)
tmp = df.iloc[0:2, :]
tmp

    c1  c2  c3
r1   1   2   3
r2   4   5   6
r3   7   8   9


Unnamed: 0,c1,c2,c3
r1,1,2,3
r2,4,5,6


In [171]:
df.iloc[0:2, :].loc[:, ['c1', 'c3']]

Unnamed: 0,c1,c3
r1,1,3
r2,4,6


In [172]:
df

Unnamed: 0,c1,c2,c3
r1,1,2,3
r2,4,5,6
r3,7,8,9


In [173]:
df.iloc[0, 0] = -10

In [174]:
df

Unnamed: 0,c1,c2,c3
r1,-10,2,3
r2,4,5,6
r3,7,8,9


In [175]:
df.loc['r1':'r2', 'c1':'c2']

Unnamed: 0,c1,c2
r1,-10,2
r2,4,5


In [176]:
df.loc['r1':'r2', 'c1':'c2'] = np.array([10, 20, 30, 40]).reshape(2, 2)
df

Unnamed: 0,c1,c2,c3
r1,10,20,3
r2,30,40,6
r3,7,8,9


In [178]:
df.loc['r1':'r2', 'c1':'c2'] = -100
df


Unnamed: 0,c1,c2,c3
r1,-100,-100,3
r2,-100,-100,6
r3,7,8,9


In [179]:
df.loc['r1':'r2', 'c1':'c2'] = [100, 200]


In [180]:
df

Unnamed: 0,c1,c2,c3
r1,100,200,3
r2,100,200,6
r3,7,8,9


In [182]:
float('NaN')

nan

In [183]:
float('nan')

nan

In [184]:
float('nan') == float('NaN')

False

In [186]:
import math

math.isnan(float('NAN'))

True

In [187]:
a = np.array([1, 2, np.nan, 3, np.nan])

In [188]:
a

array([ 1.,  2., nan,  3., nan])

In [190]:
s = pd.Series([3.14, 2.5, None, 5])

In [191]:
s

0    3.14
1    2.50
2     NaN
3    5.00
dtype: float64

In [192]:
type(s[2])

numpy.float64

In [193]:
pd.Series([1, 2, 3, None])

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64

In [198]:
s = pd.Series(['a', 'b', None, np.nan])


In [199]:
s[2] is None

True

In [201]:
s[3] is None

False

In [202]:
s = pd.Series(['aaa', 'bbb', None, 'ddd', np.nan], index=list('abcde'))

In [203]:
s

a     aaa
b     bbb
c    None
d     ddd
e     NaN
dtype: object

In [204]:
pd.isnull(s)

a    False
b    False
c     True
d    False
e     True
dtype: bool

In [205]:
s[pd.isnull(s)]

c    None
e     NaN
dtype: object

In [206]:
s[~pd.isnull(s)]

a    aaa
b    bbb
d    ddd
dtype: object

In [210]:
s[pd.notnull(s)]

a    aaa
b    bbb
d    ddd
dtype: object

In [211]:
s.dropna()

a    aaa
b    bbb
d    ddd
dtype: object

In [212]:
s

a     aaa
b     bbb
c    None
d     ddd
e     NaN
dtype: object

In [215]:
s.fillna('missing')

a        aaa
b        bbb
c    missing
d        ddd
e    missing
dtype: object

In [216]:
s.fillna(method='ffill')

a    aaa
b    bbb
c    bbb
d    ddd
e    ddd
dtype: object

In [217]:
s.fillna(method='bfill')

a    aaa
b    bbb
c    ddd
d    ddd
e    NaN
dtype: object

In [218]:
s.fillna(method='ffill').fillna(method='bfill')

a    aaa
b    bbb
c    bbb
d    ddd
e    ddd
dtype: object

In [219]:
s=pd.Series([1,2,None,4,None,7])

In [220]:
s

0    1.0
1    2.0
2    NaN
3    4.0
4    NaN
5    7.0
dtype: float64

In [221]:
s.interpolate(method='linear')

0    1.0
1    2.0
2    3.0
3    4.0
4    5.5
5    7.0
dtype: float64

In [222]:
d = {
    'col1': {'row1': 1, 'row2': 10, 'row3': 100, 'row4': 1000, 'row5': 10000},
    'col2': {'row1': 2, 'row2': None, 'row3': None, 'row4': 2000, 'row5': 20000},
    'col3': {'row1': 3, 'row2': 30, 'row3': 300, 'row4': None, 'row5': 40000},
    'col4': {'row1': 4, 'row2': 40, 'row3': 400, 'row4': 4000, 'row5': 40000}
}

In [223]:
df = pd.DataFrame(d)

In [224]:
df

Unnamed: 0,col1,col2,col3,col4
row1,1,2.0,3.0,4
row2,10,,30.0,40
row3,100,,300.0,400
row4,1000,2000.0,,4000
row5,10000,20000.0,40000.0,40000


In [226]:
df.isnull()

Unnamed: 0,col1,col2,col3,col4
row1,False,False,False,False
row2,False,True,False,False
row3,False,True,False,False
row4,False,False,True,False
row5,False,False,False,False


In [227]:
df.fillna(0)

Unnamed: 0,col1,col2,col3,col4
row1,1,2.0,3.0,4
row2,10,0.0,30.0,40
row3,100,0.0,300.0,400
row4,1000,2000.0,0.0,4000
row5,10000,20000.0,40000.0,40000


In [228]:
print(df)
df.fillna(method='ffill')

       col1     col2     col3   col4
row1      1      2.0      3.0      4
row2     10      NaN     30.0     40
row3    100      NaN    300.0    400
row4   1000   2000.0      NaN   4000
row5  10000  20000.0  40000.0  40000


Unnamed: 0,col1,col2,col3,col4
row1,1,2.0,3.0,4
row2,10,2.0,30.0,40
row3,100,2.0,300.0,400
row4,1000,2000.0,300.0,4000
row5,10000,20000.0,40000.0,40000


In [230]:
print(df)
df.fillna(method='ffill', axis=1)


       col1     col2     col3   col4
row1      1      2.0      3.0      4
row2     10      NaN     30.0     40
row3    100      NaN    300.0    400
row4   1000   2000.0      NaN   4000
row5  10000  20000.0  40000.0  40000


Unnamed: 0,col1,col2,col3,col4
row1,1.0,2.0,3.0,4.0
row2,10.0,10.0,30.0,40.0
row3,100.0,100.0,300.0,400.0
row4,1000.0,2000.0,2000.0,4000.0
row5,10000.0,20000.0,40000.0,40000.0


In [231]:
print(df)
df.interpolate(method='linear')


       col1     col2     col3   col4
row1      1      2.0      3.0      4
row2     10      NaN     30.0     40
row3    100      NaN    300.0    400
row4   1000   2000.0      NaN   4000
row5  10000  20000.0  40000.0  40000


Unnamed: 0,col1,col2,col3,col4
row1,1,2.0,3.0,4
row2,10,668.0,30.0,40
row3,100,1334.0,300.0,400
row4,1000,2000.0,20150.0,4000
row5,10000,20000.0,40000.0,40000


In [234]:
print(df)
df.interpolate(
    method='linear'
)

       col1     col2     col3   col4
row1      1      2.0      3.0      4
row2     10      NaN     30.0     40
row3    100      NaN    300.0    400
row4   1000   2000.0      NaN   4000
row5  10000  20000.0  40000.0  40000


Unnamed: 0,col1,col2,col3,col4
row1,1,2.0,3.0,4
row2,10,668.0,30.0,40
row3,100,1334.0,300.0,400
row4,1000,2000.0,20150.0,4000
row5,10000,20000.0,40000.0,40000


In [235]:
print(df)
df.dropna()

       col1     col2     col3   col4
row1      1      2.0      3.0      4
row2     10      NaN     30.0     40
row3    100      NaN    300.0    400
row4   1000   2000.0      NaN   4000
row5  10000  20000.0  40000.0  40000


Unnamed: 0,col1,col2,col3,col4
row1,1,2.0,3.0,4
row5,10000,20000.0,40000.0,40000


In [236]:
print(df)
df.dropna(axis=1)


       col1     col2     col3   col4
row1      1      2.0      3.0      4
row2     10      NaN     30.0     40
row3    100      NaN    300.0    400
row4   1000   2000.0      NaN   4000
row5  10000  20000.0  40000.0  40000


Unnamed: 0,col1,col4
row1,1,4
row2,10,40
row3,100,400
row4,1000,4000
row5,10000,40000
