# how to handle missing values in pandas?

In [120]:
import pandas as pd

In [121]:
ufo = pd.read_csv('http://bit.ly/uforeports')

In [122]:
ufo.tail()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
18236,Grant Park,,TRIANGLE,IL,12/31/2000 23:00
18237,Spirit Lake,,DISK,IA,12/31/2000 23:00
18238,Eagle River,,,WI,12/31/2000 23:45
18239,Eagle River,RED,LIGHT,WI,12/31/2000 23:45
18240,Ybor,,OVAL,FL,12/31/2000 23:59


In [123]:
# NaN means a not a number or a missing value
# how to extract
ufo.isnull().tail()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
18236,False,True,False,False,False
18237,False,True,False,False,False
18238,False,True,True,False,False
18239,False,False,False,False,False
18240,False,True,False,False,False


In [124]:
#inverse method
ufo.notnull().tail()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
18236,True,False,True,True,True
18237,True,False,True,True,True
18238,True,False,False,True,True
18239,True,True,True,True,True
18240,True,False,True,True,True


In [125]:
ufo.isnull().sum()  # axis = 0 default y axis columns
# in pandas when we use mathematival operations on bools 
# it converts them to binary

City                  25
Colors Reported    15359
Shape Reported      2644
State                  0
Time                   0
dtype: int64

In [126]:
ufo[ufo.City.isnull()]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
21,,,,LA,8/15/1943 0:00
22,,,LIGHT,LA,8/15/1943 0:00
204,,,DISK,CA,7/15/1952 12:30
241,,BLUE,DISK,MT,7/4/1953 14:00
613,,,DISK,NV,7/1/1960 12:00
1877,,YELLOW,CIRCLE,AZ,8/15/1969 1:00
2013,,,,NH,8/1/1970 9:30
2546,,,FIREBALL,OH,10/25/1973 23:30
3123,,RED,TRIANGLE,WV,11/25/1975 23:00
4736,,,SPHERE,CA,6/23/1982 23:00


In [127]:
# how to repair it
#1
ufo.shape

(18241, 5)

In [128]:
#1
ufo.dropna(how='any').shape    #inplace = False
#this will loose all the rows having a NaN

(2486, 5)

In [129]:
#2
ufo.dropna(how='all').shape
# it will drop only if all th eentries are NaN

(18241, 5)

In [130]:
#3
ufo.dropna(subset=['City','Shape Reported'],how= 'all').shape 
# this will remove the rows where both the rows are NaN

(18237, 5)

In [131]:
#how to fill the place
ufo['Shape Reported'].value_counts()
# by default doesn't counts the NaN values

LIGHT        2803
DISK         2122
TRIANGLE     1889
OTHER        1402
CIRCLE       1365
SPHERE       1054
FIREBALL     1039
OVAL          845
CIGAR         617
FORMATION     434
VARIOUS       333
RECTANGLE     303
CYLINDER      294
CHEVRON       248
DIAMOND       234
EGG           197
FLASH         188
TEARDROP      119
CONE           60
CROSS          36
DELTA           7
ROUND           2
CRESCENT        2
DOME            1
FLARE           1
PYRAMID         1
HEXAGON         1
Name: Shape Reported, dtype: int64

In [132]:
ufo['Shape Reported'].value_counts(dropna=False)

LIGHT        2803
NaN          2644
DISK         2122
TRIANGLE     1889
OTHER        1402
CIRCLE       1365
SPHERE       1054
FIREBALL     1039
OVAL          845
CIGAR         617
FORMATION     434
VARIOUS       333
RECTANGLE     303
CYLINDER      294
CHEVRON       248
DIAMOND       234
EGG           197
FLASH         188
TEARDROP      119
CONE           60
CROSS          36
DELTA           7
CRESCENT        2
ROUND           2
DOME            1
FLARE           1
PYRAMID         1
HEXAGON         1
Name: Shape Reported, dtype: int64

In [133]:
ufo['Shape Reported'].fillna(value='Various',inplace=True)

In [134]:
ufo['Shape Reported'].value_counts(dropna=False)

LIGHT        2803
Various      2644
DISK         2122
TRIANGLE     1889
OTHER        1402
CIRCLE       1365
SPHERE       1054
FIREBALL     1039
OVAL          845
CIGAR         617
FORMATION     434
VARIOUS       333
RECTANGLE     303
CYLINDER      294
CHEVRON       248
DIAMOND       234
EGG           197
FLASH         188
TEARDROP      119
CONE           60
CROSS          36
DELTA           7
ROUND           2
CRESCENT        2
DOME            1
FLARE           1
PYRAMID         1
HEXAGON         1
Name: Shape Reported, dtype: int64

# 2) Indexes in Pandas

In [135]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')

In [136]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [137]:
drinks.index
#this is 'the Index or thorough label'

RangeIndex(start=0, stop=193, step=1)

In [138]:
drinks.columns
#technically column is also a type of an Index

Index(['country', 'beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')

In [139]:
drinks.shape
#index is not the part of the dataframe 

(193, 6)

In [140]:
pd.read_table ('http://bit.ly/movieusers',header=None,sep='|').head()

Unnamed: 0,0,1,2,3,4
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [141]:
# identification, selection and alignment are the main purpose
# identification 

drinks[drinks.continent=='South America']
# index remained same hence we can identify even after filtering

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
6,Argentina,193,25,221,8.3,South America
20,Bolivia,167,41,8,3.8,South America
23,Brazil,245,145,16,7.2,South America
35,Chile,130,124,172,7.6,South America
37,Colombia,159,76,3,4.2,South America
52,Ecuador,162,74,3,4.2,South America
72,Guyana,93,302,1,7.1,South America
132,Paraguay,213,117,74,7.3,South America
133,Peru,163,160,21,6.1,South America
163,Suriname,128,178,7,5.6,South America


In [142]:
#selection
drinks.loc[23,'beer_servings']

245

In [143]:
# indexes are replaced with contries column
drinks.set_index('country',inplace=True)
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [144]:
drinks.index

Index(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua & Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria',
       ...
       'Tanzania', 'USA', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela',
       'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe'],
      dtype='object', name='country', length=193)

In [145]:
drinks.columns

Index(['beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')

In [146]:
drinks.shape

(193, 5)

In [147]:
drinks.loc['Brazil','beer_servings']

245

In [148]:
#index name can be removed
drinks.index.name = None
drinks.head()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [149]:
# redo it
drinks.index.name='country'
drinks.reset_index(inplace=True)
drinks.head

<bound method NDFrame.head of          country  beer_servings  spirit_servings  wine_servings  \
0    Afghanistan              0                0              0   
1        Albania             89              132             54   
2        Algeria             25                0             14   
3        Andorra            245              138            312   
4         Angola            217               57             45   
..           ...            ...              ...            ...   
188    Venezuela            333              100              3   
189      Vietnam            111                2              1   
190        Yemen              6                0              0   
191       Zambia             32               19              4   
192     Zimbabwe             64               18              4   

     total_litres_of_pure_alcohol      continent  
0                             0.0           Asia  
1                             4.9         Europe  
2           

In [150]:
drinks.describe()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
count,193.0,193.0,193.0,193.0
mean,106.160622,80.994819,49.450777,4.717098
std,101.143103,88.284312,79.697598,3.773298
min,0.0,0.0,0.0,0.0
25%,20.0,4.0,1.0,1.3
50%,76.0,56.0,8.0,4.2
75%,188.0,128.0,59.0,7.2
max,376.0,438.0,370.0,14.4


In [151]:
drinks.describe().index

Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'], dtype='object')

In [152]:
drinks.describe().columns

Index(['beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol'],
      dtype='object')

In [153]:
drinks.describe().loc['25%','beer_servings']

20.0

# 3) indexes in pandas 2

In [154]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')

In [155]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [156]:
#alignment
drinks.continent.head()

0      Asia
1    Europe
2    Africa
3    Europe
4    Africa
Name: continent, dtype: object

In [157]:
drinks.set_index('country',inplace=True)


In [158]:
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [159]:
drinks.continent.head()

country
Afghanistan      Asia
Albania        Europe
Algeria        Africa
Andorra        Europe
Angola         Africa
Name: continent, dtype: object

In [160]:
drinks.continent.value_counts()

Africa           53
Europe           45
Asia             44
North America    23
Oceania          16
South America    12
Name: continent, dtype: int64

In [161]:
drinks.continent.value_counts().values

array([53, 45, 44, 23, 16, 12], dtype=int64)

In [162]:
# similar to .loc series oprtr
drinks.continent.value_counts()['Africa']

53

In [163]:
#sorting
drinks.continent.value_counts().sort_values()

South America    12
Oceania          16
North America    23
Asia             44
Europe           45
Africa           53
Name: continent, dtype: int64

In [164]:
drinks.continent.value_counts().sort_index()

Africa           53
Asia             44
Europe           45
North America    23
Oceania          16
South America    12
Name: continent, dtype: int64

In [165]:
#constructing a series
people = pd.Series([3000000,85000],['Albania','Andorra'], name='population')
people

Albania    3000000
Andorra      85000
Name: population, dtype: int64

In [166]:
#total bear servings in country per person
drinks.beer_servings * people

Afghanistan            NaN
Albania        267000000.0
Algeria                NaN
Andorra         20825000.0
Angola                 NaN
                  ...     
Venezuela              NaN
Vietnam                NaN
Yemen                  NaN
Zambia                 NaN
Zimbabwe               NaN
Length: 193, dtype: float64

In [167]:
# how to take people series and add it to dataframe
#concatenate 
pd.concat([drinks,people],axis=1).head

<bound method NDFrame.head of              beer_servings  spirit_servings  wine_servings  \
Afghanistan              0                0              0   
Albania                 89              132             54   
Algeria                 25                0             14   
Andorra                245              138            312   
Angola                 217               57             45   
...                    ...              ...            ...   
Venezuela              333              100              3   
Vietnam                111                2              1   
Yemen                    6                0              0   
Zambia                  32               19              4   
Zimbabwe                64               18              4   

             total_litres_of_pure_alcohol      continent  population  
Afghanistan                           0.0           Asia         NaN  
Albania                               4.9         Europe   3000000.0  
Algeria     

# how to select multiple rows and columns from pandas dataframe

In [168]:
import pandas as pd

## .loc

In [169]:
# loc, iloc,ix are important
ufo = pd.read_csv('http://bit.ly/uforeports')

In [170]:
ufo.head(3)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [171]:
ufo.loc[0,:]     #first row
#loc selects things form labels as index and header
#uses square brackets and arguments are taken as rows and columns

City                       Ithaca
Colors Reported               NaN
Shape Reported           TRIANGLE
State                          NY
Time               6/1/1930 22:00
Name: 0, dtype: object

In [172]:
ufo.loc[0:2:1,:]# its inclusive for 2 unlike range
# or ufo.loc[[0,1,2],:]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [173]:
ufo.loc[0:2]    # pandas is rather being explicit than implicit
# leave columns and pandas by default selects all not recommend

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [174]:
ufo.loc[:,'City'] 

0                      Ithaca
1                 Willingboro
2                     Holyoke
3                     Abilene
4        New York Worlds Fair
                 ...         
18236              Grant Park
18237             Spirit Lake
18238             Eagle River
18239             Eagle River
18240                    Ybor
Name: City, Length: 18241, dtype: object

In [175]:
ufo.loc[:,['City','State']] 

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO
3,Abilene,KS
4,New York Worlds Fair,NY
...,...,...
18236,Grant Park,IL
18237,Spirit Lake,IA
18238,Eagle River,WI
18239,Eagle River,WI


In [176]:
ufo.loc[:,'City':'State']  # similar as range

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO
3,Abilene,,DISK,KS
4,New York Worlds Fair,,LIGHT,NY
...,...,...,...,...
18236,Grant Park,,TRIANGLE,IL
18237,Spirit Lake,,DISK,IA
18238,Eagle River,,,WI
18239,Eagle River,RED,LIGHT,WI


In [177]:
ufo.loc[0:2,'City':'State'] 

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO


In [178]:
ufo.head(3).drop('Time',axis=1)

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO


### .loc is superpowerful

In [179]:
# using loc with boolean condsn
ufo[ufo.City == 'Oakland']

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
1694,Oakland,,CIGAR,CA,7/21/1968 14:00
2144,Oakland,,DISK,CA,8/19/1971 0:00
4686,Oakland,,LIGHT,MD,6/1/1982 0:00
7293,Oakland,,LIGHT,CA,3/28/1994 17:00
8488,Oakland,,,CA,8/10/1995 21:45
8768,Oakland,,,CA,10/10/1995 22:40
10816,Oakland,,LIGHT,OR,10/1/1997 21:30
10948,Oakland,,DISK,CA,11/14/1997 19:55
11045,Oakland,,TRIANGLE,CA,12/10/1997 1:30
12322,Oakland,,FIREBALL,CA,10/9/1998 19:40


In [180]:
ufo.loc[ufo.City == 'Oakland',:]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
1694,Oakland,,CIGAR,CA,7/21/1968 14:00
2144,Oakland,,DISK,CA,8/19/1971 0:00
4686,Oakland,,LIGHT,MD,6/1/1982 0:00
7293,Oakland,,LIGHT,CA,3/28/1994 17:00
8488,Oakland,,,CA,8/10/1995 21:45
8768,Oakland,,,CA,10/10/1995 22:40
10816,Oakland,,LIGHT,OR,10/1/1997 21:30
10948,Oakland,,DISK,CA,11/14/1997 19:55
11045,Oakland,,TRIANGLE,CA,12/10/1997 1:30
12322,Oakland,,FIREBALL,CA,10/9/1998 19:40


In [181]:
ufo.loc[ufo.City == 'Oakland','State'] #explicit with no problems

1694     CA
2144     CA
4686     MD
7293     CA
8488     CA
8768     CA
10816    OR
10948    CA
11045    CA
12322    CA
12941    CA
16803    MD
17322    CA
Name: State, dtype: object

In [182]:
ufo.loc[ufo.City == 'Oakland'].State
#chained indexing may cause problems

1694     CA
2144     CA
4686     MD
7293     CA
8488     CA
8768     CA
10816    OR
10948    CA
11045    CA
12322    CA
12941    CA
16803    MD
17322    CA
Name: State, dtype: object

## .iloc

In [183]:
#integer position
ufo.iloc[:,[0,3]]           # based on position 

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO
3,Abilene,KS
4,New York Worlds Fair,NY
...,...,...
18236,Grant Park,IL
18237,Spirit Lake,IA
18238,Eagle River,WI
18239,Eagle River,WI


In [184]:
ufo.iloc[:,0:3] # iloc is exclusive for the last index like range

Unnamed: 0,City,Colors Reported,Shape Reported
0,Ithaca,,TRIANGLE
1,Willingboro,,OTHER
2,Holyoke,,OVAL
3,Abilene,,DISK
4,New York Worlds Fair,,LIGHT
...,...,...,...
18236,Grant Park,,TRIANGLE
18237,Spirit Lake,,DISK
18238,Eagle River,,
18239,Eagle River,RED,LIGHT


In [185]:
ufo.columns

Index(['City', 'Colors Reported', 'Shape Reported', 'State', 'Time'], dtype='object')

In [186]:
ufo.iloc[0:3,:] #exclusive for last index

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


## shortcuts 

In [187]:
ufo[['City','State']]   #ufo.loc[:,['city','states']] recommended

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO
3,Abilene,KS
4,New York Worlds Fair,NY
...,...,...
18236,Grant Park,IL
18237,Spirit Lake,IA
18238,Eagle River,WI
18239,Eagle River,WI


In [188]:
ufo[0:2]    #or ufo.iloc[0:2,:]
#rows by default  #not recommended

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00


## .ix

In [189]:
import pandas as pd

drinks = pd.read_csv('http://bit.ly/drinksbycountry', index_col='country')

In [190]:
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [191]:
  # it gueses the index or label by itself

# using inplace parameter in pandas

In [192]:
ufo = pd.read_csv('http://bit.ly/uforeports')

In [193]:
ufo.shape

(18241, 5)

In [194]:
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [195]:
ufo.drop('City', axis=1).head()

Unnamed: 0,Colors Reported,Shape Reported,State,Time
0,,TRIANGLE,NY,6/1/1930 22:00
1,,OTHER,NJ,6/30/1930 20:00
2,,OVAL,CO,2/15/1931 14:00
3,,DISK,KS,6/1/1931 13:00
4,,LIGHT,NY,4/18/1933 19:00


In [196]:
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [197]:
# the city column is not gone its just not shown
ufo.drop('City', axis=1,inplace=True)
# if nothing is happened it means that the inplace command has took place correctly

#### now the ufo dataframe has been chaged permanently
### # why inplace is false by defalult because it allows us to experiment us without permanently changing the dataframe

In [198]:
ufo.dropna(how ='any').shape

(2490, 4)

In [199]:
# but nothing is lost
ufo.shape

(18241, 4)

In [200]:
ufo.rename({'State':'state'})

Unnamed: 0,Colors Reported,Shape Reported,State,Time
0,,TRIANGLE,NY,6/1/1930 22:00
1,,OTHER,NJ,6/30/1930 20:00
2,,OVAL,CO,2/15/1931 14:00
3,,DISK,KS,6/1/1931 13:00
4,,LIGHT,NY,4/18/1933 19:00
...,...,...,...,...
18236,,TRIANGLE,IL,12/31/2000 23:00
18237,,DISK,IA,12/31/2000 23:00
18238,,,WI,12/31/2000 23:45
18239,RED,LIGHT,WI,12/31/2000 23:45


In [201]:
# rename,sort_values,set_index

### why we use inplace instead of assignment :- because assignment creates two copies of dataframe if its in GB and it makes slower,but inplace may save us from this. also there is no guarantee that inplce = True is more efficient than the assignment statement  

In [202]:
ufo.fillna(method='bfill').tail()

Unnamed: 0,Colors Reported,Shape Reported,State,Time
18236,RED,TRIANGLE,IL,12/31/2000 23:00
18237,RED,DISK,IA,12/31/2000 23:00
18238,RED,LIGHT,WI,12/31/2000 23:45
18239,RED,LIGHT,WI,12/31/2000 23:45
18240,,OVAL,FL,12/31/2000 23:59


In [203]:
ufo.fillna(method='ffill').tail()

Unnamed: 0,Colors Reported,Shape Reported,State,Time
18236,RED,TRIANGLE,IL,12/31/2000 23:00
18237,RED,DISK,IA,12/31/2000 23:00
18238,RED,DISK,WI,12/31/2000 23:45
18239,RED,LIGHT,WI,12/31/2000 23:45
18240,RED,OVAL,FL,12/31/2000 23:59


In [204]:
ufo.tail()

Unnamed: 0,Colors Reported,Shape Reported,State,Time
18236,,TRIANGLE,IL,12/31/2000 23:00
18237,,DISK,IA,12/31/2000 23:00
18238,,,WI,12/31/2000 23:45
18239,RED,LIGHT,WI,12/31/2000 23:45
18240,,OVAL,FL,12/31/2000 23:59


In [205]:
# Hence we can check with help of 'inplace false' by dflt