# How do I handle missing values in pandas?

In [1]:
import pandas as pd

In [2]:
# Load the UFO reports CSV from a shortened URL into a DataFrame.
# NOTE(review): this needs network access on every run; consider caching a local copy.
ufo = pd.read_csv('http://bit.ly/uforeports')

In [3]:
# Show the last five rows; blank cells in the rendered table are missing values.
ufo.tail()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
18236,Grant Park,,TRIANGLE,IL,12/31/2000 23:00
18237,Spirit Lake,,DISK,IA,12/31/2000 23:00
18238,Eagle River,,,WI,12/31/2000 23:45
18239,Eagle River,RED,LIGHT,WI,12/31/2000 23:45
18240,Ybor,,OVAL,FL,12/31/2000 23:59


In [4]:
# isnull() returns a frame of booleans with the same shape: True where a value is missing.
ufo.isnull().tail()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
18236,False,True,False,False,False
18237,False,True,False,False,False
18238,False,True,True,False,False
18239,False,False,False,False,False
18240,False,True,False,False,False


In [5]:
# notnull() is the element-wise inverse of isnull(): True where a value is present.
ufo.notnull().tail()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
18236,True,False,True,True,True
18237,True,False,True,True,True
18238,True,False,False,True,True
18239,True,True,True,True,True
18240,True,False,True,True,True


In [6]:
# Summing the boolean mask column by column counts True values,
# i.e. the number of missing values in each column.
ufo.isnull().sum()  # missing-value count per column

City                  25
Colors Reported    15359
Shape Reported      2644
State                  0
Time                   0
dtype: int64

In [7]:
# Boolean-mask filter: keep only the rows whose City value is missing.
ufo.loc[ufo['City'].isnull()]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
21,,,,LA,8/15/1943 0:00
22,,,LIGHT,LA,8/15/1943 0:00
204,,,DISK,CA,7/15/1952 12:30
241,,BLUE,DISK,MT,7/4/1953 14:00
613,,,DISK,NV,7/1/1960 12:00
1877,,YELLOW,CIRCLE,AZ,8/15/1969 1:00
2013,,,,NH,8/1/1970 9:30
2546,,,FIREBALL,OH,10/25/1973 23:30
3123,,RED,TRIANGLE,WV,11/25/1975 23:00
4736,,,SPHERE,CA,6/23/1982 23:00


In [9]:
# (rows, columns) of the full dataset, as a baseline before dropping anything.
ufo.shape

(18241, 5)

In [10]:
# how='any' drops a row if at least one of its values is missing.
# The result is not assigned, so ufo itself is left unchanged.
ufo.dropna(how='any').shape

(2486, 5)

In [11]:
# how='all' drops a row only when every value in it is missing.
# No such row exists here, so the shape matches the original.
ufo.dropna(how='all').shape

(18241, 5)

# How do I use the index in a pandas Series?

In [12]:
import pandas as pd

In [13]:
# Load the drinks-by-country CSV; no index column is given, so pandas uses a default integer index.
drinks = pd.read_csv('http://bit.ly/drinksbycountry')

In [14]:
# Preview the first five rows.
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [15]:
# The row labels: here a default RangeIndex running from 0 to the number of rows.
drinks.index

RangeIndex(start=0, stop=193, step=1)

In [16]:
# The column labels are stored as an Index object too.
drinks.columns

Index(['country', 'beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')

In [17]:
# shape counts data only: 193 rows by 6 columns (the index is not counted as a column).
drinks.shape

(193, 6)

In [18]:
# Pipe-delimited file without a header row: header=None makes pandas assign
# default integer column labels (0, 1, 2, ...) next to the default integer index.
pd.read_csv('http://bit.ly/movieusers', sep='|', header=None).head()

Unnamed: 0,0,1,2,3,4
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


# How do I select multiple rows and columns?

In [19]:
import pandas as pd

In [20]:
# Re-read the UFO reports so this section works from a freshly loaded DataFrame.
ufo = pd.read_csv('http://bit.ly/uforeports')

In [21]:
# Preview the first three rows.
ufo.head(3)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [22]:
# .loc[row_label, column_label]: the row labelled 0 with all columns (":"), returned as a Series.
ufo.loc[0, :]

City                       Ithaca
Colors Reported               NaN
Shape Reported           TRIANGLE
State                          NY
Time               6/1/1930 22:00
Name: 0, dtype: object

In [24]:
# Label slices in .loc include both endpoints, so 0:2 returns rows 0, 1 and 2.
ufo.loc[0:2, :]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [25]:
# Column selection with .loc

In [26]:
# All rows (":") and the single column 'City', returned as a Series.
ufo.loc[:, 'City']

0                      Ithaca
1                 Willingboro
2                     Holyoke
3                     Abilene
4        New York Worlds Fair
                 ...         
18236              Grant Park
18237             Spirit Lake
18238             Eagle River
18239             Eagle River
18240                    Ybor
Name: City, Length: 18241, dtype: object

In [27]:
# Column label slices are inclusive as well: 'City' through 'State'.
ufo.loc[:, 'City':'State']

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO
3,Abilene,,DISK,KS
4,New York Worlds Fair,,LIGHT,NY
...,...,...,...,...
18236,Grant Park,,TRIANGLE,IL
18237,Spirit Lake,,DISK,IA
18238,Eagle River,,,WI
18239,Eagle River,RED,LIGHT,WI


In [28]:
# Combine row and column selection

In [30]:
# Rows 0 through 2 and columns 'City' through 'State'; both ranges are inclusive.
ufo.loc[0:2, 'City':'State']

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO


In [31]:
# Alternative route to the same table: take the first three rows, then drop the 'Time' column.
ufo.head(3).drop(columns='Time')

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO


In [41]:
# Boolean-mask filter with plain [] indexing: rows whose City equals 'Ithaca'.
ufo[ufo['City'] == 'Ithaca']

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
4068,Ithaca,,CIGAR,NY,6/1/1979 19:00
5631,Ithaca,,OTHER,MI,6/1/1987 17:00
6961,Ithaca,,OTHER,NY,1/10/1993 0:30
7573,Ithaca,RED GREEN,LIGHT,NY,10/15/1994 18:00
9088,Ithaca,,,NY,2/16/1996 21:45
16537,Ithaca,,FLASH,MI,6/3/2000 22:35
17049,Ithaca,,TEARDROP,NY,7/30/2000 20:20


In [39]:
# Two separate indexing steps: filter the rows first, then pick the State column
# from the filtered frame.
ufo[ufo['City'] == 'Holyoke']['State']

2        CO
771      MA
3221     MA
7982     MA
18211    MA
Name: State, dtype: object

In [42]:
# The same row filter expressed with .loc: boolean mask for the rows, ":" for all columns.
ufo.loc[ufo['City'] == 'Ithaca', :]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
4068,Ithaca,,CIGAR,NY,6/1/1979 19:00
5631,Ithaca,,OTHER,MI,6/1/1987 17:00
6961,Ithaca,,OTHER,NY,1/10/1993 0:30
7573,Ithaca,RED GREEN,LIGHT,NY,10/15/1994 18:00
9088,Ithaca,,,NY,2/16/1996 21:45
16537,Ithaca,,FLASH,MI,6/3/2000 22:35
17049,Ithaca,,TEARDROP,NY,7/30/2000 20:20


In [38]:
# Using the .loc indexer: row filter and column selection in a single indexing operation.
ufo.loc[ufo['City'] == 'Holyoke', 'State']

2        CO
771      MA
3221     MA
7982     MA
18211    MA
Name: State, dtype: object

In [43]:
# Using the .iloc indexer (selection by integer position)

In [44]:
# All rows, and the columns at integer positions 0 and 3 (City and State).
ufo.iloc[:, [0, 3]]

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO
3,Abilene,KS
4,New York Worlds Fair,NY
...,...,...
18236,Grant Park,IL
18237,Spirit Lake,IA
18238,Eagle River,WI
18239,Eagle River,WI


In [45]:
# Positional slices exclude the end: 0:4 selects the columns at positions 0 through 3.
ufo.iloc[:, 0:4]

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO
3,Abilene,,DISK,KS
4,New York Worlds Fair,,LIGHT,NY
...,...,...,...,...
18236,Grant Park,,TRIANGLE,IL
18237,Spirit Lake,,DISK,IA
18238,Eagle River,,,WI
18239,Eagle River,RED,LIGHT,WI


In [47]:
# Rows at positions 0, 1 and 2 (end-exclusive), all columns.
ufo.iloc[0:3, :]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00


In [52]:
# Passing a list of column names to [] selects exactly those columns.
ufo[['City', 'State']]

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO
3,Abilene,KS
4,New York Worlds Fair,NY
...,...,...
18236,Grant Park,IL
18237,Spirit Lake,IA
18238,Eagle River,WI
18239,Eagle River,WI


In [48]:
# The same column selection written with .loc: all rows, a list of column labels.
ufo.loc[:, ['City', 'State']]

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO
3,Abilene,KS
4,New York Worlds Fair,NY
...,...,...
18236,Grant Park,IL
18237,Spirit Lake,IA
18238,Eagle River,WI
18239,Eagle River,WI


In [54]:
# Columns at positions 0, 1 and 2; the end position 3 is excluded.
ufo.iloc[:, 0:3]

Unnamed: 0,City,Colors Reported,Shape Reported
0,Ithaca,,TRIANGLE
1,Willingboro,,OTHER
2,Holyoke,,OVAL
3,Abilene,,DISK
4,New York Worlds Fair,,LIGHT
...,...,...,...
18236,Grant Park,,TRIANGLE
18237,Spirit Lake,,DISK
18238,Eagle River,,
18239,Eagle River,RED,LIGHT


In [55]:
# Load the drinks data again, this time using the 'country' column as the index.
drinks = pd.read_csv('http://bit.ly/drinksbycountry', index_col='country')

In [56]:
# 'country' now supplies the row labels instead of being a regular column.
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [60]:
# Reload without index_col to return to the default integer index.
drinks = pd.read_csv('http://bit.ly/drinksbycountry')

In [61]:
# Preview the first five rows; 'country' is a regular column again.
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [70]:
# Rows at positions 0 through 3 and columns at positions 2 through 5 (slice ends are exclusive).
drinks.iloc[0:4, 2:6]

Unnamed: 0,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,0,0,0.0,Asia
1,132,54,4.9,Europe
2,0,14,0.7,Africa
3,138,312,12.4,Europe


In [71]:
# The .str accessor applies string methods element-wise; the result is a new Series.
drinks['continent'].str.upper()

0               ASIA
1             EUROPE
2             AFRICA
3             EUROPE
4             AFRICA
           ...      
188    SOUTH AMERICA
189             ASIA
190             ASIA
191           AFRICA
192           AFRICA
Name: continent, Length: 193, dtype: object

In [76]:
# Store the upper-cased continent names as a new column.
# Note: this adds the column to the existing drinks DataFrame in place.
drinks['continent_upper'] = drinks['continent'].str.upper()

In [77]:
# The new continent_upper column appears as the last column.
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent,continent_upper
0,Afghanistan,0,0,0,0.0,Asia,ASIA
1,Albania,89,132,54,4.9,Europe,EUROPE
2,Algeria,25,0,14,0.7,Africa,AFRICA
3,Andorra,245,138,312,12.4,Europe,EUROPE
4,Angola,217,57,45,5.9,Africa,AFRICA
