# Pandas Series
one-dimensional labelled array capable of holding data of any type (integer, string, float, python objects, etc.). The axis labels are collectively called indexes. Pandas Series is nothing but a column in an excel sheet. Labels need not be unique but must be a hashable type. The object supports both integer and label-based indexing and provides a host of methods for performing operations involving the index.

## Creating a Series

In [7]:
import pandas as pd
import numpy as np


# Creating empty series
ser = pd.Series()
print(ser)

# simple array
data = np.array(['rita', 'girwar', 'priyanshi', 'pragya', 'vijaya'])

ser = pd.Series(data)
print(ser)


Series([], dtype: float64)
0         rita
1       girwar
2    priyanshi
3       pragya
4       vijaya
dtype: object


# Pandas DataFrame
two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns). A Data frame is a two-dimensional data structure, i.e., data is aligned in a tabular fashion in rows and columns. Pandas DataFrame consists of three principal components, the data, rows, and columns

## Creating a DataFrame 

In [5]:
import pandas as pd

# Calling DataFrame constructor
df = pd.DataFrame()
print(df)

# list of strings
lst = ['rita', 'girwar', 'priyanshi', 'pragya', 'vijaya']

# Calling DataFrame constructor on list
df = pd.DataFrame(lst)
df


Empty DataFrame
Columns: []
Index: []


Unnamed: 0,0
0,rita
1,girwar
2,priyanshi
3,pragya
4,vijaya


### Creating DataFrame from dict of ndarray/lists

In [4]:
import pandas as pd

# intialise data of lists.
data = {'Name':['rita', 'girwar', 'priyanshi', 'pragya', 'vijaya'],
        'Age':[24, 24, 23, 24,25]}
 
# Create DataFrame
df = pd.DataFrame(data)
 
# Print the output.
df

Unnamed: 0,Name,Age
0,rita,24
1,girwar,24
2,priyanshi,23
3,pragya,24
4,vijaya,25


### Column Selection

In [9]:
import pandas as pd
 
# Define a dictionary containing employee data
data = {'Name':['rita', 'girwar', 'priyanshi', 'pragya', 'vijaya'],
        'Age':[24, 24, 23, 24,25],
        'Address':['Bhopal', 'Jodhpur', 'Bareili', 'Vidisha','Kolkata']}
 
# Convert the dictionary into DataFrame 
df = pd.DataFrame(data)
 
# select two columns
df[['Name', 'Address']]

Unnamed: 0,Name,Address
0,rita,Bhopal
1,girwar,Jodhpur
2,priyanshi,Bareili
3,pragya,Vidisha
4,vijaya,Kolkata


### Row Selection

In [41]:
# importing pandas package
import pandas as pd
 
# making data frame from csv file
df = pd.read_csv("Automobile_data.csv", index_col='company')
df

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,21,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23,12940.0


In [45]:
# retrieving row by loc method
print('Rows having company name : volvo ')
first = df.loc['volvo']
first

Rows having company name : volvo 


Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
volvo,sedan,104.3,188.8,ohc,four,114,23,12940.0
volvo,wagon,104.3,188.8,ohc,four,114,23,13415.0


In [46]:
print('Rows having company name : audi ')
second = df.loc['audi']
second

Rows having company name : audi 


Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
audi,sedan,99.8,176.6,ohc,four,102,24,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18,17450.0
audi,sedan,99.8,177.3,ohc,five,110,19,15250.0
audi,wagon,105.8,192.7,ohc,five,110,19,18920.0


### Indexing/Subset Selection and Selecting Data

Using df[]

In [66]:
import pandas as pd
 
# making data frame from csv file
df = pd.read_csv("Automobile_data.csv")

# retrieving columns by indexing operator
comp_col = df['company']
comp_col

0     alfa-romero
1     alfa-romero
2     alfa-romero
3            audi
4            audi
         ...     
56     volkswagen
57     volkswagen
58     volkswagen
59          volvo
60          volvo
Name: company, Length: 61, dtype: object

Using df.loc[]

In [56]:
import pandas as pd
 
# making data frame from csv file
df = pd.read_csv("Automobile_data.csv",index_col='company')

# retrieving row by loc method
print('Rows having company name : volvo ')
first = df.loc['volvo']
first

Rows having company name : volvo 


Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
volvo,sedan,104.3,188.8,ohc,four,114,23,12940.0
volvo,wagon,104.3,188.8,ohc,four,114,23,13415.0


Using df.iloc[<position>]

In [65]:
import pandas as pd
 
# making data frame from csv file
df = pd.read_csv("Automobile_data.csv",index_col='company')

# retrieving rows by iloc method 
first = df.iloc[0,7] #value at 1 row and 9 column
first

13495.0

### Accessing dataframe with a boolean index

In [74]:
# importing pandas as pd
import pandas as pd

# dictionary of lists
data = {'Name':['rita', 'girwar', 'priyanshi', 'pragya', 'vijaya'],
        'Age':[24, 24, 23, 24,25],
        'Address':['Bhopal', 'Jodhpur', 'Bareili', 'Vidisha','Kolkata']}

df = pd.DataFrame(data, index = [True, False, True, False,True])

print(df)

            Name  Age  Address
True        rita   24   Bhopal
False     girwar   24  Jodhpur
True   priyanshi   23  Bareili
False     pragya   24  Vidisha
True      vijaya   25  Kolkata


#### Accessing a Dataframe with a boolean index using .loc[]

In [76]:
df_true = df.loc[True]
df_true

Unnamed: 0,Name,Age,Address
True,rita,24,Bhopal
True,priyanshi,23,Bareili
True,vijaya,25,Kolkata


#### Accessing a Dataframe with a boolean index using .iloc[]

In [79]:
df_false = df.iloc[1]
df_false

Name        girwar
Age             24
Address    Jodhpur
dtype: object

#### Accessing a Dataframe with a boolean index using .ix[] (deprecated)

In [81]:
df_= df.ix[True]
df_

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,Name,Age,Address
True,rita,24,Bhopal
True,priyanshi,23,Bareili
True,vijaya,25,Kolkata


In [82]:
df__ = df.ix[1]
df__

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Name        girwar
Age             24
Address    Jodhpur
dtype: object

#### Applying a boolean mask to a dataframe

In [93]:
df = pd.DataFrame(data, index = [0, 1, 2, 3, 4])
df___ = df[[True, True, True, False, False]]
df___

Unnamed: 0,Name,Age,Address
0,rita,24,Bhopal
1,girwar,24,Jodhpur
2,priyanshi,23,Bareili


#### Masking data based on column value

In [88]:
df_age = df['Age'] == 24
df_age

0     True
1     True
2    False
3     True
4    False
Name: Age, dtype: bool

In [94]:
df_age = df['Age'] > 24
df_age

0    False
1    False
2    False
3    False
4     True
Name: Age, dtype: bool

In [98]:
df1 = df.index > 2
df[df1]

Unnamed: 0,Name,Age,Address
3,pragya,24,Vidisha
4,vijaya,25,Kolkata


## Working with Missing Data

### Checking for missing values using isnull() and notnull()

In [104]:
# importing pandas package
import pandas as pd
 
# making data frame from csv file
df = pd.read_csv("Automobile_data.csv", index_col='company')
df.isnull()

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,False,False,False,False,False,False,False,False
alfa-romero,False,False,False,False,False,False,True,False
alfa-romero,False,False,False,False,False,False,False,False
audi,False,False,False,False,False,False,False,False
audi,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...
volkswagen,False,False,False,False,False,False,False,False
volkswagen,False,False,False,False,False,False,False,False
volkswagen,False,False,False,False,False,False,False,False
volvo,False,False,False,False,False,False,False,False


In [123]:
df_null= pd.isnull(df['average-mileage'])
df_null

company
alfa-romero    False
alfa-romero     True
alfa-romero    False
audi           False
audi           False
               ...  
volkswagen     False
volkswagen     False
volkswagen     False
volvo          False
volvo          False
Name: average-mileage, Length: 61, dtype: bool

In [103]:
df.notnull()

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,True,True,True,True,True,True,True,True
alfa-romero,True,True,True,True,True,True,False,True
alfa-romero,True,True,True,True,True,True,True,True
audi,True,True,True,True,True,True,True,True
audi,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...
volkswagen,True,True,True,True,True,True,True,True
volkswagen,True,True,True,True,True,True,True,True
volkswagen,True,True,True,True,True,True,True,True
volvo,True,True,True,True,True,True,True,True


In [124]:
df_not_null= pd.notnull(df['average-mileage'])
df_not_null

company
alfa-romero     True
alfa-romero    False
alfa-romero     True
audi            True
audi            True
               ...  
volkswagen      True
volkswagen      True
volkswagen      True
volvo           True
volvo           True
Name: average-mileage, Length: 61, dtype: bool

### Filling missing values using fillna(), replace() and interpolate()

In [108]:
df1 = df.fillna(0) #fill with particular value
df1

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,0.0,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27.0,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37.0,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26.0,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23.0,12940.0


In [125]:
df_fill_previous_value=df.fillna(method ='pad') # fill null values with previous value
df_fill_previous_value

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27.0,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37.0,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26.0,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23.0,12940.0


In [126]:
df_fill_previous_value=df.fillna(method ='bfill') # fill null values with next value
df_fill_previous_value

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,19.0,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27.0,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37.0,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26.0,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23.0,12940.0


In [135]:
df['average-mileage'].fillna('No Average Mileage', inplace = True )
df

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,No Average Mileage,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23,12940.0


In [139]:
df = df.replace('No Average Mileage',np.nan) # replace No Average Mileage with null field
df

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27.0,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37.0,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26.0,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23.0,12940.0


In [140]:
df.head()

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0


In [141]:
df3 = df.interpolate(method ='linear', limit_direction ='forward')
df3

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,20.0,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27.0,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37.0,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26.0,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23.0,12940.0


In [138]:
df.head()

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,No Average Mileage,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18,17450.0


In [117]:
df4 = df.interpolate(method ='linear', limit_direction ='backward', limit = 1)
df4

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,20.0,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27.0,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37.0,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26.0,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23.0,12940.0


#### Dropping missing values using dropna()

In [119]:
df.head()

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0


In [120]:
df5 = df.dropna()
df5

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
audi,sedan,99.8,177.3,ohc,five,110,19.0,15250.0
audi,wagon,105.8,192.7,ohc,five,110,19.0,18920.0
bmw,sedan,101.2,176.8,ohc,four,101,23.0,16430.0
bmw,sedan,101.2,176.8,ohc,four,101,23.0,16925.0
bmw,sedan,101.2,176.8,ohc,six,121,21.0,20970.0
bmw,sedan,103.5,189.0,ohc,six,182,16.0,30760.0


In [142]:
df6 = df.dropna(how = 'all')# drop if all values is null
df6

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,convertible,88.6,168.8,dohc,four,111,,16500.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
...,...,...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85,27.0,7975.0
volkswagen,sedan,97.3,171.7,ohc,four,52,37.0,7995.0
volkswagen,sedan,97.3,171.7,ohc,four,100,26.0,9995.0
volvo,sedan,104.3,188.8,ohc,four,114,23.0,12940.0


In [144]:
df7 = df.dropna(axis = 1) # drop column with atleast 1 null values
df7 # price, average-mileage column has been dropped

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111
alfa-romero,convertible,88.6,168.8,dohc,four,111
alfa-romero,hatchback,94.5,171.2,ohcv,six,154
audi,sedan,99.8,176.6,ohc,four,102
audi,sedan,99.4,176.6,ohc,five,115
...,...,...,...,...,...,...
volkswagen,sedan,97.3,171.7,ohc,four,85
volkswagen,sedan,97.3,171.7,ohc,four,52
volkswagen,sedan,97.3,171.7,ohc,four,100
volvo,sedan,104.3,188.8,ohc,four,114


In [147]:
df8 = df.dropna(axis = 0,how ='any') # drop row with atleast 1 null values
df8 # price, average-mileage column has been dropped

Unnamed: 0_level_0,body-style,wheel-base,length,engine-type,num-of-cylinders,horsepower,average-mileage,price
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alfa-romero,convertible,88.6,168.8,dohc,four,111,21.0,13495.0
alfa-romero,hatchback,94.5,171.2,ohcv,six,154,19.0,16500.0
audi,sedan,99.8,176.6,ohc,four,102,24.0,13950.0
audi,sedan,99.4,176.6,ohc,five,115,18.0,17450.0
audi,sedan,99.8,177.3,ohc,five,110,19.0,15250.0
audi,wagon,105.8,192.7,ohc,five,110,19.0,18920.0
bmw,sedan,101.2,176.8,ohc,four,101,23.0,16430.0
bmw,sedan,101.2,176.8,ohc,four,101,23.0,16925.0
bmw,sedan,101.2,176.8,ohc,six,121,21.0,20970.0
bmw,sedan,103.5,189.0,ohc,six,182,16.0,30760.0


In [149]:
org_len = len(df)
new_len = len(df8)
null_rows_removed = org_len - new_len
null_rows_removed

4

## Iterating over rows