## Library Imports

In [72]:
import pandas as pd
import numpy as np

%matplotlib inline

## Loading Dataset

In [73]:
# Read the table using the first two CSV columns as a two-level index, then
# transpose it so the (Discipline, Year) pairs become the column labels and
# each row is one academic year.
dataset = pd.read_csv("dataset.csv", index_col=[0, 1]).T
dataset

Discipline,Science & Technology Disciplines,Science & Technology Disciplines,Science & Technology Disciplines,Science & Technology Disciplines,Science & Technology Disciplines,Other Disciplines,Other Disciplines,Other Disciplines,Other Disciplines,Other Disciplines,Total
Year,Science,Engg.,Medicine,Agri.,Vety.Sc.,Arts,Commerce,Law,Education,Others*,Total
1974-75,106.2,1.3,21.0,0.3,0.08,363.0,22.1,5.9,29.0,4.1,553.0
1979-80,140.1,4.4,24.4,1.1,0.2,397.9,68.0,11.0,34.5,7.4,689.0
1985-86,215.7,12.2,37.5,2.3,0.6,576.3,156.7,17.6,38.6,9.0,1067.5
1991-92,302.0,18.3,51.1,3.6,0.9,824.9,212.0,24.9,58.1,15.1,1512.2
1992-93,318.6,19.2,53.8,3.8,1.0,867.5,222.0,26.2,61.1,15.9,1590.3
1993-94,334.4,20.0,,,,905.1,235.3,30.0,65.1,74.1,1664.1
1994-95,415.0,24.9,,,,1123.2,292.0,37.3,80.8,91.9,2065.0
1995-96,440.4,26.4,,,,1191.8,309.8,39.6,85.7,97.6,2191.3
1996-97,462.9,27.6,,,,1252.7,325.7,41.7,90.1,102.5,2303.2
1997-98,469.6,51.3,80.7,14.7,2.4,1330.5,332.6,48.9,83.2,31.8,2445.7


## Displaying NA Values (Missing Data)

In [74]:
# Positional slice of the block of NaNs: rows 5-8 (1993-94 to 1996-97)
# by columns 2-4 (Medicine, Agri., Vety.Sc.).
dataset.iloc[5:9, 2:5]

Discipline,Science & Technology Disciplines,Science & Technology Disciplines,Science & Technology Disciplines
Year,Medicine,Agri.,Vety.Sc.
1993-94,,,
1994-95,,,
1995-96,,,
1996-97,,,


## Imputations
### 1. Mean Imputation

In [75]:
# Mean imputation: every NaN is replaced by the mean of its own column.
# The column means are computed over the non-missing entries only.
means = dataset.mean(axis=0)
mean_imputed = dataset.fillna(value=means)

print(means)
mean_imputed.iloc[5:9, 2:5]

Discipline                        Year       
Science & Technology Disciplines  Science         483.341176
                                  Engg.            66.794118
                                  Medicine         89.453846
                                  Agri.             8.184615
                                  Vety.Sc.          2.052308
Other Disciplines                 Arts           1289.123529
                                  Commerce        364.000000
                                  Law              41.911765
                                  Education        68.541176
                                  Others*          39.858824
Total                             Total          2430.005882
dtype: float64


Discipline,Science & Technology Disciplines,Science & Technology Disciplines,Science & Technology Disciplines
Year,Medicine,Agri.,Vety.Sc.
1993-94,89.453846,8.184615,2.052308
1994-95,89.453846,8.184615,2.052308
1995-96,89.453846,8.184615,2.052308
1996-97,89.453846,8.184615,2.052308


### 2. Hot Deck Imputation - filling prior value

In [76]:
# Forward fill (last observation carried forward): each NaN takes the most
# recent non-missing value above it in the same column.
# DataFrame.ffill() is used instead of fillna(method='ffill') because the
# `method` argument of fillna is deprecated in pandas >= 2.1; the result is
# identical.
hot_deck = dataset.ffill()
hot_deck.iloc[5:9, 2:5]

Discipline,Science & Technology Disciplines,Science & Technology Disciplines,Science & Technology Disciplines
Year,Medicine,Agri.,Vety.Sc.
1993-94,53.8,3.8,1.0
1994-95,53.8,3.8,1.0
1995-96,53.8,3.8,1.0
1996-97,53.8,3.8,1.0


### 3. Hot Deck Imputation - filling later value

In [77]:
# Backward fill (next observation carried backward): each NaN takes the
# first non-missing value below it in the same column.
# DataFrame.bfill() is used instead of fillna(method='bfill') because the
# `method` argument of fillna is deprecated in pandas >= 2.1; the result is
# identical.
hot_deck_2 = dataset.bfill()
hot_deck_2.iloc[5:9, 2:5]

Discipline,Science & Technology Disciplines,Science & Technology Disciplines,Science & Technology Disciplines
Year,Medicine,Agri.,Vety.Sc.
1993-94,80.7,14.7,2.4
1994-95,80.7,14.7,2.4
1995-96,80.7,14.7,2.4
1996-97,80.7,14.7,2.4


## Interpolation - Linear Interpolation

In [78]:
# Linear interpolation down each column. The "linear" method ignores the
# index labels and treats consecutive rows as equally spaced, so each gap is
# filled with evenly stepped values between its two neighbouring observations.
dataset_interpolated = dataset.interpolate(method="linear", axis=0)
dataset_interpolated.iloc[5:9, 2:5]

Discipline,Science & Technology Disciplines,Science & Technology Disciplines,Science & Technology Disciplines
Year,Medicine,Agri.,Vety.Sc.
1993-94,59.18,5.98,1.28
1994-95,64.56,8.16,1.56
1995-96,69.94,10.34,1.84
1996-97,75.32,12.52,2.12


## Verification - Comparing an Interpolated Value with a Known Value

In [79]:
# Hold-out check: hide one known value, re-estimate it by linear
# interpolation, and compare the estimate with the true value.
row_pos, col_pos = 7, 1  # positional indices: row 1995-96, column Engg.
original_value = dataset.iloc[row_pos, col_pos]

# Mask the value on a copy so the shared `dataset` frame is never mutated.
# Assigning NaN into `dataset` itself would make this cell non-idempotent
# (a second run would read NaN as the "original" value) and would silently
# change the results of every earlier cell if it were re-run.
masked = dataset.copy()
masked.iloc[row_pos, col_pos] = np.nan

interpolated_value = masked.interpolate().iloc[row_pos, col_pos]

print(original_value, interpolated_value)

26.4 26.25
