# **Interpolation**

- Preserve Data Integrity
- Smooth Trends
- Avoid Data Loss

In [47]:
import pandas as pd

In [48]:
data = {
    "Name": ["John", "Jane", "Bob", "Alice"],
    "Age": [25, None, 35, 40],
    "Salary": [50000, 60000, 70000, 80000],
    "Department": ["IT", "HR", "IT", "Finance"]
}
df = pd.DataFrame(data)
print(df)

    Name   Age  Salary Department
0   John  25.0   50000         IT
1   Jane   NaN   60000         HR
2    Bob  35.0   70000         IT
3  Alice  40.0   80000    Finance


## **01**- Linear Interpolation

In [49]:
print("Before Interpolation:")
print(df)

Before Interpolation:
    Name   Age  Salary Department
0   John  25.0   50000         IT
1   Jane   NaN   60000         HR
2    Bob  35.0   70000         IT
3  Alice  40.0   80000    Finance


### Select Columns
Selecting specific columns from DataFrame

In [50]:
df['Age'] = df['Age'].interpolate(method='linear')
print("\nDataFrame after handling missing values in 'Age' column:")
print(df)


DataFrame after handling missing values in 'Age' column:
    Name   Age  Salary Department
0   John  25.0   50000         IT
1   Jane  30.0   60000         HR
2    Bob  35.0   70000         IT
3  Alice  40.0   80000    Finance


## 02 Time-Weighted Interpolation (method='time')
This is arguably the most important method for time-series analysis.

In [51]:
# Create data with irregular dates
df_dates = pd.DataFrame({
    'Date': pd.to_datetime(['2023-01-01', '2023-01-04', '2023-01-07', '2023-01-10', '2023-01-15', '2023-01-20']),
    'Value': [10, None, 30, 40, None, 60]
})
df_dates.set_index('Date', inplace=True)
print("\nDataFrame with irregular dates:")
print(df_dates)


DataFrame with irregular dates:
            Value
Date             
2023-01-01   10.0
2023-01-04    NaN
2023-01-07   30.0
2023-01-10   40.0
2023-01-15    NaN
2023-01-20   60.0


In [52]:
# Interpolate based on time
df_dates['Value'] = df_dates['Value'].interpolate(method='time')
print("\nDataFrame after time-based interpolation:")
print(df_dates)


DataFrame after time-based interpolation:
            Value
Date             
2023-01-01   10.0
2023-01-04   20.0
2023-01-07   30.0
2023-01-10   40.0
2023-01-15   50.0
2023-01-20   60.0


## 03. Nearest Neighbor Interpolation (method='nearest')
This method fills the missing value with the value of the nearest valid data point. It does not calculate an average; it simply copies the closest neighbor.

In [53]:
stock_data = {
    'Date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']),
    'Stock_Price': [100, None, 102, None, 105]
}
df_stock = pd.DataFrame(stock_data)
print("\nStock DataFrame before interpolation:")
print(df_stock)


Stock DataFrame before interpolation:
        Date  Stock_Price
0 2023-01-01        100.0
1 2023-01-02          NaN
2 2023-01-03        102.0
3 2023-01-04          NaN
4 2023-01-05        105.0


### Import Libraries
Loading required packages for data analysis

In [54]:


import scipy  # pandas uses SciPy for method='nearest'

df_stock['Nearest_Fill'] = df_stock['Stock_Price'].interpolate(method='nearest')
print("\nStock DataFrame after nearest neighbor interpolation:")
print(df_stock)


Stock DataFrame after nearest neighbor interpolation:
        Date  Stock_Price  Nearest_Fill
0 2023-01-01        100.0         100.0
1 2023-01-02          NaN         100.0
2 2023-01-03        102.0         102.0
3 2023-01-04          NaN         102.0
4 2023-01-05        105.0         105.0


## 04. Polynomial Interpolation (method='polynomial')
Real-world data often follows a curve rather than a straight line. Polynomial interpolation fits a curve (of a specified order) to the data points.

- Order 2 (Quadratic): A parabolic curve (1 bend).

- Order 3 (Cubic): A more complex curve (2 bends).

In [55]:
temp_data = {
    'Day': [1, 2, 3, 4, 5, 6, 7],
    'Temperature': [30, None, None, 35, None, 40, 42]
}
df_temp = pd.DataFrame(temp_data)
print("\nTemperature DataFrame before interpolation:")
print(df_temp)


Temperature DataFrame before interpolation:
   Day  Temperature
0    1         30.0
1    2          NaN
2    3          NaN
3    4         35.0
4    5          NaN
5    6         40.0
6    7         42.0


### Select Columns
Selecting specific columns from DataFrame

In [56]:
df['Poly_Fill'] = df_temp['Temperature'].interpolate(method='polynomial', order=2)
print("\nTemperature DataFrame after polynomial interpolation:")
print(df_temp)


Temperature DataFrame after polynomial interpolation:
   Day  Temperature
0    1         30.0
1    2          NaN
2    3          NaN
3    4         35.0
4    5          NaN
5    6         40.0
6    7         42.0


## 05. Padding (Forward Fill)

In [57]:
HB_data = {
    'Hour': [1, 2, 3, 4, 5, 6, 7, 8],
    'Heart_Beat': [70, None, 75, None, None, 80, 82, None]
}
df_hb = pd.DataFrame(HB_data)
print("\nHeart Beat DataFrame before interpolation:")
print(df_hb)


Heart Beat DataFrame before interpolation:
   Hour  Heart_Beat
0     1        70.0
1     2         NaN
2     3        75.0
3     4         NaN
4     5         NaN
5     6        80.0
6     7        82.0
7     8         NaN


In [58]:
df_hb['Pad_Fill'] = df_hb['Heart_Beat'].ffill()
print("\nHeart Beat DataFrame after forward fill interpolation:")
print(df_hb)


Heart Beat DataFrame after forward fill interpolation:
   Hour  Heart_Beat  Pad_Fill
0     1        70.0      70.0
1     2         NaN      70.0
2     3        75.0      75.0
3     4         NaN      75.0
4     5         NaN      75.0
5     6        80.0      80.0
6     7        82.0      82.0
7     8         NaN      82.0
