### ***Interpolation in Pandas***

In [2]:
import pandas as pd
import numpy as np

# ***Time Based Interpolation in Pandas***

In [None]:
# pd.to_datetime - string ---> data and time format
# An Energy analysis company which collects hourly power readings from meters.
# due to network delays, some reading will be missed
# you have to interpolate the missing power usage values based on the time-series context to ensure the billing enginer calculates the energy consumption properly

# en_ds = {
#     "timestamp": pd.to_datetime([
#         "2026-01-10 01:00",
#         "2026-01-10 02:00",
#         "2026-01-10 04:00",
#         "2026-01-10 06:00",
#         "2026-01-10 09:00",
#     ]),
#     "energy_KwH": [120, np.nan, 168, np.nan, 289]
# }

# en_df = pd.DataFrame(en_ds)
# en_df_indexed = en_df.set_index('timestamp')

# # en_df_indexed.isnull().sum()

# print(f"before intepolation - {en_df_indexed}")

# en_df_indexed['energy_KwH'] = en_df_indexed['energy_KwH'].interpolate(method='time')

# print(f"after intepolation - {en_df_indexed}")






# 2. Flight Altitude Analysis
# An aircraft's black box records altitude every few seconds
# We need to analyze the altitude of the flight

# al_ds = {
#     "time": pd.to_datetime([
#         '2026-02-15 10:00:03',
#         '2026-02-15 10:00:04',
#         '2026-02-15 10:00:07',
#         '2026-02-15 10:00:08',
#         '2026-02-15 10:00:15',
#     ]),
#     'al_ft': [32000, np.nan, 32400, np.nan, 22000]
# }

# al_df = pd.DataFrame(al_ds)

# al_df_indexed = al_df.set_index('time')

# print(f"before intepolation - {al_df_indexed}")
# al_df_indexed['al_ft'] = al_df_indexed['al_ft'].interpolate(method='time')
# print(f"after intepolation - {al_df_indexed}")


before intepolation -                        al_ft
time                        
2026-02-15 10:00:03  32000.0
2026-02-15 10:00:04      NaN
2026-02-15 10:00:07  32400.0
2026-02-15 10:00:08      NaN
2026-02-15 10:00:15  22000.0
after intepolation -                        al_ft
time                        
2026-02-15 10:00:03  32000.0
2026-02-15 10:00:04  32100.0
2026-02-15 10:00:07  32400.0
2026-02-15 10:00:08  31100.0
2026-02-15 10:00:15  22000.0


# ***Index based Interpolation***

In [None]:
# an Automobile engineer is tesing engine torque at various RPM levels.
# However, due to sensor drops, torque readings at some RPM points are missing
# We'll use index-based interpolation since RPM (our index) is numerical

ds = {
    "RPM": [1000,1500,2000,3000,4500,5800],
    "torque_Nm": [120,np.nan, 160, np.nan, 220, 260]
}

df = pd.DataFrame(ds).set_index('RPM')

print(f"before intepolation - {df}")
df = df.interpolate(method="index")
print(f"after intepolation - {df}")




before intepolation -       torque_Nm
RPM            
1000      120.0
1500        NaN
2000      160.0
3000        NaN
4500      220.0
5800      260.0
after intepolation -       torque_Nm
RPM            
1000      120.0
1500      140.0
2000      160.0
3000      184.0
4500      220.0
5800      260.0


# ***Quadratic Interpolation***

*   works on 2nd degree polynomial curve, performs and predict smooth curves or coordinates but not smoother than cubic interpolation
*   algebric eq. for the quadratic interpolation - ax2 + bx + c



In [7]:
# when we are tracking a ball's trajectory which thrown from 4th floor of a building with respect to time, some data might be missing
# Quadratic Interpolation can estimate the path of the projectile (parabolic)

dataset = {
    "timeRate": [0,1,2,3,4],
    "height": [100, np.nan, 50, np.nan, 0]
}

df = pd.DataFrame(dataset)
print(f"before interpolation - \n {df}")

df['height'].interpolate(method='quadratic', inplace=True)

print(f"after interpolation - \n {df}")



before interpolation - 
   timeRate  height
0         0   100.0
1         1     NaN
2         2    50.0
3         3     NaN
4         4     0.0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['height'].interpolate(method='quadratic', inplace=True)


after interpolation - 
   timeRate  height
0         0   100.0
1         1    75.0
2         2    50.0
3         3    25.0
4         4     0.0


In [None]:
# In a vehicle acceleration test, sensors sometimes skips the readings
# in this scenario we wll use quas interpolation models for the non-linear motion tracking

df = pd.DataFrame({
    "time_s": [0,1,2,3,4,5],
    "speed_kmph": [0, 25, np.nan, 75, np.nan, 125]
})
print(f"before interpolation - \n {df}")

df['speed_kmph'].interpolate(method='quadratic', inplace=True)
print(f"after interpolation - \n {df}")

# ***Cubic Interpolation***


*   fits in 3rd degree polynomial curves
*   ax3 + bx2 + cx + d



In [13]:
# Scenario -
# in premiere pro, if some keyframes are lost, cubic interpolation rebuilds natural, fluid motion between frames




# 2. Scenario -
# wheather sensors might miss or lose some hourly based readings
# cubic interpolation gives smooth transition -
# df structur - 2 columns (time_h & temp_C)

df = pd.DataFrame({
    "hour_h": [0,4,5,12,18,48],
    "temp_c": [18, np.nan, 23, 25, np.nan, 22]
}).set_index('hour_h')

print(f"before interpolation - \n {df}")
df['temp_c'] = df['temp_c'].interpolate(method='cubic')
print(f"after interpolation - \n {df}")


before interpolation - 
         temp_c
hour_h        
0         18.0
4          NaN
5         23.0
12        25.0
18         NaN
48        22.0
after interpolation - 
            temp_c
hour_h           
0       18.000000
4       22.272056
5       23.000000
12      25.000000
18      23.561462
48      22.000000
