# ***Interpolation in Pandas***

- Interpolation also helps to fill missing values (`None` / `NaN`) by estimating each one from the data points that come before and after it.
- Unlike `fillna()`, it does not fill the gaps with a single default value; instead, every missing value is replaced with an estimate based on its neighbours.

In [None]:
import pandas as pd
import numpy as np

# Hourly temperature readings from an IoT device. NaN / None entries mark the
# hours for which no reading is available.
IoT_Temp = {
    # freq='h' spaces the 12 timestamps one hour apart
    # (other frequency aliases exist, e.g. 's', 'min', 'D').
    'tempreature_timestamp (per hours)': pd.date_range(
        "2026-01-01 02:00:00", periods=12, freq='h'
    ),
    'device_tempreature (deg C)': [
        np.nan, np.nan, 34.57, 38.98, np.nan, 37.65,
        38.04, 31.68, None, 33.53, np.nan, 35.27,
    ],
}

# Row labels run A1..F1 and then A2..F2, one label per reading.
IoT_df = pd.DataFrame(
    IoT_Temp,
    index=[f"{letter}{batch}" for batch in (1, 2) for letter in "ABCDEF"],
)

IoT_df

Unnamed: 0,tempreature_timestamp (per hours),device_tempreature (deg C)
A1,2026-01-01 02:00:00,
B1,2026-01-01 03:00:00,
C1,2026-01-01 04:00:00,34.57
D1,2026-01-01 05:00:00,38.98
E1,2026-01-01 06:00:00,
F1,2026-01-01 07:00:00,37.65
A2,2026-01-01 08:00:00,38.04
B2,2026-01-01 09:00:00,31.68
C2,2026-01-01 10:00:00,
D2,2026-01-01 11:00:00,33.53


In [None]:
# Count the null / missing values in each column. The result is printed
# explicitly because a notebook cell only auto-displays its LAST expression;
# as a bare statement in the middle of the cell it would be silently discarded.
print(IoT_df.isnull().sum())

# Work on a copy so the original DataFrame keeps its gaps for comparison.
new_IoT_df = IoT_df.copy()

# Filling every gap with one constant via fillna() ignores the neighbouring
# readings, so the filled values carry no information about the real trend:
# IoT_df.fillna(35.5, inplace=True)
# IoT_df

# To keep the data consistent we use interpolate() instead: each missing value
# is estimated from the valid readings around it. With method='linear' the gap
# is filled along the straight line between its neighbouring readings.
# NOTE: the leading NaNs (rows A1 and B1) have no earlier reading to
# interpolate from, so they stay NaN with the default fill direction; pass
# limit_direction='both' if those rows must be filled as well.
new_IoT_df['device_tempreature (deg C)'] = (
    new_IoT_df['device_tempreature (deg C)'].interpolate(method='linear')
)
new_IoT_df

# Polynomial alternatives to method='linear':
# 1. quadratic - a*x^2 + b*x + c
# 2. cubic     - a*x^3 + b*x^2 + c*x + d


Unnamed: 0,tempreature_timestamp (per hours),device_tempreature (deg C)
A1,2026-01-01 02:00:00,
B1,2026-01-01 03:00:00,
C1,2026-01-01 04:00:00,34.57
D1,2026-01-01 05:00:00,38.98
E1,2026-01-01 06:00:00,38.315
F1,2026-01-01 07:00:00,37.65
A2,2026-01-01 08:00:00,38.04
B2,2026-01-01 09:00:00,31.68
C2,2026-01-01 10:00:00,32.605
D2,2026-01-01 11:00:00,33.53


# ***Grouping In Pandas***

In [None]:
import pandas as pd

# Employee records, one (name, age, savings) row per employee.
empDf = pd.DataFrame(
    [
        ('Chunaram', 52, 7000000),
        ('Mangiram', 74, 5000),
        ('Changaram', 52, 7000000),
        ('Pangaram', 63, 12000000),
        ('Lambaram', 88, 0),
        ('Rajaram', 52, 5000),
    ],
    columns=['empName', 'empAge', 'empSavings'],
)

empDf

Unnamed: 0,empName,empAge,empSavings
0,Chunaram,52,7000000
1,Mangiram,74,5000
2,Changaram,52,7000000
3,Pangaram,63,12000000
4,Lambaram,88,0
5,Rajaram,52,5000


In [None]:
# Group the employees by age with groupby(), then total the savings
# within each age group.
ageGroup = empDf.groupby(by='empAge')

# 'sum' is only one option; other aggregations (mean, max, count, ...) can be
# tried here in the same way.
ageGroup['empSavings'].agg('sum')

Unnamed: 0_level_0,empSavings
empAge,Unnamed: 1_level_1
52,14005000
63,12000000
74,5000
88,0


In [None]:
import pandas as pd

# Hourly bank-visit log: one row per hour from 09:00 to 17:00 on 2026-02-23,
# with the amount withdrawn and deposited during that hour.
bankDf = pd.DataFrame({
    # Every column must have the same number of rows. A single-element list
    # here raises "ValueError: All arrays must be of the same length", so the
    # nine hourly timestamps are generated to match the nine amounts below.
    'visitng_timestamp': pd.date_range('2026-02-23 09:00:00', periods=9, freq='h'),
    'withdrawl_amount': [1, 0, 0, 4, 0, 0, 7, 0, 0.5],
    'deposit_amount': [3, 0, 7, 0, 0, 8, 0, 0, 5],
})

bankDf

Unnamed: 0,visitng_timestamp,withdrawl_amount,deposit_amount
0,2026-02-23 09:00:00,1.0,3
1,2026-02-23 10:00:00,0.0,0
2,2026-02-23 11:00:00,0.0,7
3,2026-02-23 12:00:00,4.0,0
4,2026-02-23 13:00:00,0.0,0
5,2026-02-23 14:00:00,0.0,8
6,2026-02-23 15:00:00,7.0,0
7,2026-02-23 16:00:00,0.0,0
8,2026-02-23 17:00:00,0.5,5
