In [3]:
import pandas as pd
import numpy as np

In [4]:
# Linear Interpolation

# Fills gaps in the data using the straight line through the two nearest
# valid points. When the missing values are at the very start or end of a
# column, the nearest valid value is repeated instead.
def ends_filler(df):
    """Patch the first and last value of every integer/float column of ``df``.

    Each column whose dtype kind is 'i' or 'f' is passed through
    ``first_filler`` and then ``last_filler``; all other columns are left
    alone. The frame is modified in place and also returned.
    """
    numeric_kinds = ['i', 'f']
    for name in df.columns:
        if df[name].dtype.kind not in numeric_kinds:
            continue
        # Fix the leading element first, then the trailing one.
        df[name] = last_filler(first_filler(df[name]))
    return df

def first_filler(column):
    """Fill a missing first element with the nearest valid value after it.

    Only the element at position 0 is written; other missing values are left
    untouched. Access is positional, so the Series may carry any index
    (strings, dates, a filtered range, ...), not only a 0-based one.

    Parameters
    ----------
    column : pd.Series
        Series to patch. It is modified in place.

    Returns
    -------
    pd.Series
        The same ``column`` object. An empty Series, or one whose first
        element is already valid, is returned unchanged. When every element
        is missing, the first element is set to 0.0.
    """
    if len(column) == 0 or not pd.isnull(column.iloc[0]):
        return column
    # Positions (not labels) of all non-missing elements, in ascending order.
    valid_positions = np.flatnonzero(column.notnull().to_numpy())
    if valid_positions.size == 0:
        # Nothing to copy from: fall back to zero.
        column.iloc[0] = 0.0
    else:
        column.iloc[0] = column.iloc[int(valid_positions[0])]
    return column
    
def last_filler(column):
    """Fill a missing last element with the nearest valid value before it.

    Only the element at the last position is written; other missing values
    are left untouched. Access is positional, so the Series may carry any
    index (strings, dates, a filtered range, ...), not only a 0-based one.

    Parameters
    ----------
    column : pd.Series
        Series to patch. It is modified in place.

    Returns
    -------
    pd.Series
        The same ``column`` object. An empty Series, or one whose last
        element is already valid, is returned unchanged. When every element
        is missing, the last element is set to 0.0.
    """
    if len(column) == 0 or not pd.isnull(column.iloc[-1]):
        return column
    # Positions (not labels) of all non-missing elements, in ascending order.
    valid_positions = np.flatnonzero(column.notnull().to_numpy())
    if valid_positions.size == 0:
        # Nothing to copy from: fall back to zero.
        column.iloc[-1] = 0.0
    else:
        column.iloc[-1] = column.iloc[int(valid_positions[-1])]
    return column
    
def prev_valid_index(column, pos):
    """Return the position of the nearest non-missing element at or before ``pos``.

    ``pos`` and the result are positions (0 .. len-1), independent of the
    Series index labels.

    Parameters
    ----------
    column : pd.Series
        Series to scan.
    pos : int
        Position to start scanning from, moving towards the beginning.

    Returns
    -------
    int
        ``pos`` itself when the element there is valid, otherwise the closest
        earlier valid position, or -1 when there is none. The scan only runs
        for interior positions (0 < pos < len(column) - 1); for the first or
        last position ``pos`` is returned as-is without inspecting the data.
    """
    last_pos = len(column) - 1
    idx = pos
    if 0 < pos < last_pos:
        # pd.isnull alone covers NaN/None/NaT and is safe on non-numeric values.
        while idx >= 0 and pd.isnull(column.iloc[idx]):
            idx -= 1
    return idx

def next_valid_index(column, pos):
    """Return the position of the nearest non-missing element at or after ``pos``.

    ``pos`` and the result are positions (0 .. len-1), independent of the
    Series index labels.

    Parameters
    ----------
    column : pd.Series
        Series to scan.
    pos : int
        Position to start scanning from, moving towards the end.

    Returns
    -------
    int
        ``pos`` itself when the element there is valid, otherwise the closest
        later valid position, or ``len(column)`` when there is none. The scan
        only runs for interior positions (0 < pos < len(column) - 1); for the
        first or last position ``pos`` is returned as-is without inspecting
        the data.
    """
    last_pos = len(column) - 1
    idx = pos
    if 0 < pos < last_pos:
        # pd.isnull alone covers NaN/None/NaT and is safe on non-numeric values.
        while idx <= last_pos and pd.isnull(column.iloc[idx]):
            idx += 1
    return idx

def linear_filler(column):
    """Fill interior gaps of ``column`` by linear interpolation over position.

    For every run of missing values that has a valid value on both sides, the
    run is replaced by points on the straight line joining those two values,
    using the element position as the x coordinate (elements are treated as
    equally spaced, whatever the index labels are).

    Gaps touching the start or the end of the column have no second anchor
    and are left as they are; so are gaps whose neighbouring values are not
    numbers (e.g. a text column containing ``None``).

    Parameters
    ----------
    column : pd.Series
        Series to fill. It is modified in place.

    Returns
    -------
    pd.Series
        The same ``column`` object.
    """
    missing = column.isnull().to_numpy()
    if not missing.any():
        return column

    # Positions of valid elements; each consecutive pair brackets at most one gap.
    anchors = np.flatnonzero(~missing).tolist()
    for start, end in zip(anchors[:-1], anchors[1:]):
        if end - start < 2:
            continue  # adjacent valid values, nothing between them
        start_value = column.iloc[start]
        end_value = column.iloc[end]
        if not (pd.api.types.is_number(start_value) and pd.api.types.is_number(end_value)):
            continue  # cannot draw a line between non-numeric values
        # Line y = linear_a * x + linear_b through (start, start_value) and (end, end_value).
        linear_a = (end_value - start_value) / (end - start)
        linear_b = start_value - linear_a * start
        for x in range(start + 1, end):
            column.iloc[x] = linear_a * x + linear_b
    return column

In [5]:
# Show every column when a DataFrame is displayed.
pd.options.display.max_columns = None

# Complete sample: eleven rows, no missing values.
original_data = {
    'Date': ['2022/01/01'] * 11,
    'Atmospheric Pressure (mB)': [
        920.5, 920.8, 920.8, 920.4, 919.7, 919.0,
        919.0, 919.2, 919.7, 920.3, 921.2,
    ],
    'Global Radiation - Daylight (Kj/m²)': [0.0] * 9 + [49.8, 420.6],
    'Air Temperature - Dry Bulb (°C)': [
        19.3, 19.1, 19.0, 18.7, 18.5, 18.3,
        18.3, 18.2, 18.3, 18.5, 19.3,
    ],
    'Air Relative Humidity (%)': [
        92.0, 93.0, 93.0, 93.0, 93.0, 93.0,
        94.0, 94.0, 94.0, 95.0, 93.0,
    ],
    'Wind Gust (m/s)': [0.7, 0.1] + [0.0] * 7 + [0.3, 0.3],
}

# Same layout with np.nan gaps at the start, in the middle and at the end
# of different columns.
data = {
    'Date': ['2022/01/01'] * 11,
    'Atmospheric Pressure (mB)': [
        920.5, np.nan, 920.8, 920.4, 919.7, np.nan,
        919.0, np.nan, 919.7, 920.3, 921.2,
    ],
    'Global Radiation - Daylight (Kj/m²)': [np.nan] * 5 + [0.0, 0.0, 0.0, 0.0, 49.8, 420.6],
    'Air Temperature - Dry Bulb (°C)': [19.3, 19.1, 19.0] + [np.nan] * 5 + [17.3, 18.5, 19.3],
    'Air Relative Humidity (%)': [
        np.nan, np.nan, np.nan, 93.0, np.nan, np.nan,
        np.nan, 94.0, 94.0, 95.0, 93.0,
    ],
    'Wind Gust (m/s)': [0.7, 0.1, 0.1, 0.2, 0.6] + [np.nan] * 6,
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,Date,Atmospheric Pressure (mB),Global Radiation - Daylight (Kj/m²),Air Temperature - Dry Bulb (°C),Air Relative Humidity (%),Wind Gust (m/s)
0,2022/01/01,920.5,,19.3,,0.7
1,2022/01/01,,,19.1,,0.1
2,2022/01/01,920.8,,19.0,,0.1
3,2022/01/01,920.4,,,93.0,0.2
4,2022/01/01,919.7,,,,0.6
5,2022/01/01,,0.0,,,
6,2022/01/01,919.0,0.0,,,
7,2022/01/01,,0.0,,94.0,
8,2022/01/01,919.7,0.0,17.3,94.0,
9,2022/01/01,920.3,49.8,18.5,95.0,


In [6]:
# Step 1: patch the first/last value of each numeric column. ends_filler
# modifies the frame it receives, so the `df` shown by the previous cell no
# longer matches its current contents after this line runs.
df = ends_filler(df)
# Step 2: interpolate the remaining gaps, one column at a time. Every column
# is passed through linear_filler, including the text column 'Date'.
for column in df.columns:
    df[column] = linear_filler(df[column])
df

Unnamed: 0,Date,Atmospheric Pressure (mB),Global Radiation - Daylight (Kj/m²),Air Temperature - Dry Bulb (°C),Air Relative Humidity (%),Wind Gust (m/s)
0,2022/01/01,920.5,0.0,19.3,93.0,0.7
1,2022/01/01,920.65,0.0,19.1,93.0,0.1
2,2022/01/01,920.8,0.0,19.0,93.0,0.1
3,2022/01/01,920.4,0.0,18.716667,93.0,0.2
4,2022/01/01,919.7,0.0,18.433333,93.25,0.6
5,2022/01/01,919.35,0.0,18.15,93.5,0.6
6,2022/01/01,919.0,0.0,17.866667,93.75,0.6
7,2022/01/01,919.35,0.0,17.583333,94.0,0.6
8,2022/01/01,919.7,0.0,17.3,94.0,0.6
9,2022/01/01,920.3,49.8,18.5,95.0,0.6
