### Interpolate the output data of a csv file

For a provided csv file, the interpolated values of the "welle_z" column are computed and saved as "welle_z_ipo". If necessary, outliers can be removed.

In [None]:
import numpy as np
import pandas as pd

from datetime import datetime, timedelta 

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as ex
import plotly.io as pio

Define Paths & Parameters

In [None]:
# Switch between the two processing paths of this notebook:
# True  -> the hard-coded outlier window in "welle_z" is flattened first and
#          the cleaned frame is interpolated, plotted and saved;
# False -> the data is interpolated, plotted and saved exactly as read.
cleanOutlier = False

# Trace mode handed to every plotly Scatter trace.
scatter_mode = 'lines'

# CSV file that is read in.
filepath = "X:\\KI Praktikum\\validate_Data\\2023_08_23_filtered_data\\Versuch29_08_2023_trocken_iso_5s.csv"

# CSV file the frame including the "welle_z_ipo" column is written to.
# NOTE(review): the input name says "Versuch29_08_2023_trocken" while the
# output name says "Versuch28_08_2023_M8" - confirm this is intended.
interpolatedFilePath = "X:\\KI Praktikum\\validate_Data\\2023_08_23_filtered_data\\filtered_interpolated_Versuch28_08_2023_M8_iso_5s.csv"

# Earlier input/output pair, kept for reference:
#   "G:\\Innovations@HELLER\\DN\\KI\\Temperaturkompensation\\Edgebox\Versuchsdaten\\5s_data\\for_interpolation\\Versuch11_12_2022_M8_iso.csv"
#   "X:\\KI Praktikum\\5s_data_interpolated\\Versuch11_12_2022_M8_iso_interpolated.csv"

Read in data from csv and plot the original data

In [None]:
# Load the raw measurement file; `df` stays untouched as the reference copy.
df = pd.read_csv(filepath_or_buffer=filepath)

# Show the first five rows as a quick sanity check of the columns.
df.head(n=5)

In [None]:
# Working copy with all columns; the later cells modify this frame, not `df`.
# A column subset could be selected here instead, e.g.
# df[["Unnamed: 0", "date", "t_bett", "t_motor", "t_spindle", "DRZ2", "M8", "M121", "M127", "M7", "given2model", "welle_z"]]
df_cols = df.copy(deep=True)

In [None]:
# Print the row(s) whose original row number ("Unnamed: 0") equals 19000.
is_row_19000 = df_cols["Unnamed: 0"].eq(19000)
print(df_cols[is_row_19000])

In [None]:
# Overview figure: one subplot per selected column, all sharing the date axis.
scatter_mode = 'lines'
plot_columns = ["Unnamed: 0", "date", "t_bett", "t_motor", "t_spindle", "DRZ2",
                "M8", "M121", "M127", "M7", "given2model", "welle_z"]
df_plot = df[plot_columns].copy()

fig = make_subplots(
    rows=len(df_plot.columns),
    cols=1,
    shared_xaxes=True,
    print_grid=True,
    vertical_spacing=0.01,
)

# Subplot rows are 1-based, hence start=1.
for subplot_row, column in enumerate(df_plot.columns, start=1):
    trace = go.Scatter(x=df_plot['date'], y=df_plot[column], name=column, mode=scatter_mode)
    fig.add_trace(trace, row=subplot_row, col=1)

fig.update_layout(height=1000, width=1300, title_text="Daten im Dataframe")
fig.show()

In [None]:
if cleanOutlier:
    # Flatten the surprisingly high "welle_z" readings around 18:59:29
    # (row positions 2671-2694) to the mean of the two readings that directly
    # enclose the window.
    outlier_start, outlier_stop = 2671, 2695      # positional slice, stop is exclusive
    inspect_first, inspect_last = 20518, 20577    # "Unnamed: 0" range printed before/after

    # State before cleaning.
    print(df_cols.loc[df_cols['Unnamed: 0'].between(inspect_first, inspect_last)])

    df_clean = df_cols.copy()
    welle_z_pos = df_clean.columns.get_loc("welle_z")
    neighbour_mean = (
        df_clean.iloc[outlier_start - 1, welle_z_pos]
        + df_clean.iloc[outlier_stop, welle_z_pos]
    ) / 2
    # Write through a single .iloc call on the frame itself: the chained form
    # df_clean["welle_z"].iloc[...] = ... assigns to a temporary Series and is
    # not guaranteed to reach df_clean (no effect under pandas Copy-on-Write).
    df_clean.iloc[outlier_start:outlier_stop, welle_z_pos] = neighbour_mean

    # State after cleaning; the mask is built from df_clean only.
    print(df_clean.loc[df_clean['Unnamed: 0'].between(inspect_first, inspect_last)])

In [None]:
if cleanOutlier:
    df_clean['date'] = pd.to_datetime(df_clean['date'])

    # Keep a "welle_z" reading only where it differs from the row directly
    # before it; repeated readings become NaN and are then filled by
    # time-weighted interpolation between the remaining readings.
    # The first row has no predecessor and is therefore always kept.
    welle_z = df_clean['welle_z']
    is_repeat = welle_z.eq(welle_z.shift())

    # method='time' needs a DatetimeIndex, so interpolate on a temporary Series
    # indexed by date. The result is assigned back explicitly, because an
    # inplace interpolate on df_clean['welle_z_ipo'] acts on a temporary
    # Series and is a no-op under pandas Copy-on-Write.
    changes_only = pd.Series(
        welle_z.mask(is_repeat).to_numpy(),
        index=pd.DatetimeIndex(df_clean['date']),
    )
    df_clean['welle_z_ipo'] = changes_only.interpolate(method='time').to_numpy()

    # Index the frame by the original row number, as the save cell expects.
    df_clean.index = df_clean['Unnamed: 0']

    # Plot original and interpolated signal on top of each other.
    fig = make_subplots(rows=1, cols=1, shared_xaxes=True, print_grid=True, vertical_spacing=0.02)

    fig.add_trace(go.Scatter(x=df_clean['date'], y=df_clean['welle_z'], name='welle_z', mode=scatter_mode), row=1, col=1)
    fig.add_trace(go.Scatter(x=df_clean['date'], y=df_clean['welle_z_ipo'], name='welle_z_interpolated', mode=scatter_mode), row=1, col=1)
    fig.update_yaxes(title_text='Abweichungen [mm]', row=1, col=1)

    fig.update_layout(height=600, width=1300, title_text="Welle_z original und interpoliert")
    fig.show()

Compute the interpolated values for welle_z and plot the original and interpolated values of welle_z

In [None]:
if not cleanOutlier:
    # Parse the "date" column into datetimes.
    df_cols['date'] = df_cols['date'].pipe(pd.to_datetime)


In [None]:
if not cleanOutlier:
    # Keep a "welle_z" reading only where it differs from the row directly
    # before it; repeated readings become NaN and are then filled by
    # time-weighted interpolation between the remaining readings.
    # The first row has no predecessor and is therefore always kept.
    welle_z = df_cols['welle_z']
    is_repeat = welle_z.eq(welle_z.shift())

    # method='time' needs a DatetimeIndex, so interpolate on a temporary Series
    # indexed by date. Working on positions (not index labels) keeps this cell
    # re-runnable after the index has been switched to "Unnamed: 0" below, and
    # the explicit assignment avoids an inplace call on a temporary Series,
    # which is a no-op under pandas Copy-on-Write.
    changes_only = pd.Series(
        welle_z.mask(is_repeat).to_numpy(),
        index=pd.DatetimeIndex(df_cols['date']),
    )
    df_cols['welle_z_ipo'] = changes_only.interpolate(method='time').to_numpy()

    # Index the frame by the original row number, as the save cell expects.
    df_cols.index = df_cols['Unnamed: 0']

In [None]:
if not cleanOutlier:
    # Single subplot showing the original signal and its interpolation.
    fig = make_subplots(rows=1, cols=1, shared_xaxes=True, print_grid=True, vertical_spacing=0.02)

    # (column, legend label) pairs, drawn in this order.
    traces_to_draw = [('welle_z', 'welle_z'), ('welle_z_ipo', 'welle_z_interpolated')]
    for column, legend_label in traces_to_draw:
        fig.add_trace(
            go.Scatter(x=df_cols['date'], y=df_cols[column], name=legend_label, mode=scatter_mode),
            row=1,
            col=1,
        )

    fig.update_yaxes(title_text='Abweichungen [mm]', row=1, col=1)
    fig.update_layout(height=600, width=1300, title_text="Welle_z original und interpoliert")
    fig.show()

Save interpolated data into defined folder

In [None]:
# Write whichever frame was processed above; only the selected name is
# evaluated, so the frame of the unused path does not need to exist.
# NOTE(review): the interpolation cells set the index to "Unnamed: 0" while
# keeping that column, so the written file presumably contains it twice -
# confirm that downstream readers expect this.
df_to_save = df_clean if cleanOutlier else df_cols
df_to_save.to_csv(interpolatedFilePath)