### Save data from the database as CSV

For a configured timespan and database connection, all logged data within that timespan is read from the database. The data is plotted so that a narrower timespan of interest can be chosen.
If needed, outliers can then be removed. Finally, the selected data is saved as a CSV file.

In [None]:
import pymongo as py
from datetime import datetime, timedelta 
import pandas as pd
import numpy as np
import plotly

In [None]:
# Candidate column list, kept for reference only (currently not used):
# parameters = ["date", "t_bett", "t_motor", "t_spindle", "DRZ2", "M8", "M121", "M127", "M7", "given2model", "welle_z"]

# Layout of the timestamp strings below. The trailing "+0000" is matched as
# literal text, so the parsed datetimes are naive (no tzinfo attached).
_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%f+0000"

# Time window that is requested from the database.
start_sec, end_sec = (
    datetime.strptime(stamp, _TIMESTAMP_FORMAT)
    for stamp in ("2023-09-06T00:00:00.000+0000", "2023-09-09T23:30:00.000+0000")
)

# Number of hours added to the "date" column right before the CSV export.
timeshift = 1

In [None]:
# --- Database configuration ---
host = "M57002edge"
port = 27017
collection = "modelLogs"

# Query window: aliases of the timespan configured in the parameter cell.
# Alternative literals used earlier, for reference:
#   datetime.strptime("2022-09-06T00:00:31.000+0000", "%Y-%m-%dT%H:%M:%S.%f+0000")
#   datetime.strptime("2022-09-07T00:00:00.000+0000", "%Y-%m-%dT%H:%M:%S.%f+0000")
start, end = start_sec, end_sec

# --- Connect to the database and request the events ---
client = py.MongoClient(host=host, port=port)
db = client.h4ai

# Events with start <= date < end, sorted by ascending date.
date_window = {'$gte': start, '$lt': end}
event_list = db[collection].find({"date": date_window}).sort('date', 1)


In [None]:
# Flatten all logged samples inside the requested window into plain records
# (one dict per DataFrame row). Collecting the records first and building the
# frame once avoids the quadratic cost of concatenating row by row.
# NOTE(review): event_list is consumed by this loop; if it is a one-shot
# database cursor, the query cell has to be re-run before re-running this
# cell -- confirm.
records = []
for event in event_list:
    for data in event["content"]:
        # Both bounds are inclusive here.
        if start <= data["date"] <= end:
            # Build a new dict so the documents returned by the query are not
            # modified. The three explicit keys override equally named
            # entries of raw_data.
            records.append({
                **data["raw_data"],
                "date": data["date"],
                "given2model": data["given2model"],
                "prediction": data["prediction"],
            })

# reset_index() turns the default row numbering into an additional "index"
# column, which the following cells plot and export together with the data.
df = pd.DataFrame(records).reset_index()

# Preview of the first rows (rendered as the cell output).
df.head(5)

Create Plot

In [None]:
#print(list(df.columns.values))

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as ex
import plotly.io as pio

scatter_mode = 'lines'

# One subplot row per column of df, stacked vertically with a shared x axis.
col_names = df.columns.values
nrrows = len(col_names)
fig = make_subplots(
    rows=nrrows,
    cols=1,
    shared_xaxes=True,
    print_grid=True,
    vertical_spacing=0.01,
)

# Every column is drawn against the "date" column; the subplot rows are 1-based.
for row_nr, column in enumerate(col_names, start=1):
    trace = go.Scatter(x=df['date'], y=df[column], name=column, mode=scatter_mode)
    fig.add_trace(trace, row=row_nr, col=1)

fig.update_layout(height=10000, width=1300, title_text="Daten im Dataframe")
fig.show()

Truncate to interesting region

In [None]:
# Region of interest that is cut out of the loaded data and exported.
# The trailing "+0000" is matched as literal text, so both values are naive datetimes.
_SAVE_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%f+0000"

start_save, end_save = (
    datetime.strptime(stamp, _SAVE_TIMESTAMP_FORMAT)
    for stamp in ("2023-09-07T16:20:00.000+0000", "2023-09-08T03:50:00.000+0000")
)

In [None]:
# Cut df down to the rows between start_save (inclusive) and the first row at
# or after end_save (exclusive).
if df.empty:
    raise ValueError("df is empty: no data was loaded for the configured timespan")

# First and last index label of the loaded frame.
firstId = df.index[0]
lastId = df.index[-1]

# Boolean arrays marking the rows whose timestamp has reached the respective bound.
reached_start = (df['date'] >= start_save).to_numpy()
reached_end = (df['date'] >= end_save).to_numpy()

if not reached_start.any():
    raise ValueError(
        f"no rows with date >= start_save ({start_save}); "
        f"last available date is {df['date'].iloc[-1]}"
    )

# argmax() on a boolean array returns the position of the first True value.
idStart = int(reached_start.argmax())
# If the data ends before end_save, keep everything up to the last row
# instead of failing on an empty selection.
idEnd = int(reached_end.argmax()) if reached_end.any() else len(df)

# idStart/idEnd are row positions, so slice with .iloc (end is exclusive).
# .copy() decouples df_save from df for the later modifications.
df_save = df.iloc[idStart:idEnd].copy()

# print(" firstID: " + str(idStart) + " StopId: " + str(idEnd))
# print(df.info())

In [None]:
#df_save["t_motor"] = df_save["T_Motor_S"]

Remove outliers

In [None]:
# print(df_save.loc[18170:18210])
# print(df_save.loc[18075:18110])
# df_save["welle_z"].loc[18080:18104] = (df_save["welle_z"].loc[18079]+df_save["welle_z"].loc[18104])/2
# df_save["welle_z"].loc[18175:18199] = (df_save["welle_z"].loc[18174]+df_save["welle_z"].loc[18199])/2


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as ex
import plotly.io as pio

scatter_mode = 'lines'

# Same layout as the overview plot, but for the truncated frame df_save:
# one subplot row per column, stacked vertically with a shared x axis.
col_names = df_save.columns.values
nrrows = len(col_names)
fig = make_subplots(
    rows=nrrows,
    cols=1,
    shared_xaxes=True,
    print_grid=True,
    vertical_spacing=0.01,
)

# Subplot rows are 1-based; each column is drawn against the "date" column.
for row_nr, column in zip(range(1, nrrows + 1), col_names):
    fig.add_trace(
        go.Scatter(
            x=df_save['date'],
            y=df_save[column],
            name=column,
            mode=scatter_mode,
        ),
        row=row_nr,
        col=1,
    )

fig.update_layout(height=10000, width=1300, title_text="Daten im Dataframe")
fig.show()

From Dataframe to csv file

In [None]:
# Apply the configured time shift on a derived frame instead of overwriting
# df_save["date"]: re-running this cell then always exports the same
# timestamps rather than shifting them by another `timeshift` hours each time.
df_export = df_save.assign(date=df_save["date"] + timedelta(hours=timeshift))

# Write the shifted data to the target CSV file (the row index is written as well).
df_export.to_csv("X:\\KI Praktikum\\validate_Data\\2023_08_23_filtered_data\\Versuch07_09_2023_M8_M127_iso_5s.csv")