In [None]:
# Imports
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

#import graphviz
from sklearn import tree
from matplotlib.dates import DateFormatter
import datetime as dt
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [None]:
# Load the raw training data and parse the timestamp column.
df = pd.read_csv('dataset_train.csv')
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Failure windows, taken from the table (per the original note); both bounds inclusive.
err_1_start, err_1_end = dt.datetime(2022, 2, 28, 21, 53), dt.datetime(2022, 3, 1, 2, 0)
err_2_start, err_2_end = dt.datetime(2022, 3, 23, 14, 54), dt.datetime(2022, 3, 23, 15, 24)
err_3_start, err_3_end = dt.datetime(2022, 5, 30, 12, 0), dt.datetime(2022, 6, 2, 6, 18)
error_windows = (
    (err_1_start, err_1_end),
    (err_2_start, err_2_end),
    (err_3_start, err_3_end),
)

# A sample is labelled 1 when its timestamp lies inside any failure window, else 0.
in_error = np.zeros(len(df), dtype=bool)
for window_start, window_end in error_windows:
    in_error |= df['timestamp'].between(window_start, window_end).to_numpy()

df['Label'] = np.where(in_error, 1, 0)

In [None]:
# Working frame for the oil-temperature analysis. The explicit copy makes it an
# independent frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
oil = df[['Oil_temperature', 'Label', 'timestamp']].copy()

In [None]:
# Time axis of the synthetic demo signal: 1000 samples spanning 0 to 10.
t = np.linspace(start=0, stop=10, num=1000)


def sin(t, A, F):
    """Return a sine wave of amplitude ``A`` and frequency ``F`` evaluated at ``t``."""
    angular_frequency = 2 * np.pi * F
    return A * np.sin(angular_frequency * t)

# Demo signal: four sine components at frequencies 1, 2, 4 and 8, each with half
# the amplitude of the previous one, summed in that order.
harmonics = [(1, 1), (0.5, 2), (0.25, 4), (0.125, 8)]
components = [sin(t, amplitude, frequency) for amplitude, frequency in harmonics]
y = components[0]
for component in components[1:]:
    y = y + component

# Plot with plotly
fig = px.line(x=t, y=y)
fig.show()

In [None]:
def yfft(y):
    """Return the amplitude spectrum of ``y`` after applying a Hann window.

    The signal is multiplied by ``np.hanning(len(y))``, transformed with
    ``np.fft.rfft`` and the magnitudes are scaled by ``4 / len(y)``.
    NOTE(review): the factor 4 presumably combines the one-sided spectrum (x2)
    with the Hann window's mean gain of 0.5 (x2) - confirm.
    """
    n_samples = len(y)
    windowed = y * np.hanning(n_samples)
    spectrum = np.fft.rfft(windowed)
    return 4 * np.abs(spectrum) / n_samples

def xfft(blocksize, duration):
    """Return the frequency bins that belong to an ``rfft`` of ``blocksize`` samples.

    Note: despite its name, ``duration`` is forwarded as the sample spacing ``d``
    of ``np.fft.rfftfreq`` (the step between consecutive samples), not as the
    total length of a block.
    """
    return np.fft.rfftfreq(n=blocksize, d=duration)

In [None]:
# Spectrum of the synthetic signal; the sample spacing is read off the time axis.
sample_spacing = t[1] - t[0]
x_fft = xfft(len(t), sample_spacing)
y_fft = yfft(y)

px.bar(x=x_fft, y=y_fft)

In [None]:
# Block size (samples per FFT window).
BLOCKSIZE = 1024
# Upper frequency limit for the bins that are kept.
# NOTE(review): with a sample spacing of 1 the largest bin frequency is 0.5, so a
# limit of 1000 keeps every bin - confirm the intended value.
HIGHEST_FREQU = 1000

# Number of complete blocks; trailing samples that do not fill a block are ignored.
nblocks = len(oil) // BLOCKSIZE

data = []

# TODO (note to self): check inside the loop whether the train gets switched off;
# if so, drop that block!

X_f = xfft(BLOCKSIZE, 1)
X_f = X_f[X_f < HIGHEST_FREQU]
for i in range(nblocks):
    df_block = oil.iloc[i*BLOCKSIZE:(i+1)*BLOCKSIZE]

    # A block counts as faulty when more than half of its samples are labelled 1.
    iserr = df_block['Label'].sum() > BLOCKSIZE/2
    fft_amp = yfft(df_block['Oil_temperature'])[:len(X_f)]

    block_data = {'iserr': iserr}
    # One entry per frequency bin. The key must contain the bin index: with a
    # constant (empty) key every amplitude overwrites the previous one and only
    # the last bin survives in the record.
    block_data.update({f'Oil_{j}': amp for j, amp in enumerate(fft_amp)})
    data.append(block_data)

In [None]:
def fourier(blocksize, max_freq, series, name):
    """Build block-wise FFT amplitude features for one signal.

    ``series`` is cut into consecutive blocks of ``blocksize`` samples (an
    incomplete trailing block is dropped). Each block is passed through
    ``yfft`` and only the leading bins whose frequency - computed by ``xfft``
    with a fixed sample spacing of 1 - is below ``max_freq`` are kept.

    Returns a DataFrame with one row per block and one column per kept bin,
    named ``{name}_0``, ``{name}_1``, ...
    """
    n_blocks = int(len(series) / blocksize)

    # Count of frequency bins below the cut-off; identical for every block.
    frequencies = xfft(blocksize, 1)
    n_bins = len(frequencies[frequencies < max_freq])

    rows = []
    for k in range(n_blocks):
        amplitudes = yfft(series[k * blocksize:(k + 1) * blocksize])[:n_bins]
        rows.append({f'{name}_{j}': amp for j, amp in enumerate(amplitudes)})

    return pd.DataFrame(rows)

def getError(blocksize, series, name):
    """Build one boolean error flag per block of ``series``.

    ``series`` is cut into consecutive blocks of ``blocksize`` samples (an
    incomplete trailing block is dropped). A block is flagged ``True`` when the
    sum of its values is strictly greater than ``blocksize / 2``.

    Returns a DataFrame with a single column called ``name`` and one row per block.
    """
    n_blocks = int(len(series) / blocksize)
    threshold = blocksize / 2

    rows = [
        {f'{name}': series[k * blocksize:(k + 1) * blocksize].sum() > threshold}
        for k in range(n_blocks)
    ]
    return pd.DataFrame(rows)

In [None]:
# Samples per FFT block. (The name `blocks` is kept, but the value is a block
# size, not a number of blocks.)
blocks = 750

# Block-wise spectral features per signal, one row per block.
# NOTE(review): `fourier` uses a sample spacing of 1, so the largest bin frequency
# is 0.5 and a limit of 1000 keeps every bin - confirm the intended cut-off.
tp3 = fourier(blocks, 1000, df['TP3'], 'TP3')
# Stored as `oil_fft` rather than `oil`: the raw `oil` frame (with its `timestamp`
# column) is still read further down and must not be overwritten here.
oil_fft = fourier(blocks, 1000, df['Oil_temperature'], 'Oil')
res = fourier(blocks, 1000, df['Reservoirs'], 'Res')
errors = getError(blocks, df['Label'], 'Error')
motCurr = fourier(blocks, 1000, df['Motor_current'], 'MotCurr')

# Column order: TP3, Oil, Error, Res, MotCurr.
df_features = pd.concat([tp3, oil_fft, errors, res, motCurr], axis=1)
df_features

In [None]:
# Decision tree on the block-wise FFT features.
X = df_features.drop('Error', axis=1)
y = df_features['Error']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fixed seed: scikit-learn permutes the features randomly at every split, so an
# unseeded tree can differ from run to run even on identical data.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)
disp.plot()

In [None]:
# Random forest on the same train/test split.
# Fixed seed so that the fitted forest (and anything derived from `clf`
# afterwards) is reproducible.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict with the forest itself. Without this step the confusion matrix is built
# from the `y_pred` of the previously fitted model and never reflects the forest.
y_pred = clf.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)
disp.plot()

In [None]:
# Scratch cell: feature column names without the 'Error' label column.
x = df_features.columns.tolist()
x.remove('Error')

# NaN never compares equal to itself, so this evaluates to False.
np.nan == np.nan

In [None]:
# Mean-decrease-in-impurity (MDI) importances of the ensemble currently bound to
# `clf`; it must expose `feature_importances_` and `estimators_`.
forest = clf

# Feature names: every column of the feature table except the 'Error' label.
featureColls = df_features.columns.tolist()
featureColls.remove('Error')

importances = forest.feature_importances_
# Spread of each feature's importance across the individual estimators.
per_estimator = [estimator.feature_importances_ for estimator in forest.estimators_]
std = np.std(per_estimator, axis=0)
forest_importances = pd.Series(importances, index=featureColls)

fig, ax = plt.subplots()
forest_importances.plot.bar(yerr=std, ax=ax)
ax.set_title("Feature importances using MDI")
ax.set_ylabel("Mean decrease in impurity")
fig.set_size_inches(30, 15)
fig.tight_layout()
fig.show()

In [None]:
# Period (in samples) of FFT bin 73 for a 1024-sample block at unit sample spacing.
bin_73_frequency = xfft(1024, 1)[73]
1 / bin_73_frequency

In [None]:
# Interactive bar chart of the feature importances, one bar per feature column.
px.bar(x=featureColls, y=forest_importances)

In [None]:
# Flag rows that do not follow their predecessor by exactly one second. The first
# row has no predecessor (its difference is NaT), so it is flagged as well.
# `assign` returns a new frame instead of writing a column into a frame that was
# sliced from `df`, which avoids pandas' SettingWithCopyWarning.
# Requires `oil` to be the raw frame that still has its `timestamp` column.
oil = oil.assign(hasCut=oil['timestamp'].diff(1).dt.total_seconds() != 1)

In [None]:
# Display the oil frame, which now includes the `hasCut` flag.
oil

In [None]:
# Feature table built from the per-block records collected in `data`.
oil_df = pd.DataFrame(data)

# Min-max scaling is prepared here but currently disabled (commented line below).
scaler = MinMaxScaler()
f_cols = [col for col in oil_df.columns if col != "iserr"]
# oil_df[f_cols] = scaler.fit_transform(oil_df[f_cols])
oil_df

In [None]:
# Heat map of the block features (transposed: one row per feature, one column per
# block) with the per-block error flag drawn on top as a scatter trace.
feature_cols = [col for col in oil_df.columns if col != "iserr"]
feature_matrix = oil_df[feature_cols].T

fig = px.imshow(feature_matrix, height=700)
fig.add_scatter(y=oil_df["iserr"])
fig
# px.bar(x=xfft(BLOCKSIZE, 1), y=data[0])

In [None]:
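# Disabled experiment: manual positional train/test split
# (rows 2800-10400 for training, all remaining rows for testing).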
# #df_train, df_test = train_test_split(oil_df, test_size=0.2,shuffle=False, stratify=oil_df['iserr'])
# df_train = oil_df.iloc[2800:10400] 
# df_train.loc[df_train.index < 6500, "iserr"] = False

# df_test = pd.concat([oil_df.iloc[:2800], oil_df.iloc[10400:]])

# y_train = df_train['iserr']
# y_test = df_test['iserr']


# X_train = df_train.drop(columns=['iserr'])
# X_test = df_test.drop(columns=['iserr'])

# #model
# model = DecisionTreeClassifier()
# model.fit(X_train, y_train)

# y_pred = model.predict(X_test)
# y_true = y_test

# #confusion matrix
# cm = confusion_matrix(y_true, y_pred)
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
# disp.plot()

In [None]:
# Stratified, shuffled train/test split of the block-wise oil-temperature features.
# Fixed seeds keep both the split and the fitted tree reproducible across kernel
# restarts; without them every run yields a different confusion matrix.
df_train, df_test = train_test_split(
    oil_df, test_size=0.2, shuffle=True, stratify=oil_df['iserr'], random_state=42
)

y_train = df_train['iserr']
y_test = df_test['iserr']

X_train = df_train.drop(columns=['iserr'])
X_test = df_test.drop(columns=['iserr'])

# Model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

In [None]:
# Evaluate the fitted tree on the held-out blocks.
y_true = y_test
y_pred = model.predict(X_test)

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=model.classes_)
disp.plot()

In [None]:
# Convert the boolean block labels to 0/1.
oil_df['iserr'] = oil_df['iserr'].astype(int)

# The test split was shuffled, so restore the original block order before
# drawing; otherwise the line plot connects the points in shuffled order and
# zig-zags across the x-axis.
y = y_test.astype(int).sort_index()

y.plot()