In [1]:
import seaborn as sns
import pandas as pd
import numpy as np
import os
import datetime
from dataclasses import dataclass, field

from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline

from sktime.transformations.panel.rocket import Rocket
from sklearn.model_selection import GridSearchCV
from sktime.classification.deep_learning.resnet import ResNetClassifier
from sktime.forecasting.model_selection import temporal_train_test_split

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score

from sktime import datatypes



In [2]:
def _split_labels(frame, columns):
    """Separate per-id label values from a frame.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose index has a level named 'id'.
    columns : str
        Comma-separated column names (no surrounding whitespace), e.g.
        'desc' or 'desc,status'.

    Returns
    -------
    labels : numpy.ndarray
        One row per id (in groupby order), one entry per requested column,
        holding the value found in the first row of that id.
    stripped : pandas.DataFrame
        `frame` without the requested columns.

    Raises
    ------
    KeyError
        If a requested column is not present in `frame`.
    """
    col_names = columns.split(',')
    # iloc[0] (not GroupBy.first()) on purpose: first() would skip missing
    # values, whereas the label is defined as whatever the first row holds.
    label_rows = [
        [group[col].iloc[0] for col in col_names]
        for _, group in frame.groupby(level='id')
    ]
    # A single drop call copies the frame once instead of once per column.
    stripped = frame.drop(col_names, axis=1)
    return np.array(label_rows), stripped


def gen_labellist(columns):
    """Extract per-id labels from the global `df` and drop those columns.

    Side effect: rebinds the module-level `df` to a copy without the
    requested columns. `df` is left untouched if extraction fails.

    Parameters
    ----------
    columns : str
        Comma-separated column names.

    Returns
    -------
    numpy.ndarray
        Array with one row per id and one entry per requested column.
    """
    global df
    label_list, df = _split_labels(df, columns)
    return label_list


def gen_labellist_test(columns):
    """Extract per-id labels from the global `test` and drop those columns.

    Same contract as `gen_labellist`, but reads and rebinds the
    module-level `test` frame instead of `df`.

    Parameters
    ----------
    columns : str
        Comma-separated column names.

    Returns
    -------
    numpy.ndarray
        Array with one row per id and one entry per requested column.
    """
    global test
    label_list, test = _split_labels(test, columns)
    return label_list

def dataframe_rocket(df):
    """Re-arrange a frame into a nested frame of per-time-step series.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a MultiIndex that contains a level named 't_sec'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 't_sec' value and one column per column of
        `df`. Each cell holds the pandas Series of that column taken from
        the cross-section at that time step (indexed by the remaining
        index level(s)).
    """
    nested_dict = {}
    for time, _ in df.groupby(level='t_sec'):
        # The cross-section does not depend on the column, so take it once
        # per time step instead of once per (time step, column) pair.
        snapshot = df.xs(time, level='t_sec')
        nested_dict[time] = {name: snapshot[name] for name in df.columns}
    # Outer dict keys become columns; transpose so time steps are the rows.
    return pd.DataFrame(nested_dict).transpose()

def gen_3d_nparray(df):
    """Stack a frame into an array with one 2-D slab per id.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index has a level named 'id'.

    Returns
    -------
    numpy.ndarray
        Array built from one entry per id (groupby order); each entry holds
        one row per column of `df` containing that column's values for the
        id, in the order they appear in `df`.
    """
    per_id = [
        np.array([np.array(block[name]) for name in df.columns])
        for _, block in df.groupby(level='id')
    ]
    return np.array(per_id)

In [3]:
# Wall-clock start of the run; the results cell subtracts this from end_time
# to report the runtime in minutes.
start_time = datetime.datetime.now()

In [4]:
# Run tag: used for the results file name and the 'name' field of the
# results record.
name = 'dataset_mapk361_n100_2'

# low_memory=False makes pandas infer every column's dtype from the whole
# file rather than chunk by chunk, so a column cannot end up with mixed
# element types.
# NOTE(review): the warning recorded under this cell looks like pandas'
# DtypeWarning for mixed-type columns - confirm, and consider an explicit
# dtype= mapping instead.
df = pd.read_csv('dataset_mapk361_n100_train.csv', sep=',',
                 index_col=['id', 't_sec'], low_memory=False)

# gen_labellist reads the global `df`, returns one 'desc' value per id and
# removes the 'desc' column from `df` as a side effect.
status_array = gen_labellist('desc')

# Remaining annotation columns are not model inputs; drop them in one pass
# so the large frame is copied once instead of three times.
df = df.drop(['status', 'inhibition', 'inh_strength'], axis=1)

# Relabel the 't_sec' level values as 0, 1, 2, ... The replacement level
# has one entry per row, i.e. at least as many as there are distinct
# 't_sec' values. `new_level2` is also read by the test-loading cell.
new_level2 = pd.RangeIndex(start=0, stop=len(df), step=1)
df.index = df.index.set_levels(new_level2, level='t_sec')

  df = pd.read_csv('dataset_mapk361_n100_train.csv', sep=',',


In [5]:
# low_memory=False makes pandas infer every column's dtype from the whole
# file rather than chunk by chunk.
# NOTE(review): the warning recorded under this cell looks like pandas'
# DtypeWarning for mixed-type columns - confirm.
test = pd.read_csv('dataset_mapk361_n100_test.csv', sep=',',
                   index_col=['id', 't_sec'], low_memory=False)

# Relabel the 't_sec' level values as 0, 1, 2, ... The numbering is sized
# from the test frame itself, so this cell no longer relies on a variable
# derived from the length of the training frame.
test.index = test.index.set_levels(
    pd.RangeIndex(start=0, stop=len(test), step=1), level='t_sec'
)

# gen_labellist_test reads the global `test`, returns one 'desc' value per
# id and removes the 'desc' column from `test` as a side effect.
status_array_test = gen_labellist_test('desc')

# Drop the remaining annotation columns in a single pass.
# NOTE(review): the recorded shapes show 13 columns left here versus 12 in
# the training frame - check which extra column the test CSV carries.
test = test.drop(['status', 'inhibition', 'inh_strength'], axis=1)

  test = pd.read_csv('dataset_mapk361_n100_test.csv', sep=',',


In [6]:
# Sanity check: (rows, columns) of the test frame after the label and
# annotation columns were removed.
test.shape

(1047261, 13)

In [7]:
# Sanity check: (rows, columns) of the training frame after the label and
# annotation columns were removed.
df.shape

(1047261, 12)

In [8]:
# Pair of (training frame, per-id label array).
# NOTE(review): no visible cell reads `data` afterwards - candidate for removal.
data = (df, status_array)

In [9]:
# The train/test split is given by the two CSV files, so no splitter is
# used here. Earlier experiments, kept for reference:
# X_train, X_test, y_train, y_test = temporal_train_test_split(X, y, test_size=0.3)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = df
# The label arrays have one row per id and one entry per requested label
# column; only 'desc' was requested, so column 0 is the label vector.
y_train = status_array[:, 0]

# The recorded shapes show the test frame carrying one more column than the
# training frame (13 vs 12), while the network input is built for 12
# channels. Restrict the test panel to exactly the training columns, in
# training order, and fail loudly if any of them is absent.
missing_columns = [col for col in X_train.columns if col not in test.columns]
if missing_columns:
    raise ValueError(
        f'test set is missing training feature columns: {missing_columns}'
    )
X_test = test[list(X_train.columns)]
y_test = status_array_test[:, 0]

In [10]:
# ResNet time-series classifier trained for 300 epochs with Keras progress
# output enabled. random_state fixes the estimator's seed so repeated runs
# of the notebook are comparable.
classifier = ResNetClassifier(n_epochs=300, verbose=True, random_state=42)

2023-08-25 12:15:05.235997: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
# Mapping from label ('g', 'k') to a numeric weight.
# NOTE(review): presumably intended as class weights, but no visible cell
# passes `weight` to the classifier - confirm or remove.
weight = {'g': 1000, 'k': 1}

In [None]:
# Fit on the training panel (long-running: 300 epochs were configured).
classifier.fit(X_train, y_train)
# Last expression of the cell: displays the classifier's score on the test panel.
classifier.score(X_test,y_test)

2023-08-25 12:15:16.366402: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 361, 12)]    0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, 361, 64)      6208        ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 361, 64)     256         ['conv1d[0][0]']                 
 alization)                                                                                       
                                                                                                  
 activation (Activation)        (None, 361, 64)      0           ['batch_normalization[0][0]']

In [None]:
# Predicted label per test instance; reused by the confusion matrix,
# precision and results-export cells.
y_pred = classifier.predict(X_test)

In [None]:
# Eyeball the predicted labels.
print(y_pred)

In [None]:
# Eyeball the true labels for comparison with the predictions.
print(y_test)

In [None]:
# Wall-clock end of the run, taken after fitting and predicting; the results
# cell computes the runtime as end_time - start_time.
end_time = datetime.datetime.now()

In [None]:
# Confusion matrix of true vs. predicted test labels. labels= pins the
# row/column order to classifier.classes_, which is what the plotting cell
# uses for its tick labels; without it the matrix only covers labels that
# occur in y_test or y_pred and can disagree with those ticks.
cm = confusion_matrix(y_test, y_pred, labels=classifier.classes_)

In [None]:
# Display the raw confusion-matrix counts.
cm

In [None]:
# Heat map of the confusion matrix with one tick per class on both axes.
classes = classifier.classes_
tick_marks = np.arange(len(classes))

fig, ax = plt.subplots()
image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.set_title("Confusion Matrix")
fig.colorbar(image, ax=ax)

# Class names on both axes; x labels are rotated so they stay readable.
ax.set_xticks(tick_marks)
ax.set_xticklabels(classes, rotation='vertical')
ax.set_yticks(tick_marks)
ax.set_yticklabels(classes)

# Axis captions are runtime strings (German: predicted / actual class).
ax.set_xlabel('Vorhergesagte Klasse')
ax.set_ylabel('Tatsächliche Klasse')
plt.show()

In [None]:
# Class balance of the test labels: n = instances labelled 'k',
# m = instances with any other label.
n = sum(1 for label in y_test if label == 'k')
m = len(y_test) - n
print(n)
print(m)

In [None]:
# Per-class precision on the test set (average=None returns one score per
# class instead of a single aggregate).
precision_score(y_test, y_pred, average=None)

In [None]:
# Collect the run's metrics into one record and write it as a one-row CSV.

# labels= ties the order of the returned scores to classifier.classes_, so
# each score is stored under the class it belongs to even when a class is
# absent from both y_test and y_pred.
class_labels = classifier.classes_
per_class_precision = precision_score(
    y_test, y_pred, labels=class_labels, average=None
)

# A plain dict replaces the dtype-less empty pd.Series that was grown field
# by field; insertion order defines the CSV column order.
record = {'name': name}
for label, value in zip(class_labels, per_class_precision):
    print(f'{label} = {np.round(value, 3)}')
    record[label] = np.round(value, 3)

# Accuracy from the predictions already computed; calling
# classifier.score(X_test, y_test) would run a second prediction pass over
# the test panel.
record['acc'] = np.round(np.mean(np.asarray(y_test) == np.asarray(y_pred)), 3)

runtime = end_time - start_time
minutes_difference = runtime.total_seconds() / 60
record['time'] = np.round(minutes_difference, 3)
record['cm'] = cm
# NOTE(review): 'dtw_d' looks like a leftover from another notebook (this
# run uses a ResNet classifier) - confirm what downstream readers expect.
record['type'] = 'dtw_d'
# Stored as a list: the text form of a long numpy array is elided with
# '...', which would lose predictions once written to CSV.
record['y'] = np.asarray(y_pred).tolist()
record['history'] = classifier.history.history

solutiondf = pd.DataFrame([record])

output_folder = os.path.join('solutions', 'ResNet')
# exist_ok avoids the check-then-create race and is a no-op if the folder exists.
os.makedirs(output_folder, exist_ok=True)
path = os.path.join(output_folder, f'{name}_desc.csv')
solutiondf.to_csv(path, mode='w', header=True)

In [None]:
# Show the one-row results frame that was just written to disk.
print(solutiondf)

In [None]:
# Training-history object stored on the fitted classifier; later cells read
# its `.epoch` and `.history` attributes.
hist = classifier.history

In [None]:
# Handle on the fitted underlying model, inspected by the following cells.
# NOTE(review): `a` is a throwaway name - rename or remove with the debug cells.
a = classifier.model_

In [None]:
# Debug inspection: dump the model object's attribute dictionary.
a.__dict__

In [None]:
# Debug inspection of a private attribute of the model object.
# NOTE(review): leftover exploration - not needed for the analysis.
a._metrics_lock

In [None]:
# Debug inspection: dump the attributes of the training-history object.
classifier.history.__dict__

In [None]:
# Per-epoch metric values; passed as data= to the plots below, which read
# its 'loss' and 'accuracy' entries.
plotdat = classifier.history.history

In [None]:
# Training loss and accuracy per epoch on one set of axes. Each curve gets a
# legend entry and the axes get neutral captions; otherwise the shared
# y-axis is captioned by whichever metric was drawn last.
ax = sns.lineplot(x=hist.epoch, y='loss', data=plotdat, label='loss')
sns.lineplot(x=hist.epoch, y='accuracy', data=plotdat, label='accuracy', ax=ax)
ax.set(xlabel='epoch', ylabel='value', title='Training history')
plt.show()

In [None]:
# List of epoch indices recorded during training. `a` is bound to
# classifier.model_ in an earlier cell, so subscripting it with 'epoch'
# cannot return this; the epoch list is an attribute of the history object.
classifier.history.epoch

In [None]:
# Collect the positions of test instances that were wrongly predicted as
# '-' and print a short report (running count, position, true label,
# predicted label) for each of them.
id_list = []
for idx, predicted in enumerate(y_pred):
    actual = y_test[idx]
    # Skip correct predictions and errors that did not predict '-'.
    if actual == predicted or predicted != '-':
        continue
    id_list.append(idx)
    print(len(id_list))
    print(idx)
    print(actual)
    print(predicted)
    print()
# n keeps the total number of matches found.
n = len(id_list)
print(id_list)

In [None]:
# True test labels, printed again for side-by-side reading with the
# predictions below.
print(y_test)

In [None]:
# Predicted test labels, printed again for comparison with the true labels.
print(y_pred)