### Linear regression

In [None]:
import pandas as pd
import utils
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import os
import glob

In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------------

# Optimisation settings. NOTE(review): the names suggest an iterative training
# loop; confirm which of them the linear-regression workflow actually needs.
BATCH_SIZE = 512
NUM_WORKERS = 10
NUM_EPOCHS = 500
LEARNING_RATE = 2e-4  # 1e-5 was too small for 'LVtot_kalibriert' and 'LVtot'
LR_DECAY_AFTER_EPOCH = 300
GENERATION_AFTER_EPOCH = NUM_EPOCHS  # number of epochs after which the model generates a sample

# Signal column names:
#   SIG_A - aortic pressure (pressure signal of the main artery)
#   SIG_B - VAD current [A], i.e. the pump current in amperes
#   SIG_C - flow through the VAD (ventricular assist device = pump), i.e. pump flow
#   SIG_D - left ventricular pressure (pressure signal of the left heart chamber)
SIG_A, SIG_B, SIG_C, SIG_D = "AoP", "VADcurrent", "VadQ", "LVP"

# Name of the column to predict; `target` is a lower-case alias of the same string.
TARGET = "LVtot_kalibriert"
target = TARGET

# Feature subset (SIG_B is deliberately not part of this list in the original
# configuration) and the sizes derived from it.
feature_names = [SIG_A, SIG_C, SIG_D]
CHANNELS = len(feature_names)
WINDOW = 256

In [None]:
# Directory that holds the exported CSV recordings (absolute, machine-specific path).
path = "/home/johann/Desktop/Uni/Masterarbeit/Cycle_GAN/csv_export_files_alle_Daten/csv_export_files"

# glob returns matches in arbitrary, filesystem-dependent order. Sorting makes
# the row order of `df` (and everything derived from it, e.g. the truncated
# test set) reproducible between runs and machines.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))

# A single scaler instance is handed to utils.normalize for every file.
scaler = StandardScaler()

# Pre-process every file and collect the results. The frames are concatenated
# once after the loop: calling pd.concat inside the loop copies the
# accumulated data again for each file.
frames = []
for f in csv_files:
    # The CSV exports use ';' as the field separator.
    df_temp = pd.read_csv(f, sep=";")
    # The utils helpers are project-local; their exact behaviour is not
    # visible here. NOTE(review): presumably subsample(..., 10) keeps every
    # 10th sample and normalize(..., phase1=True) scales relative to phase 1
    # - confirm against utils.
    df_temp = utils.drop_cols(df_temp)
    df_temp = df_temp.dropna()
    df_temp = utils.remove_strings(df_temp)
    df_temp = utils.subsample(df_temp, 10)
    df_temp = utils.normalize(df_temp, scaler, phase1 = True)
    frames.append(df_temp)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# CSV file was found (same result as before in that case).
df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

# print(df.shape)

In [None]:
# The concatenated per-file frames carry repeated index values; a fresh
# 0..n-1 index makes every row uniquely addressable.
df = df.reset_index(drop=True)

# Re-code the 'intervention' column with boolean masks instead of a Python
# loop over every row:
#   * rows with Phasenzuordnung == 1                  -> intervention 0
#   * otherwise, rows with intervention == 10 are split by 'contractility':
#       contractility 1.0 -> 0   (could be ignored? - phase 0?)
#       contractility 3.0 -> 5
#       contractility 4.0 -> 6
#     any other contractility value keeps intervention == 10.
# A frame without rows is left untouched (the row loop was a no-op there too).
if not df.empty:
    # All masks are built BEFORE the first write so that every decision is
    # based on the original 'intervention' values.
    in_phase_one = df['Phasenzuordnung'] == 1
    is_intervention_ten = ~in_phase_one & (df['intervention'] == 10)
    contractility = df['contractility']

    df.loc[in_phase_one, 'intervention'] = 0
    for level, code in ((1.0, 0), (3.0, 5), (4.0, 6)):
        df.loc[is_intervention_ten & (contractility == level), 'intervention'] = code

# get unique intervention
# print(df['intervention'].unique())

In [None]:
# Normalise the data separately for every animal while keeping the label
# columns ('intervention', 'Phasenzuordnung', 'animal') at their original
# values: the labels are saved before normalisation and re-attached afterwards.
label_cols = ['intervention', 'Phasenzuordnung', 'animal']

scaler = StandardScaler()

#utils.visualize(df, [SIG_A, SIG_B, SIG_C, SIG_D, 'intervention', 'animal'], 72094)

normalized_parts = []
label_parts = []
for animal in df['animal'].unique():
    # Rows of this animal only.
    df_animal = df.loc[df['animal'] == animal]
    # Save the labels in exactly the order in which the normalised rows are
    # collected. Taking them from the un-grouped frame instead would pair rows
    # with the wrong labels whenever an animal's rows are not contiguous,
    # because the concatenation below groups rows by animal and renumbers them.
    label_parts.append(df_animal[label_cols])
    # NOTE(review): assumes utils.normalize keeps the number and order of rows
    # (the index-based join below has always relied on this) - confirm.
    df_animal = utils.normalize(df_animal, scaler)
    normalized_parts.append(df_animal)

# Concatenate once after the loop; both frames get the same fresh 0..n-1 index.
df_temp = pd.concat(normalized_parts, axis=0, ignore_index=True)
df_IPA = pd.concat(label_parts, axis=0, ignore_index=True)

print(df_temp.shape)
df = df_temp
# Replace the label columns returned by utils.normalize with the saved originals.
df = df.drop(columns=['intervention', 'Phasenzuordnung', 'animal'])
df = df.join(df_IPA)
# utils.visualize(df, [SIG_A, SIG_B, SIG_C, SIG_D, 'intervention', 'animal'], 5000)

In [None]:
# Animals 3, 4, 8, 11 and 17 are held out as the test set.
test_animals = [3,4,8,11,17]
print('\nTest animal(s):', test_animals)

# Every other animal ID present in the data is used for training.
all_animals = df['animal'].unique()
train_animals = list(filter(lambda animal_id: animal_id not in test_animals, all_animals))

# Test rows, cut at the end so that the row count is a whole multiple of WINDOW.
df_test = df[df['animal'].isin(test_animals)]
df_test = df_test.iloc[:(len(df_test) // WINDOW) * WINDOW]

# Training rows: only animals listed in train_animals.
df_train = df[df['animal'].isin(train_animals)]
print('\nDifferent animal IDs after removing those that are in the test dataset: ', df_train['animal'].unique().size)

print('\nTrain data shape:', df_train.shape)
print('\nTest data shape:', df_test.shape)

In [None]:
# Fit an ordinary least-squares model on the training animals.
# Inputs: the four signals plus the 'intervention' and 'Phasenzuordnung' labels.
# Output: the TARGET column. Note that `target` is rebound here from the
# column name to a one-column DataFrame.
df_LR = df_train.loc[:, [SIG_A, SIG_B, SIG_C, SIG_D, 'intervention', 'Phasenzuordnung']]
target = df_train.loc[:, [TARGET]]

# scikit-learn is fed plain numpy arrays.
X_train, y_train = df_LR.to_numpy(), target.to_numpy()

reg = LinearRegression()
reg.fit(X_train, y_train)

# Report the training score followed by the fitted coefficients and intercept.
for fitted_value in (reg.score(X_train, y_train), reg.coef_, reg.intercept_):
    print(fitted_value)

In [None]:
# Evaluate the fitted regression `reg` on the held-out test animals.
# The metrics are only needed in this cell, so they are imported here.
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Same input columns as used for fitting, taken from the test frame.
df_LR_test = df_test[[SIG_A, SIG_B, SIG_C, SIG_D, 'intervention', 'Phasenzuordnung']]
target_test = df_test[[TARGET]]

# convert to numpy array
X_test = df_LR_test.to_numpy()
y_test = target_test.to_numpy()

# Predict once and reuse the result for both metrics and the plot instead of
# running the model three times on the same data.
y_pred = reg.predict(X_test)

# mean squared error
print('MSE: ', mean_squared_error(y_test, y_pred))

# L1 loss (mean absolute error)
print('L1: ', mean_absolute_error(y_test, y_pred))

# plot the first 1000 results
plt.rcParams['figure.figsize'] = [15, 7]
plt.plot(y_test[:1000], label='true')
plt.plot(y_pred[:1000], label='predicted')
plt.legend()