In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer, KNNImputer
from permetrics.regression import Metrics

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.dummy import DummyRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import LinearSVR
from sklearn.linear_model import Ridge, LinearRegression, Lasso, ElasticNet

from tensorflow import keras
from keras.wrappers.scikit_learn import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance

from sklearn.pipeline import Pipeline

import statsmodels.api as sm
from dython import nominal

**Add column names**

In [None]:
# The new opkort export is read without a header row (header=None); the column
# names are borrowed from the previously exported file, which does have them.
opkort = pd.read_csv(Path('../20210324/opkort.csv'), sep=';', header=None)
opkort_gammal = pd.read_csv(Path('../inte_så_kul/res/gamla/op_kort_filt.csv'), sep=';')
opkort.columns = opkort_gammal.columns

# Written with to_csv defaults: comma separator, row index included.
opkort.to_csv(Path('../20210324/with_name/op_kort_namn.csv'))

In [None]:
# The new ingrepp export is read without a header row (header=None); the column
# names are borrowed from the previously exported file, which does have them.
ingrepp = pd.read_csv(Path('../20210324/op_ingrepp.csv'), sep=';', header=None)
ingrepp_gammal = pd.read_csv(Path('../inte_så_kul/res/gamla/ingrepp_ingrepp.csv'), sep=';')
ingrepp.columns = ingrepp_gammal.columns

# Written with to_csv defaults: comma separator, row index included.
ingrepp.to_csv(Path('../20210324/with_name/op_ingrepp_namn.csv'))

In [None]:
# The new diagnos export is read without a header row (header=None); the column
# names are borrowed from the previously exported file, which does have them.
diagnos = pd.read_csv(Path('../20210324/op_diagnos.csv'), sep=';', header=None)
diagnos_gammal = pd.read_csv(Path('../inte_så_kul/res/gamla/ingrepp_diagnos.csv'), sep=';')
diagnos.columns = diagnos_gammal.columns

# Written with to_csv defaults: comma separator, row index included.
diagnos.to_csv(Path('../20210324/with_name/op_diagnos_namn.csv'))

**LOAD DATA AND COMBINE INTO 1 DF**

**Behandling**
* Der_Behandling_PK
* Der_Opkort_FK -> op_kort
* Der_Anestesikort_FK -> Anestesikort (Har inte fått än)
* Der_Vårdform_FK
* Der_Prioritet_FK
* BehandlingsStatus - Kanske behöver filtreras bort
* ASAklass
* Der_PeropUtskriventill_FK - Oklart vad som menas (TODO: reda ut)
* ForberedelsetidStartTidpunkt
* ForberedelsetidSlutTidpunkt
* PatientÅlderVidOp - står i dagar
* Veckodag? kanske
* Starttimme? kanske
* BMI
* Kroppslängd - del av BMI
* Kroppsvikt - del av BMI

**Op-kort** - Hur kopplas behandling till OP-kort (Tror inte att vi behöver någonting härifrån)
* Der_Opkort_PK -> behandling
* OpkortText - har en kod i början
* OpkortUndergruppKod
* OpkortHuvudgruppKod
* TidFöreMin - Är det kopplat till en behandling?

**Op-ingrepp**
* Der_Behandling_PK -> behandling
* Ingreppkod - primär, kanske sekundär (se Primär_Sekundär)
* Primär_Sekundär
* Sida

**Op-diagnos**
* Der_Behandling_PK -> behandling
* Diagnoskod - primär, kanske sekundär (se Primär_Sekundär)
* Primär_Sekundär

In [None]:
# --- behandling: load, keep the columns of interest, keep completed operations ---
behandling_columns = [
    'Der_Behandling_PK',
    'Der_Opkort_FK',
    'Der_Anestesikort_FK',
    'Der_Vårdform_FK',
    'Der_Prioritet_FK',
    'BehandlingsStatus',
    'ASAklass',
    'ForberedelsetidStartTidpunkt',
    'ForberedelsetidSlutTidpunkt',
    'PatientÅlderVidOp',
    'Veckodag',
    'Starttimme',
    'BMI',
    'Kroppslängd',
    'Kroppsvikt',
]
behandling = pd.read_csv(Path('../20210324/with_name/behandling_optillfälle.csv'), sep=';')
behandling = behandling[behandling_columns]
# Aborted ('avbrutna') operations are removed as they do not contain all relevant data.
behandling = behandling.loc[behandling['BehandlingsStatus'] == 'Opererad']
print("Behandling length: {}".format(len(behandling)))

# --- ingrepp: load the named file written earlier, keep primary procedures only ---
ingrepp_columns = ['Der_Behandling_PK', 'Ingreppkod', 'Primär_Sekundär', 'Sida']
ingrepp = pd.read_csv(Path('../20210324/with_name/op_ingrepp_namn.csv'))
ingrepp = ingrepp[ingrepp_columns]
# Secondary rows might be worth including if a more complicated model is built.
ingrepp = ingrepp.loc[ingrepp['Primär_Sekundär'] == 'Primär']
print("Ingrepp length: {}".format(len(ingrepp)))

# --- diagnos: load the named file written earlier, keep primary diagnoses only ---
diagnos_columns = ['Der_Behandling_PK', 'Diagnoskod', 'Primär_Sekundär']
diagnos = pd.read_csv(Path('../20210324/with_name/op_diagnos_namn.csv'))
diagnos = diagnos[diagnos_columns]
# Secondary rows might be worth including if a more complicated model is built.
diagnos = diagnos.loc[diagnos['Primär_Sekundär'] == 'Primär']
print("Diagnos length: {}".format(len(diagnos)))

# --- combine: inner joins on the treatment key ---
# Both diagnos and ingrepp carry 'Primär_Sekundär', so after the second merge the
# diagnos copy is named 'Primär_Sekundär_x' and the ingrepp copy 'Primär_Sekundär_y'.
combined_df = (
    behandling
    .merge(diagnos, on='Der_Behandling_PK')
    .merge(ingrepp, on='Der_Behandling_PK')
)
print("Combined length: {}".format(len(combined_df)))

**CALCULATE AND ADD PREOPTIME TO DATAFRAME**

In [None]:
# Preparation ("preop") time in minutes, derived from the clock time of the
# start and end timestamps.
#
# The two columns are processed row by row on the shared index. Dropping NaNs
# from each column independently (as an earlier version did) shifts the
# start/end pairs against each other, or makes the final column assignment
# fail on a length mismatch, as soon as one timestamp is missing.
#
# NOTE(review): only the hour and minute fields are used; the date part and the
# seconds are ignored, so an interval that crosses midnight comes out negative.


def _clock_minutes(timestamps):
    """Return minutes since midnight for a Series of timestamp strings.

    The hour is read from characters [-12:-10] and the minute from [-9:-7] of
    each string, i.e. the strings are assumed to end in ``HH:MM:SS.fff``
    (TODO confirm against the export format). Missing values stay NaN.
    """
    hours = pd.to_numeric(timestamps.str[-12:-10])
    minutes = pd.to_numeric(timestamps.str[-9:-7])
    return hours * 60 + minutes


start = combined_df["ForberedelsetidStartTidpunkt"]
slut = combined_df["ForberedelsetidSlutTidpunkt"]

start_times = _clock_minutes(start)
stop_times = _clock_minutes(slut)
times = stop_times - start_times

# Quick sanity check of the range (NaN rows are skipped by max/min).
print(times.max())
print(times.min())

# Add total time to dataframe (index-aligned; rows lacking a timestamp get NaN)
combined_df['time'] = times

**ASAKLASS**

In [None]:
# Mean and standard deviation of the preop time for ASA classes 1 through 6.
asa_mean = []
asa_std = []
for asa_class in range(1, 7):
    class_times = combined_df.loc[combined_df['ASAklass'] == asa_class, 'time']
    asa_mean.append(class_times.mean())
    asa_std.append(class_times.std())

# Box plot with the individual observations overlaid as a swarm.
ax = sns.boxplot(data=combined_df, x='ASAklass', y='time', color='#99c2a2')
ax = sns.swarmplot(data=combined_df, x='ASAklass', y='time', color='#99c2a2', alpha=0.5)
ax.set_xlabel('ASAklass')
ax.set_ylabel('Preoptid')
ax.set_title("ASAKLASS - Preoptid")

Nr of data points:
* ASA 1 - 1935
* ASA 2 - 1731
* ASA 3 - 267
* ASA 4 - 3

**BMI, KROPPSVIKT, KROPPSLÄNGD**

In [None]:
# Keep rows with BMI below 200; rows where BMI is missing fail the comparison
# and are dropped as well.
combined_df = combined_df.loc[combined_df['BMI'].lt(200)]

# Preop time against body height, body weight and BMI.
sns.jointplot(data=combined_df, x='Kroppslängd', y='time', kind='hex')

sns.jointplot(data=combined_df, x='Kroppsvikt', y='time', kind='hex')

sns.jointplot(data=combined_df, x='BMI', y='time', kind='hex')


**ÅLDER**

In [None]:
# Preop time against patient age at the time of the operation.
# NOTE(review): the plt.* calls below act on whichever axes is current after
# jointplot returns — confirm they end up on the joint axes in the rendered figure.
sns.jointplot(data=combined_df, x='PatientÅlderVidOp', y='time', kind='hex')
plt.title('Ålder')
plt.xlabel('Ålder (dagar)')
plt.ylabel('Preoptid (min)')

**VÅRDFORM**

In [None]:
# Mean and standard deviation of the preop time for Der_Vårdform_FK values 1 and 2.
form_mean = []
form_std = []
for vardform in (1, 2):
    form_times = combined_df.loc[combined_df['Der_Vårdform_FK'] == vardform, 'time']
    form_mean.append(form_times.mean())
    form_std.append(form_times.std())

# Box plot with the individual observations overlaid as a swarm.
sns.boxplot(x='Der_Vårdform_FK', y='time', data=combined_df, color='#99c2a2')
sns.swarmplot(x='Der_Vårdform_FK', y='time', data=combined_df, color='#99c2a2', alpha=0.5)
# Labels describe the plotted variable (they were previously copied from the ASA plot).
plt.xlabel('Vårdform')
plt.ylabel('Preoptid')
plt.title("VÅRDFORM - Preoptid")

In [None]:
# Default figure size (width, height) for the figures created below.
plt.rcParams.update({"figure.figsize": (20, 5)})

In [None]:
# Count rows per first character of 'Diagnoskod'.
# - Keys appear in order of first occurrence, as in a plain counting loop.
# - Missing or empty codes are skipped instead of raising.
# - The loop variable is deliberately not called `diagnos`, so the `diagnos`
#   DataFrame loaded earlier is left intact.
first_letters = combined_df['Diagnoskod'].str[0].dropna()

diagnoser = {}
for letter in first_letters:
    diagnoser[letter] = diagnoser.get(letter, 0) + 1
print(diagnoser)

In [None]:
# Diagnosis codes are kept when their first character is one of these letters.
selected_letters = ('M', 'G', 'Z', 'S', 'T')

# Counts for the selected keys only (works whether `diagnoser` is keyed by the
# first letter or by the full code).
true_diagnoser = {
    code: count
    for code, count in diagnoser.items()
    if code[0] in selected_letters
}

# Filter rows on the FIRST CHARACTER of 'Diagnoskod'. Testing the full code
# against the single-letter keys of `diagnoser` would match (almost) nothing
# and leave the frame empty.
combined_df = combined_df[combined_df['Diagnoskod'].str[0].isin(selected_letters)]
print("Nr of diagnoser: {}".format(sum(true_diagnoser.values())))
combined_df.head()

In [None]:
# Procedure group = first two characters of 'Ingreppkod'.
# Computed with the vectorised .str accessor instead of iterrows(), and without
# reusing the name `ingrepp`, which holds the ingrepp DataFrame loaded earlier.
group_series = combined_df['Ingreppkod'].str[0:2]
ingreppsgrupp = group_series.tolist()

# Number of rows per group, keys in order of first occurrence; missing codes are skipped.
ingrepp_plural = {}
for group in group_series.dropna():
    ingrepp_plural[group] = ingrepp_plural.get(group, 0) + 1
print(ingrepp_plural)

# assign() returns a new frame, so no column is written onto a filtered slice
# of an earlier frame (avoids SettingWithCopyWarning).
combined_df = combined_df.assign(IngreppsGrupp=group_series)
combined_df.head()

In [None]:
# Larger font for the exported figure (changes the global rcParams).
plt.rcParams['font.size'] = 22

# Bar chart of the number of rows per procedure group, saved next to the notebook.
fig, ax = plt.subplots(figsize=(30, 10))
group_names = ingrepp_plural.keys()
group_counts = ingrepp_plural.values()
ax.bar(group_names, group_counts)
fig.savefig('ingreppsgrupp_bar.PNG')

In [None]:
# Number of rows whose procedure group is 'NC' (raises KeyError if the group is absent).
nc_count = ingrepp_plural['NC']
print(nc_count)

In [None]:
# Bar chart of the counts stored in `diagnoser`, one bar per key.
diagnos_keys = diagnoser.keys()
diagnos_counts = diagnoser.values()
plt.bar(diagnos_keys, diagnos_counts)

In [None]:
# Columns that are identifiers, filter flags or raw timestamps; they are not
# used as features and are removed from the feature frame.
non_feature_columns = [
    "Der_Behandling_PK",
    "Der_Opkort_FK",
    "Der_Anestesikort_FK",
    "BehandlingsStatus",
    "ForberedelsetidStartTidpunkt",
    "ForberedelsetidSlutTidpunkt",
    "Primär_Sekundär_x",
    "Primär_Sekundär_y",
]
features_df = combined_df.drop(columns=non_feature_columns)
features_df.head()

In [None]:
# Map each procedure group to the sub-frame of features_df holding its rows.
ingreppsgrupper = {
    group_name: group_frame
    for group_name, group_frame in features_df.groupby('IngreppsGrupp')
}

In [None]:
# Restrict the feature frame to the procedure groups of interest.
selected_groups = ['TN', 'NC', 'NH', 'NB', 'NG', 'NF', 'ND']
features_df = features_df[features_df['IngreppsGrupp'].isin(selected_groups)]

# Mean and standard deviation of the preop time per retained group, ordered by
# group name. The statistics are taken from the filtered frame itself: looping
# over the groups collected before the filter produced NaN entries for every
# group that had just been removed.
group_stats = features_df.groupby('IngreppsGrupp')['time'].agg(['mean', 'std'])
grupp_mean = group_stats['mean'].tolist()
grupp_std = group_stats['std'].tolist()
features_df.head()

In [None]:
# Distribution of the preop time within each retained procedure group.
sns.boxplot(data=features_df, x='IngreppsGrupp', y='time', color='#99c2a2')

Preop time seems to differ quite a lot between the different ingrepp groups.

In [None]:
# Mean and standard deviation of the preop time for ASA classes 1 through 6.
# Computed from features_df, the same frame that is plotted below, so the
# statistics and the figure describe the same rows.
asa_mean = []
asa_std = []
for asa_class in range(1, 7):
    class_times = features_df.loc[features_df['ASAklass'] == asa_class, 'time']
    asa_mean.append(class_times.mean())
    asa_std.append(class_times.std())

# Box plot per ASA class with the observations overlaid, coloured by procedure group.
sns.boxplot(x='ASAklass', y='time', data=features_df, color='#99c2a2')
sns.swarmplot(x='ASAklass', y='time', data=features_df, hue='IngreppsGrupp')
plt.xlabel('ASAklass')
plt.ylabel('Preoptid')
plt.title("ASAKLASS - Preoptid")

In [None]:
# Preop time against patient age, coloured by procedure group.
sns.jointplot(data=features_df, x='PatientÅlderVidOp', y='time', hue='IngreppsGrupp', kind='scatter')

In [None]:
# Preop time as a function of the start hour of the operation.
sns.lineplot(data=features_df, x='Starttimme', y='time', color='#99c2a2')