In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from catboost import CatBoostRegressor
import os 

In [21]:
if tf.config.list_physical_devices('GPU'):
  print("GPU is available")
  print("CuDNN is enabled: True")
else:
  print("GPU is not available")

GPU is available
CuDNN is enabled: True


In [22]:
cwd = os.getcwd()
df = pd.read_csv(cwd + "/main.csv")
df = df.drop('Unnamed: 0', axis=1)


In [23]:
df.head()

Unnamed: 0,Millis,ECG,IR,RED,patient,datetime,rate_min,rate_max,rate_std,rate_avg,rate_25%,rate_50%,rate_75%,result
0,892,226,28556,28966,1,2023-10-18 09:38:39,0,0,0.0,0.0,0.0,0.0,0.0,1
1,893,226,28554,28959,1,2023-10-18 09:38:39,0,0,0.0,0.0,0.0,0.0,0.0,1
2,893,226,28556,28960,1,2023-10-18 09:38:39,0,0,0.0,0.0,0.0,0.0,0.0,1
3,893,226,28552,28959,1,2023-10-18 09:38:39,0,0,0.0,0.0,0.0,0.0,0.0,1
4,894,226,28551,28954,1,2023-10-18 09:38:39,0,0,0.0,0.0,0.0,0.0,0.0,1


In [24]:
df.Millis

0         892
1         893
2         893
3         893
4         894
         ... 
572512    157
572513    168
572514    170
572515    172
572516    174
Name: Millis, Length: 572517, dtype: int64

In [5]:
df = df.drop('datetime', axis=1)


In [6]:
df.columns

Index(['Millis', 'ECG', 'IR', 'RED', 'patient', 'rate_min', 'rate_max',
       'rate_std', 'rate_avg', 'rate_25%', 'rate_50%', 'rate_75%', 'result'],
      dtype='object')

In [7]:
features = ['Millis', 'ECG', 'IR', 'RED', 'rate_min', 'rate_max', 'rate_std', 'rate_avg', 'rate_25%', 'rate_50%', 'rate_75%']
scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])
df

Unnamed: 0,Millis,ECG,IR,RED,patient,rate_min,rate_max,rate_std,rate_avg,rate_25%,rate_50%,rate_75%,result
0,1.361583,0.610319,0.281004,-0.120392,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
1,1.365051,0.610319,0.280482,-0.124166,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
2,1.365051,0.610319,0.281004,-0.123627,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
3,1.365051,0.610319,0.279960,-0.124166,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
4,1.368520,0.610319,0.279699,-0.126862,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
572512,-1.187720,-0.750920,-0.320210,0.353494,41,-0.530160,0.166245,0.546982,0.039378,-0.133998,0.204329,0.244823,1
572513,-1.149567,-0.750920,-0.319688,0.352416,41,-0.532463,0.860888,1.244109,0.375865,-0.249752,0.461311,0.970333,1
572514,-1.142630,-0.750920,-0.320210,0.352955,41,0.699602,0.712157,-0.162914,0.808926,0.800002,0.741192,0.608786,1
572515,-1.135693,-0.750920,-0.320732,0.352416,41,-1.178434,-0.028683,0.685746,-0.841812,-1.031249,-0.756809,-0.551968,1


In [8]:
df

Unnamed: 0,Millis,ECG,IR,RED,patient,rate_min,rate_max,rate_std,rate_avg,rate_25%,rate_50%,rate_75%,result
0,1.361583,0.610319,0.281004,-0.120392,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
1,1.365051,0.610319,0.280482,-0.124166,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
2,1.365051,0.610319,0.281004,-0.123627,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
3,1.365051,0.610319,0.279960,-0.124166,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
4,1.368520,0.610319,0.279699,-0.126862,1,0.353012,-0.346990,-0.500606,0.000076,0.201017,0.000779,-0.197973,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
572512,-1.187720,-0.750920,-0.320210,0.353494,41,-0.530160,0.166245,0.546982,0.039378,-0.133998,0.204329,0.244823,1
572513,-1.149567,-0.750920,-0.319688,0.352416,41,-0.532463,0.860888,1.244109,0.375865,-0.249752,0.461311,0.970333,1
572514,-1.142630,-0.750920,-0.320210,0.352955,41,0.699602,0.712157,-0.162914,0.808926,0.800002,0.741192,0.608786,1
572515,-1.135693,-0.750920,-0.320732,0.352416,41,-1.178434,-0.028683,0.685746,-0.841812,-1.031249,-0.756809,-0.551968,1


In [9]:
X = df.drop('result', axis=1)
y = df.result

In [10]:
model = CatBoostRegressor(iterations=20000,
                          task_type="GPU",
                          learning_rate=0.5,
                          depth=2)

In [None]:
new_patient_order = [33, 23, 67, 15, 8]

new_data = pd.DataFrame(columns=df.columns)

for patient in new_patient_order:
    patient_data = df[df['patient'] == patient]
    new_data = pd.concat([new_data, patient_data])


new_data = new_data.reset_index(drop=True)

print(new_data)

In [19]:
from sklearn.model_selection import KFold
import numpy as np

patients = df['patient'].unique()
np.random.shuffle(patients)
kf = KFold(n_splits=len(patients) // 8)

for train_index, test_index in kf.split(patients):
    train_patients = patients[train_index]
    test_patients = patients[test_index]

    train_data = df[df['patient'].isin(train_patients)]
    test_data = df[df['patient'].isin(test_patients)]

    x_train_fold = train_data.drop('result', axis=1)
    y_train_fold = train_data.result
    test_X = test_data.drop('result', axis=1)
    test_y = test_data.result
    model.fit(x_train_fold, y_train_fold, verbose=0)
    preds = model.predict(test_X)
    threshold = 0.5

    binary_predictions = (preds > threshold).astype(int)
    print("-----------------------------------------------")
    print(classification_report(test_y, binary_predictions))
    test_X['preds'] = preds
    test_X['real'] = test_y
    prediction_cal = test_X[['patient', 'real', 'preds']]
    prediction_cal.groupby('patient')
    summary = prediction_cal.groupby('patient').describe()
    print(summary)



-----------------------------------------------
              precision    recall  f1-score   support

           0       0.11      0.06      0.08     46797
           1       0.54      0.68      0.60     74855

    accuracy                           0.44    121652
   macro avg       0.32      0.37      0.34    121652
weighted avg       0.37      0.44      0.40    121652

            real                                       preds            \
           count mean  std  min  25%  50%  75%  max    count      mean   
patient                                                                  
6        13863.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  13863.0  0.804789   
10       19624.0  1.0  0.0  1.0  1.0  1.0  1.0  1.0  19624.0  0.554146   
18       14564.0  1.0  0.0  1.0  1.0  1.0  1.0  1.0  14564.0  1.034855   
26       17258.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  17258.0  0.858909   
30       17083.0  1.0  0.0  1.0  1.0  1.0  1.0  1.0  17083.0  0.970948   
31       14382.0  1.0  0.0  1.0  

In [27]:
for train_index, test_index in skf.split(X, y):
      train = df[df['patient'] <= 34]
      test = df[df['patient'] > 34]
      x_train_fold = train.drop('result', axis=1)
      y_train_fold = train.result
      test_X = test.drop('result', axis=1)
      test_y = test.result
      model.fit(x_train_fold, y_train_fold)
      preds = model.predict(test_X)
      threshold = 0.5

      binary_predictions = (preds > threshold).astype(int)
      print("-----------------------------------------------")
      print(classification_report(test_y, binary_predictions))
      )


NameError: name 'X' is not defined

In [22]:
from sklearn.model_selection import cross_validate
cv_results = cross_validate(model, df.drop('result', axis=1), df.result, cv=3)

sorted(cv_results.keys())


0:	learn: 0.3099443	total: 70ms	remaining: 23m 20s
1:	learn: 0.2341730	total: 73.4ms	remaining: 12m 13s
2:	learn: 0.1791043	total: 76.5ms	remaining: 8m 29s
3:	learn: 0.1389334	total: 79.7ms	remaining: 6m 38s
4:	learn: 0.1151741	total: 82.9ms	remaining: 5m 31s
5:	learn: 0.0879550	total: 85.8ms	remaining: 4m 45s
6:	learn: 0.0773221	total: 88.8ms	remaining: 4m 13s
7:	learn: 0.0679458	total: 91.9ms	remaining: 3m 49s
8:	learn: 0.0591331	total: 94.9ms	remaining: 3m 30s
9:	learn: 0.0526296	total: 97.9ms	remaining: 3m 15s
10:	learn: 0.0470214	total: 101ms	remaining: 3m 3s
11:	learn: 0.0432410	total: 104ms	remaining: 2m 53s
12:	learn: 0.0403681	total: 107ms	remaining: 2m 45s
13:	learn: 0.0365346	total: 111ms	remaining: 2m 38s
14:	learn: 0.0342000	total: 114ms	remaining: 2m 31s
15:	learn: 0.0320203	total: 117ms	remaining: 2m 26s
16:	learn: 0.0301131	total: 120ms	remaining: 2m 21s
17:	learn: 0.0287106	total: 123ms	remaining: 2m 17s
18:	learn: 0.0269158	total: 127ms	remaining: 2m 13s
19:	learn: 0.

['fit_time', 'score_time', 'test_score']

In [23]:

cv_results['test_score']

array([-0.37517016, -0.16156714, -0.48952548])

In [25]:
scoring = "accuracy"
print(f"{scoring}: %0.2f (+/- %0.2f)" % (cv_results.mean(), cv_results.std() * 2))

AttributeError: 'dict' object has no attribute 'mean'

In [8]:

sequence_length = 16
sequences = []
labels = []

for i in range(len(scaled_features) - sequence_length):
    sequences.append(scaled_features[i:i+sequence_length])
    labels.append(df['patient'][i + sequence_length])

len(labels)
import numpy as np
sequences = np.array(sequences)
labels = np.array(labels)


In [None]:
df

In [10]:
scaled_features.shape

(572517, 10)

In [None]:

X_train, X_test, y_train, y_test = train_test_split(sequences, labels, test_size=0.2, random_state=42)


In [28]:
X_train.shape

(458000, 16, 10)

In [29]:
X_test.shape

(114501, 16, 10)

In [30]:
sequence_length

16

In [32]:
import catboost

In [51]:
model = CatBoostRegressor(iterations=20000,
                          task_type="GPU",
                          learning_rate=0.5,
                          depth=2)
# Fit model
model.fit(train_X, train_y)
# Get predictions
preds = model.predict(test_X)




0:	learn: 0.3840014	total: 3.61ms	remaining: 1m 12s
1:	learn: 0.3376167	total: 6.71ms	remaining: 1m 7s
2:	learn: 0.3119511	total: 9.76ms	remaining: 1m 5s
3:	learn: 0.2931744	total: 12.8ms	remaining: 1m 3s
4:	learn: 0.2563935	total: 15.9ms	remaining: 1m 3s
5:	learn: 0.2217147	total: 19ms	remaining: 1m 3s
6:	learn: 0.2037888	total: 22ms	remaining: 1m 2s
7:	learn: 0.1931033	total: 25.2ms	remaining: 1m 2s
8:	learn: 0.1742044	total: 28.3ms	remaining: 1m 2s
9:	learn: 0.1612319	total: 31.3ms	remaining: 1m 2s
10:	learn: 0.1542349	total: 34.4ms	remaining: 1m 2s
11:	learn: 0.1422594	total: 37.5ms	remaining: 1m 2s
12:	learn: 0.1303418	total: 40.7ms	remaining: 1m 2s
13:	learn: 0.1233617	total: 43.9ms	remaining: 1m 2s
14:	learn: 0.1161875	total: 47.1ms	remaining: 1m 2s
15:	learn: 0.1090275	total: 50.3ms	remaining: 1m 2s
16:	learn: 0.1021556	total: 53.6ms	remaining: 1m 3s
17:	learn: 0.0996912	total: 57ms	remaining: 1m 3s
18:	learn: 0.0963291	total: 60.6ms	remaining: 1m 3s
19:	learn: 0.0886585	total:

In [52]:
threshold = 0.5

# Convert probabilities/logits to binary predictions
binary_predictions = (preds > threshold).astype(int)

In [53]:
print(classification_report(test_y, binary_predictions))

              precision    recall  f1-score   support

           0       0.69      1.00      0.82     36679
           1       1.00      0.52      0.69     34417

    accuracy                           0.77     71096
   macro avg       0.85      0.76      0.75     71096
weighted avg       0.84      0.77      0.75     71096



In [14]:
!
train = df[df['patient'] <= 34]
test = df[df['patient'] > 34]

train_X = train.drop('result', axis=1)
train_y = train.result
test_X = test.drop('result', axis=1)
test_y = test.result

In [15]:
train_X.columns

Index(['Millis', 'ECG', 'IR', 'RED', 'patient', 'rate_min', 'rate_max',
       'rate_std', 'rate_avg', 'rate_25%', 'rate_50%', 'rate_75%'],
      dtype='object')

In [16]:
features = ['Millis', 'ECG', 'IR', 'RED', 'rate_min', 'rate_max',
       'rate_std', 'rate_avg', 'rate_25%', 'rate_50%', 'rate_75%']

for feature in features:
    mean = train_X[feature].mean()
    std = train_X[feature].std()
    
    train_X[feature] = (train_X[feature] - mean) / std
    test_X[feature] = (test_X[feature] - mean) / std


In [17]:
train_X

Unnamed: 0,Millis,ECG,IR,RED,patient,rate_min,rate_max,rate_std,rate_avg,rate_25%,rate_50%,rate_75%
0,1.362832,0.550931,0.279323,-0.172247,1,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
1,1.366302,0.550931,0.278818,-0.176220,1,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
2,1.366302,0.550931,0.279323,-0.175652,1,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
3,1.366302,0.550931,0.278313,-0.176220,1,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
4,1.369771,0.550931,0.278060,-0.179058,1,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
...,...,...,...,...,...,...,...,...,...,...,...,...
501416,-1.398832,-0.638541,-0.795008,-0.676850,34,0.060520,0.596135,0.389383,0.457574,0.255110,0.454888,0.578997
501417,-1.398832,-0.638541,-0.796523,-0.675715,34,-1.486455,-0.642231,0.597474,-1.250948,-1.396926,-1.155968,-0.854653
501418,-1.398832,-0.638541,-0.796018,-0.675148,34,-1.846904,-2.142967,-0.288654,-2.455087,-2.103472,-2.322827,-2.323688
501419,-1.395362,-0.638541,-0.795766,-0.676850,34,-1.666088,-0.557836,0.762018,-1.317891,-1.509699,-1.226648,-0.864303


In [18]:
test_X

Unnamed: 0,Millis,ECG,IR,RED,patient,rate_min,rate_max,rate_std,rate_avg,rate_25%,rate_50%,rate_75%
501421,0.124247,0.154441,0.831772,1.155389,35,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
501422,0.131186,-0.043805,0.832277,1.152551,35,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
501423,0.131186,-0.043805,0.831267,1.153119,35,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
501424,0.134655,-0.043805,0.831014,1.151416,35,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
501425,0.138124,-0.043805,0.831014,1.151416,35,0.354197,-0.345987,-0.510328,0.000101,0.199814,0.000931,-0.196114
...,...,...,...,...,...,...,...,...,...,...,...,...
572512,-1.187197,-0.836786,-0.302680,0.326681,41,-0.552243,0.177031,0.582588,0.039765,-0.140998,0.206545,0.253021
572513,-1.149033,-0.836786,-0.302174,0.325546,41,-0.554607,0.884915,1.309880,0.379349,-0.258756,0.466133,0.988918
572514,-1.142094,-0.836786,-0.302680,0.326114,41,0.709918,0.733348,-0.158025,0.816396,0.809163,0.748852,0.622195
572515,-1.135155,-0.836786,-0.303185,0.325546,41,-1.217596,-0.021613,0.727357,-0.849535,-1.053777,-0.764338,-0.555178


In [19]:
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(12, (train_X.shape[1]-1))))
model.add(LSTM(50))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_X, train_y, epochs=10, batch_size=64, validation_split=0.2, verbose=1)


Epoch 1/10


  super().__init__(**kwargs)


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(None, 12), dtype=float32). Expected shape (None, 12, 11), but input has incompatible shape (None, 12)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 12), dtype=float32)
  • training=True
  • mask=None

In [None]:

# Modelin performansını değerlendir
loss, accuracy = model.evaluate(test_X, test_y)
print(f'Test Accuracy: {accuracy}')