### Project in progress.

In [2]:
import pandas as pd
import numpy as np

import tensorflow
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer

from sklearn.metrics import classification_report

from tensorflow.keras.utils import to_categorical

from sklearn.preprocessing import Normalizer
from tensorflow.keras.optimizers import Adam

### Data preparation.

In [20]:
heart = pd.read_csv("heart failure.csv")

heart.head()
heart.info()

#Counting the number of death events. 
print("The count of death events: ", Counter(heart['death_event']))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0                299 non-null    int64  
 1   age                       299 non-null    float64
 2   anaemia                   299 non-null    object 
 3   creatinine_phosphokinase  299 non-null    int64  
 4   diabetes                  299 non-null    object 
 5   ejection_fraction         299 non-null    int64  
 6   high_blood_pressure       299 non-null    object 
 7   platelets                 299 non-null    float64
 8   serum_creatinine          299 non-null    float64
 9   serum_sodium              299 non-null    int64  
 10  sex                       299 non-null    object 
 11  smoking                   299 non-null    object 
 12  time                      299 non-null    int64  
 13  death_event               299 non-null    object 
dtypes: float64

In [21]:
features = heart[['age','anaemia','creatinine_phosphokinase','diabetes','ejection_fraction','high_blood_pressure','platelets','serum_creatinine','serum_sodium','sex','smoking','time']]
labels = heart[['death_event']]


print(features, labels)

      age anaemia  creatinine_phosphokinase diabetes  ejection_fraction  \
0    75.0      no                       582       no                 20   
1    55.0      no                      7861       no                 38   
2    65.0      no                       146       no                 20   
3    50.0     yes                       111       no                 20   
4    65.0     yes                       160      yes                 20   
..    ...     ...                       ...      ...                ...   
294  62.0      no                        61      yes                 38   
295  55.0      no                      1820       no                 38   
296  45.0      no                      2060      yes                 60   
297  45.0      no                      2413       no                 38   
298  50.0      no                       196       no                 45   

    high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex  \
0                   yes 

In [22]:
features_dummies = pd.get_dummies(features)
print(features_dummies)

      age  creatinine_phosphokinase  ejection_fraction  platelets  \
0    75.0                       582                 20  265000.00   
1    55.0                      7861                 38  263358.03   
2    65.0                       146                 20  162000.00   
3    50.0                       111                 20  210000.00   
4    65.0                       160                 20  327000.00   
..    ...                       ...                ...        ...   
294  62.0                        61                 38  155000.00   
295  55.0                      1820                 38  270000.00   
296  45.0                      2060                 60  742000.00   
297  45.0                      2413                 38  140000.00   
298  50.0                       196                 45  395000.00   

     serum_creatinine  serum_sodium  time  anaemia_no  anaemia_yes  \
0                 1.9           130     4           1            0   
1                 1.1          

### Train-test split.

In [44]:
feat_train, feat_test, labels_train, labels_test = train_test_split(features_dummies, labels, test_size = 0.3, random_state = 42)

print(labels_train.head()) 
print(labels_test.head())
#feat_train.info()

    death_event
224          no
68          yes
222          no
37          yes
16          yes
    death_event
281          no
265          no
164         yes
9           yes
77           no


### Data normalization. 

In [31]:
ct = ColumnTransformer([('numeric', StandardScaler(), ['age','creatinine_phosphokinase','ejection_fraction','platelets','serum_creatinine','serum_sodium','time'])])
feat_train_scaled = ct.fit_transform(feat_train)
feat_test_scaled = ct.transform(feat_test)

print(feat_train_scaled, feat_test_scaled)

[[1.15079274e-04 1.15476099e-03 4.96031352e-05 ... 1.98412541e-06
  2.73809306e-04 4.06745708e-04]
 [2.86885111e-04 6.59835755e-04 1.02458968e-04 ... 4.91803048e-06
  5.81966940e-04 2.70491676e-04]
 [1.15068463e-04 2.35616376e-04 9.58903857e-05 ... 3.01369784e-06
  3.80821818e-04 5.50684787e-04]
 ...
 [2.09124582e-04 2.84409431e-03 1.71101931e-04 ... 4.94294466e-06
  5.20910322e-04 3.34599331e-04]
 [1.67072435e-04 2.20991267e-03 1.13913024e-04 ... 6.07536128e-06
  4.93623104e-04 9.26492595e-04]
 [5.36902574e-04 6.02673139e-03 1.67782054e-04 ... 7.38241039e-06
  9.66424633e-04 5.83881549e-04]] [[1.37243657e-03 1.14108298e-02 7.84249470e-04 9.99918075e-01
  5.29368393e-05 2.66644820e-03 4.90155919e-03]
 [1.38121458e-04 8.23203887e-04 9.66850203e-05 9.99999352e-01
  2.48618624e-06 3.86740081e-04 6.62982996e-04]
 [1.34726914e-04 7.31118056e-03 8.98179430e-05 9.99973099e-01
  3.29332458e-06 4.16156469e-04 3.86217155e-04]
 [2.06185539e-04 3.17010266e-04 9.02061734e-05 9.99999865e-01
  2.4226

### Raveling labels subsets into one-dimentional arrays.

In [81]:
labels_train = np.ravel(labels_test, order = 'C')
labels_test = np.ravel(labels_test, order = 'C')

print(len(labels_train))
print(len(labels_test))

360
360


### Label Encoding.

In [82]:
le = LabelEncoder()
labels_train = le.fit_transform(labels_train.astype(str))
labels_test = le.transform(labels_test.astype(str))

### Turning string variables into categorical.

In [76]:
labels_train = tensorflow.keras.utils.to_categorical(labels_train, dtype = 'int64')
labels_test = tensorflow.keras.utils.to_categorical(labels_test, dtype = 'int64')

### Buliding the model

In [64]:
model = Sequential()
model.add(InputLayer(input_shape=(feat_train_scaled.shape[1])))
model.add(Dense(12, activation ='relu'))
model.add(Dense(12, activation = "softmax"))
opt = Adam(learning_rate = 0.01)
model.compile(loss = "categorical_crossentropy", optimizer = opt, metrics = "accuracy")
print(model)

<keras.engine.sequential.Sequential object at 0x0000020EC84F5310>


### Fitting and estimating the model.

In [83]:
model.fit(feat_train_scaled, labels_train, epochs = 100, batch_size = 16, verbose = 1)

ValueError: Data cardinality is ambiguous:
  x sizes: 209
  y sizes: 360
Make sure all arrays contain the same number of samples.

In [71]:
loss, acc = model.evaluate(feat_test, labels_test)
labels_estimate = model.predict(labels_estimate)
labels_estimate = np.argmax(labels_estimate, axis = 1)
true = np.argmax(labels_test, axis = 1)

print(classification_report(true, estimate))

ValueError: in user code:

    File "C:\Users\acer\anaconda3\lib\site-packages\keras\engine\training.py", line 1557, in test_function  *
        return step_function(self, iterator)
    File "C:\Users\acer\anaconda3\lib\site-packages\keras\engine\training.py", line 1546, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\acer\anaconda3\lib\site-packages\keras\engine\training.py", line 1535, in run_step  **
        outputs = model.test_step(data)
    File "C:\Users\acer\anaconda3\lib\site-packages\keras\engine\training.py", line 1499, in test_step
        y_pred = self(x, training=False)
    File "C:\Users\acer\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\acer\anaconda3\lib\site-packages\keras\engine\input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_15" is incompatible with the layer: expected shape=(None, 17), found shape=(None, 7)
