<a href="https://colab.research.google.com/github/Bziukiewicz/tests/blob/master/ANN_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Analiza surowych danych

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import seaborn as sns
import sklearn
import plotly.express as px

In [2]:
# Load the raw dataset. The path is relative, so 'heart.csv' must sit in the
# kernel's working directory (on Colab it presumably has to be uploaded first).
df_raw= pd.read_csv('heart.csv')


### Rozkłady zmiennych

In [3]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Size the grid from the data instead of hard-coding 6 rows, so the cell keeps
# working if columns are added to or removed from the frame.
n_cols = 2
n_rows = -(-len(df_raw.columns) // n_cols)  # ceiling division

# Title every subplot with its column name; otherwise the panels are
# indistinguishable once rendered.
fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(df_raw.columns))

for i, column in enumerate(df_raw.columns):
    # Frequency of each distinct value in the column.
    value_counts = df_raw[column].value_counts()

    bar_chart = go.Bar(x=value_counts.index, y=value_counts.values, name=column)

    # Fill the grid row by row (plotly rows/cols are 1-based).
    fig.add_trace(bar_chart, row=(i // n_cols) + 1, col=(i % n_cols) + 1)

fig.update_layout(height=800, width=600, title_text="Side By Side Subplots")

fig.show()

In [89]:
import plotly.express as px

# Name of the label column. Deliberately NOT called `target`: that global is
# used for the label Series in the data-preparation section, and rebinding it
# to a string here would break the split/fit cells when this cell is re-run
# out of order.
target_col = 'HeartDisease'

# Size the grid from the data instead of hard-coding 6 rows.
n_cols = 2
n_rows = -(-len(df_raw.columns) // n_cols)  # ceiling division

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(df_raw.columns))

for i, column in enumerate(df_raw.columns):
    # 0-based grid position; plotly expects 1-based rows/cols.
    row, col = divmod(i, n_cols)

    # One grouped histogram per column, split by class.
    histogram = px.histogram(df_raw, x=column, color=target_col, barmode='group')

    for trace in histogram.data:
        # Every per-column figure carries the same class traces; show their
        # legend entries only once so the legend is not repeated per subplot.
        trace.update(showlegend=(i == 0))
        fig.add_trace(trace, row=row + 1, col=col + 1)

fig.update_layout(height=800, width=600, title_text="Visualizing Data")
# Share one y-axis scale across all subplots.
fig.update_yaxes(matches='y')

fig.show()

### Przygotowanie danych do pracy

In [6]:
# Separate the label column from the features; df_raw itself is left untouched.
target = df_raw['HeartDisease'].copy()
data = df_raw.drop(columns=['HeartDisease'])

In [7]:
from sklearn.preprocessing import LabelEncoder

# Categorical (string-valued) columns that are replaced by integer codes.
cols2labelEncode = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# Keep one fitted encoder per column. Re-fitting a single shared encoder keeps
# only the last column's mapping, so the codes of the other columns could not
# be inspected (`classes_`) or decoded (`inverse_transform`) afterwards.
label_encoders = {}

for col in cols2labelEncode:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le


In [None]:
# Sanity check after encoding: column dtypes and non-null counts.
data.info()

In [None]:
# Number of missing values per feature column.
data.isnull().sum()

In [10]:
from sklearn.model_selection import train_test_split

# A fixed seed makes the split (and every metric computed from it) reproducible
# across kernel restarts; stratifying on the label keeps the class ratio the
# same in the train and test partitions. The default test size is kept.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    random_state=42,
    stratify=target,
)

In [11]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training partition only, then apply the same
# transformation to both partitions.
scaler = StandardScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
X_train

array([[-1.12311985,  0.48953002, -0.8000505 , ...,  1.17150066,
        -0.74407036, -0.55539277],
       [-1.12311985,  0.48953002,  1.26150285, ..., -0.85360601,
         0.91233218,  1.09876406],
       [-0.28504343,  0.48953002, -0.8000505 , ..., -0.85360601,
        -0.83609273,  1.09876406],
       ...,
       [ 0.76255209,  0.48953002, -0.8000505 , ...,  1.17150066,
        -0.83609273, -2.20954961],
       [-1.12311985, -2.04277564,  0.23072618, ..., -0.85360601,
        -0.83609273,  1.09876406],
       [-1.33263896,  0.48953002, -0.8000505 , ...,  1.17150066,
         1.92457818, -0.55539277]])

### Utworzenie modelu klasyfikacyjnego ANN

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout

In [13]:
# (n_samples, n_features); the second value is the width the network's input
# layer has to accept.
X_train.shape

(688, 11)

In [71]:
from tensorflow.keras.callbacks import ModelCheckpoint
filepath = 'best_model_weights.hdf5'

# Keep only the epoch with the highest validation accuracy. The file is later
# consumed via `load_weights` on a freshly rebuilt architecture, so store the
# weights only rather than the full model plus optimizer state.
# NOTE(review): newer Keras releases appear to restrict the allowed checkpoint
# file suffixes - confirm '.hdf5' is accepted by the installed version.
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='max',
)


In [72]:
from tensorflow.keras.layers import Dropout
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable between runs.
set_random_seed(42)

# Take the input width from the data instead of hard-coding it, so the network
# stays in sync with the feature matrix if columns are added or removed.
n_features = X_train.shape[1]

# Fully connected binary classifier: 256 -> 128 -> dropout -> 64 -> 1 (sigmoid).
model = Sequential()
model.add(Dense(256,  activation='relu', input_shape=(n_features,)))
model.add(Dense(128,  activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64,  activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='Adam',
                 loss='binary_crossentropy',
                 metrics=['accuracy', 'binary_crossentropy'])

model.summary()

Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_87 (Dense)            (None, 256)               3072      
                                                                 
 dense_88 (Dense)            (None, 128)               32896     
                                                                 
 dropout_23 (Dropout)        (None, 128)               0         
                                                                 
 dense_89 (Dense)            (None, 64)                8256      
                                                                 
 dense_90 (Dense)            (None, 1)                 65        
                                                                 
Total params: 44289 (173.00 KB)
Trainable params: 44289 (173.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Train for 100 epochs, holding out 20% of the training data for validation;
# the checkpoint callback is invoked during training.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=100,
    callbacks=[checkpoint],
)

### Załadowanie najlepszego modelu

In [79]:
# Rebuild the same architecture as the trained network so the checkpointed
# weights can be loaded into it.
best_model = Sequential([
    Dense(256, activation='relu', input_shape=(11,)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Restore the checkpointed weights, then compile so that evaluate() can
# report the loss and metrics.
best_model.load_weights('best_model_weights.hdf5')
best_model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'binary_crossentropy'],
)

# evaluate() returns the loss followed by the compiled metrics, in order.
test_results = best_model.evaluate(X_test, y_test, verbose=0)
print(test_results)

[0.33365336060523987, 0.873913049697876, 0.33365336060523987]


In [83]:
# Sigmoid outputs of the restored model on the held-out set.
predictions = best_model.predict(X_test)

# Outputs strictly above 0.5 are labelled as class 1, everything else as class 0.
predicted_classes = np.greater(predictions, 0.5).astype("int32")





In [86]:
from sklearn.metrics import confusion_matrix as cm

In [87]:
# Imported under its real name inside this cell: calling the function through
# the alias `cm` and then binding the result to `cm` overwrote the function, so
# the cell raised TypeError when executed a second time.
from sklearn.metrics import confusion_matrix

# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the
# true classes, columns the predicted ones. Passing the predictions first
# transposes the matrix relative to the true_*/pred_* labels used when plotting.
cm = confusion_matrix(y_test, predicted_classes)

In [88]:
def plot_confusion_matrix(cm):
    """Show a 2x2 confusion matrix as an annotated Plotly heatmap.

    The rows of ``cm`` are reversed before plotting and labelled
    ``true_1``/``true_0``; the columns are labelled ``pred_0``/``pred_1``.
    Assumes ``cm`` has true classes on the rows and predicted classes on the
    columns, both in ascending label order - confirm against the caller.
    """
    flipped = cm[::-1]
    frame = pd.DataFrame(
        flipped,
        index=['true_1', 'true_0'],
        columns=['pred_0', 'pred_1'],
    )

    heatmap = ff.create_annotated_heatmap(
        z=frame.values,
        x=frame.columns.tolist(),
        y=frame.index.tolist(),
        colorscale='ice',
        showscale=True,
        reversescale=True,
    )
    heatmap.update_layout(width=500, height=500, title='Confusion Matrix', font_size=16)
    heatmap.show()


plot_confusion_matrix(cm)

In [91]:
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score

# Score the thresholded predictions against the held-out labels.
precision, recall, f1Score = (
    scorer(y_test, predicted_classes)
    for scorer in (precision_score, recall_score, f1_score)
)

for label, value in (('Precision', precision), ('Recall', recall), ('F1 Score', f1Score)):
    print(f'{label} {value}')

Precision 0.8672566371681416
Recall 0.875
F1 Score 0.8711111111111112
