In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.preprocessing

In [None]:
# Load the semicolon-separated cardio dataset, using the `id` column as the row index.
df = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/cardio_train.csv',
    sep=';',
    index_col='id',
)
# Preview the first rows to confirm the file parsed as expected.
df.head()

In [None]:
# Summary statistics of the frame exactly as loaded, before any cleaning is applied.
df.describe()

In [None]:
# List the column labels available in the loaded frame.
df.columns

In [None]:
# Inspect the rows whose `ap_hi` reading exceeds 500 (the cut-off used when filtering below).
ap_hi_above_500 = df['ap_hi'] > 500
df[ap_hi_above_500].describe()

In [None]:
# Flag rows whose blood-pressure columns fall outside the accepted window
# (`ap_hi` in [0, 500], `ap_lo` in [0, 300]) and drop them by index label.
ap_hi_invalid = (df['ap_hi'] > 500) | (df['ap_hi'] < 0)
ap_lo_invalid = (df['ap_lo'] > 300) | (df['ap_lo'] < 0)
invalid_bp_index = df.loc[ap_hi_invalid | ap_lo_invalid].index
bp_data = df.drop(index=invalid_bp_index)

In [None]:
# Rescale `age` by 365.25 (presumably days -> years; confirm against the dataset docs).
#
# The values are read from the untouched `df` rather than from `bp_data` itself, so
# re-running this cell always yields the same result instead of dividing again.
# Assumes the `id` index is unique, so the label lookup returns one row per label.
#
# The column is replaced wholesale (`bp_data['age'] = ...`) instead of written through
# `.loc[:, 'age']`: the latter tries to store floats into the existing integer column,
# which pandas deprecates (incompatible-dtype assignment).
bp_data['age'] = df.loc[bp_data.index, 'age'] / 365.25
bp_data.describe()

In [None]:
# Derive body-mass index: weight divided by the square of (height / 100).
# Height is presumably recorded in centimetres and weight in kilograms -- TODO confirm.
height_scaled = bp_data['height'] / 100
bp_data['bmi'] = bp_data['weight'] / height_scaled ** 2
bp_data.describe()

In [None]:
# Keep only the modelling columns, in a fixed order with the target (`cardio`) last.
# The selection already leaves out `height` and `weight`, so no separate in-place drop
# is needed -- and unlike an in-place drop, this cell can be re-run without a KeyError.
bp_data = bp_data[['age', 'bmi', 'gender', 'ap_hi', 'ap_lo', 'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio']]

bp_data.describe()

In [None]:
from sklearn.model_selection import train_test_split

# Separate the predictors from the `cardio` target column.
y = bp_data['cardio']
X = bp_data.drop(columns='cardio')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Random forest model
# Seeded with the same value as the train/test split so that a fresh
# "Restart & Run All" reproduces the same forest and the same metrics.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
# Class predictions on the held-out rows, reused by the metric cells below.
y_pred_rf = rf.predict(X_test)

In [None]:
# SVM model
# SVC is not scale invariant, and the features here mix very different ranges
# (e.g. blood-pressure readings vs. 0/1 flags), so the inputs are standardised first.
# Wrapping scaler + classifier in one pipeline means the scaler is fitted on the
# training rows only and is applied automatically whenever `svm.predict` is called.
from sklearn.pipeline import make_pipeline

svm = make_pipeline(sklearn.preprocessing.StandardScaler(), SVC())
svm.fit(X_train, y_train)
# Class predictions on the held-out rows, reused by the metric cells below.
y_pred_svm = svm.predict(X_test)

In [None]:
# Gradient boosting machine model
# Seeded with the same value as the train/test split so that a fresh
# "Restart & Run All" reproduces the same ensemble and the same metrics.
gbm = GradientBoostingClassifier(random_state=42)
gbm.fit(X_train, y_train)
# Class predictions on the held-out rows, reused by the metric cells below.
y_pred_gbm = gbm.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Print the test-set accuracy of each model, one line per model.
for report_label, model_predictions in (
    ('Random forest accuracy:', y_pred_rf),
    ('SVM accuracy:', y_pred_svm),
    ('Gradient boosting machine accuracy:', y_pred_gbm),
):
    acc = accuracy_score(y_test, model_predictions)
    print(report_label, acc)

In [None]:
# Accuracy: bar chart comparing the three models on the held-out rows.
model_names = ['Random Forest', 'SVM', 'Gradient Boosting Machine']
accuracy_values = [
    accuracy_score(y_test, preds)
    for preds in (y_pred_rf, y_pred_svm, y_pred_gbm)
]

fig, ax = plt.subplots()
sns.barplot(x=model_names, y=accuracy_values, ax=ax)
ax.set(title='Accuracy Comparison', xlabel='Model', ylabel='Accuracy Score')
plt.show()

In [None]:
# Print the test-set precision of each model, one line per model.
for report_label, model_predictions in (
    ('Random forest precision:', y_pred_rf),
    ('SVM precision:', y_pred_svm),
    ('Gradient boosting machine precision:', y_pred_gbm),
):
    prec = precision_score(y_test, model_predictions)
    print(report_label, prec)

In [None]:
# Precision: bar chart comparing the three models on the held-out rows.
model_names = ['Random Forest', 'SVM', 'Gradient Boosting Machine']
precision_values = [
    precision_score(y_test, preds)
    for preds in (y_pred_rf, y_pred_svm, y_pred_gbm)
]

fig, ax = plt.subplots()
sns.barplot(x=model_names, y=precision_values, ax=ax)
ax.set(title='Precision Comparison', xlabel='Model', ylabel='Precision Score')
plt.show()

In [None]:
# Print the test-set recall of each model, one line per model.
for report_label, model_predictions in (
    ('Random forest recall:', y_pred_rf),
    ('SVM recall:', y_pred_svm),
    ('Gradient boosting machine recall:', y_pred_gbm),
):
    rec = recall_score(y_test, model_predictions)
    print(report_label, rec)

In [None]:
# Recall
fig, ax = plt.subplots()

sns.barplot(x=['Random Forest', 'SVM', 'Gradient Boosting Machine'], y=[recall_score(y_test, y_pred_rf), recall_score(y_test, y_pred_svm), recall_score(y_test, y_pred_gbm)])

ax.set_title('Recall Comparison')
ax.set_xlabel('Model')
ax.set_ylabel('Recall Score')
plt.show()

In [None]:
# Print the test-set F1-score of each model, one line per model.
for report_label, model_predictions in (
    ('Random forest F1-score:', y_pred_rf),
    ('SVM F1-score:', y_pred_svm),
    ('Gradient boosting machine F1-score:', y_pred_gbm),
):
    f1 = f1_score(y_test, model_predictions)
    print(report_label, f1)

In [None]:
# F1-score: bar chart comparing the three models on the held-out rows.
model_names = ['Random Forest', 'SVM', 'Gradient Boosting Machine']
f1_values = [
    f1_score(y_test, preds)
    for preds in (y_pred_rf, y_pred_svm, y_pred_gbm)
]

fig, ax = plt.subplots()
sns.barplot(x=model_names, y=f1_values, ax=ax)
ax.set(title='F1-score Comparison', xlabel='Model', ylabel='F1-Score')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlation between every column of the cleaned frame (features and target).
correlation_matrix = bp_data.corr()

# Create a dedicated figure/axes pair and draw the heatmap on it explicitly.
# Any `ax` left over from an earlier plotting cell belongs to a figure that has
# already been shown, so styling it would never affect this heatmap.
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    linecolor='white',
    linewidths=0.5,
    ax=ax,
)

# Restyle the row labels that the heatmap already placed, rather than re-creating
# ticks at integer positions (which would not line up with the cell centres).
plt.setp(ax.get_yticklabels(), rotation=45, va='center')

fig.suptitle('Correlation Heatmap', fontsize=12)
fig.tight_layout()
plt.show()