In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

In [None]:
# Location of the raw heart dataset, relative to the notebook's working directory.
path = r'dataset/heart.csv'

# Parse the CSV into the working DataFrame used by the cells below.
data = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Show the loaded frame as the cell output for a quick visual check.
data

In [None]:
# Per-column count of entries that are exactly zero.
data.eq(0).sum()

In [None]:
# Blank out zero Cholesterol readings as NaN so they are handled as missing values.
data['Cholesterol'] = data['Cholesterol'].mask(data['Cholesterol'].eq(0))

In [None]:
# KNNImputer picks neighbors using the *other* features of each row. Fitted on the
# Cholesterol column alone, a row with a missing value has nothing to measure distance
# on, so every gap is filled with the plain column mean. Feed it all numeric predictors
# instead; the target is left out so the label cannot leak into a feature.
knn_columns = data.select_dtypes(include='number').columns.drop('HeartDisease', errors='ignore')

# Neighbor distances are Euclidean, so bring the columns onto a common [0, 1] range
# first. MinMaxScaler disregards NaN when fitting and keeps it when transforming.
knn_scaler = MinMaxScaler()
knn_scaled = knn_scaler.fit_transform(data[knn_columns])

to_KNN = KNNImputer(n_neighbors=5)
# Impute in the scaled space, then map back to the original units.
knn_imputed = knn_scaler.inverse_transform(to_KNN.fit_transform(knn_scaled))

# Fill only the missing rows; observed readings are left exactly as they were.
cholesterol_idx = knn_columns.get_loc('Cholesterol')
data['Cholesterol'] = data['Cholesterol'].fillna(
    pd.Series(knn_imputed[:, cholesterol_idx], index=data.index)
)

In [None]:
# Show the frame again now that the Cholesterol column has been imputed.
data

In [None]:
# Re-count exact zeros per column after the Cholesterol clean-up.
data.eq(0).sum()

In [None]:
# Categorical columns to one-hot encode. Each name must be listed once: a repeated
# name selects the column twice and get_dummies then emits duplicate indicator
# columns with identical names.
to_OHE = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# Replace each listed column by its indicator columns (named `<column>_<value>`);
# all other columns are kept as they are.
data_encoded = pd.get_dummies(data, columns=to_OHE)

In [None]:
# Show the one-hot encoded frame as the cell output.
data_encoded

In [None]:
# Print the column names, non-null counts and dtypes of the encoded frame.
data_encoded.info()

In [None]:
# Scale the feature columns only. The target `HeartDisease` is a class label, not a
# feature, so it is kept out of the scaler and stays as it was loaded.
columns_name = data_encoded.columns.drop('HeartDisease').to_list()

# Rescale every feature to the [0, 1] range.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test split,
# so test-set minima/maxima influence the training features. Fitting it on the training
# split only (e.g. inside a Pipeline) would avoid that leakage.
to_MMS = MinMaxScaler()
data_encoded[columns_name] = to_MMS.fit_transform(data_encoded[columns_name])

In [None]:
# Show the frame after min-max scaling.
data_encoded

In [None]:
# Destination for the preprocessed dataset.
file_path = r'dataset/processed_data-heart.csv'

# Write the encoded, scaled frame to disk without the index column, then confirm.
data_encoded.to_csv(path_or_buf=file_path, index=False)
print(f"داده‌های پردازش شده با موفقیت ذخیره شدند در: {file_path}")

In [None]:
# Target vector and feature matrix.
y = data_encoded['HeartDisease']
x = data_encoded.drop(columns=['HeartDisease'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Baseline linear-kernel SVM. `gamma` is not passed: it is the kernel coefficient for
# the 'rbf', 'poly' and 'sigmoid' kernels only and has no effect on a linear kernel.
Model_svc = SVC(kernel='linear', C=1.0)
Model_svc.fit(x_train, y_train)

In [None]:
# Predict the held-out rows with the baseline SVC and print the per-class
# precision / recall / F1 report.
y_pred = Model_svc.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Search space for the linear SVM. Only `C` is tuned: `gamma` applies to the 'rbf',
# 'poly' and 'sigmoid' kernels and is ignored by the linear one, so listing gamma
# values would only refit the same model once per value for every C.
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
    'kernel': ['linear'],
}

model_svc = SVC()

# 10-fold cross-validated search scored by macro-averaged F1, run on all CPU cores.
grid_search = GridSearchCV(estimator=model_svc, param_grid=param_grid, scoring='f1_macro', cv=10, n_jobs=-1)

grid_search.fit(x_train, y_train)

print("بهترین مقادیر پارامترها:", grid_search.best_params_)

# best_estimator_ is the winning configuration refitted on the whole training split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(x_test)

print("دقت مدل با بهترین پارامترها:")
print(accuracy_score(y_test, y_pred))

print("گزارش بازخوانی:")
print(classification_report(y_test, y_pred))


In [None]:
# Baseline random forest with default hyper-parameters. The seed pins the bootstrap
# samples and feature sub-sampling so the reported scores are the same on every run;
# 42 matches the seed used for the tuned forest in this notebook.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(x_train, y_train)

# Evaluate on the held-out split.
y_pred = rf_classifier.predict(x_test)

print("دقت مدل:")
print(accuracy_score(y_test, y_pred))
print("گزارش دقت:")
print(classification_report(y_test, y_pred))

In [None]:

# Hyper-parameter grid for the random forest: forest size, tree depth and the
# minimum sample counts required to split a node / form a leaf.
param_grid = dict(
    n_estimators=[50, 100, 200, 300, 500],
    max_depth=[None, 10, 20, 30, 50, 80, 100],
    min_samples_split=[2, 5, 10, 20, 30],
    min_samples_leaf=[1, 2, 4, 6, 8, 10, 20],
)

# Seeded estimator so every candidate is evaluated reproducibly.
rf_classifier = RandomForestClassifier(random_state=42)

# Exhaustive 10-fold cross-validated search, ranked by macro-averaged F1, on all cores.
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=10,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

print("بهترین مقادیر پارامترها:", grid_search.best_params_)

# Score the refitted best forest on the held-out split.
best_rf_classifier = grid_search.best_estimator_
y_pred = best_rf_classifier.predict(x_test)

print("دقت مدل با بهترین پارامترها:")
print(accuracy_score(y_test, y_pred))

print("گزارش دقت مدل با بهترین پارامترها:")
print(classification_report(y_test, y_pred))
