In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report, roc_auc_score
import joblib

In [16]:
# 1️⃣ Load the training data
df = pd.read_csv(filepath_or_buffer='/kaggle/input/heart-train/heart_train.csv')

In [17]:
# 2️⃣ Drop redundant columns, remove outliers, preprocess the data
# errors="ignore" turns the drop into a no-op when the column is absent.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

In [18]:
# Remove outliers by 'Heart rate': keep only rows whose value is <= 0.9.
# Rows with a missing 'Heart rate' are dropped too, because NaN <= 0.9 is False.
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not raise pandas' SettingWithCopyWarning.
df = df[df['Heart rate'] <= 0.9].copy()

In [19]:
# Encode the Gender column as integer class codes
le = LabelEncoder()
le.fit(df['Gender'])
df['Gender'] = le.transform(df['Gender'])

In [20]:
# Mean-impute missing values in the feature columns only.
# The target and the row identifier are excluded: averaging a binary label
# would invent non-binary class values, and neither column is a model input
# (both are removed when X is built).
# NOTE(review): the imputer is fitted before the train/validation split, so the
# column means include validation rows; fit it on the training part only if a
# leak-free validation estimate is required.
non_feature_cols = ['Heart Attack Risk (Binary)', 'id']
feature_cols = [col for col in df.columns if col not in non_feature_cols]
imputer = SimpleImputer(strategy='mean')
df[feature_cols] = imputer.fit_transform(df[feature_cols])

In [21]:
# 3️⃣ Build the feature matrix X and the target vector y
X = df.drop(labels=['Heart Attack Risk (Binary)', 'id'], axis='columns')
y = df.loc[:, 'Heart Attack Risk (Binary)']

In [22]:
# 4️⃣ Train/validation split: 20% held out, stratified on the target
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [23]:
# 5️⃣ Train a CatBoost classifier, monitoring AUC on the validation set.
# Training stops once the validation metric has not improved for 100 iterations.
model = CatBoostClassifier(
    eval_metric='AUC',
    class_weights=[1, 2],
    random_state=42,
    verbose=100,  # print a progress line every 100 iterations
)
model.fit(
    X_train,
    y_train,
    eval_set=(X_val, y_val),
    early_stopping_rounds=100,
)

Learning rate set to 0.051147
0:	test: 0.5271233	best: 0.5271233 (0)	total: 7.03ms	remaining: 7.02s
100:	test: 0.5775619	best: 0.5777360 (97)	total: 495ms	remaining: 4.41s
200:	test: 0.5657310	best: 0.5778105 (106)	total: 1.13s	remaining: 4.5s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 0.5778105358
bestIteration = 106

Shrink model to first 107 iterations.


<catboost.core.CatBoostClassifier at 0x7c6ea44ecd90>

In [24]:
# 6️⃣ Evaluate on the validation set
# NOTE: this is the same set that drove early stopping during training,
# so the reported scores are likely optimistic.
y_proba = model.predict_proba(X_val)[:, 1]  # probability of the positive class
y_pred = model.predict(X_val)

print(classification_report(y_true=y_val, y_pred=y_pred))
print(f"ROC-AUC: {roc_auc_score(y_val, y_proba):.4f}")

              precision    recall  f1-score   support

         0.0       0.71      0.41      0.52      1134
         1.0       0.38      0.68      0.49       603

    accuracy                           0.50      1737
   macro avg       0.54      0.55      0.50      1737
weighted avg       0.59      0.50      0.51      1737

ROC-AUC: 0.5778


In [25]:
# 7️⃣ Persist the trained model to disk
joblib.dump(value=model, filename='heart_attack_model.pkl')
print("✅ Модель успешно сохранена как 'heart_attack_model.pkl'")

✅ Модель успешно сохранена как 'heart_attack_model.pkl'
