In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import pandas as pd

# Load the dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv("diabetes.csv")

# Two overlapping subsets. Both keep every row with Outcome == 0; they differ
# only in which of the remaining rows they admit (Insulin == 0 vs Insulin > 0).
# NOTE(review): by construction every Outcome != 0 row in df_type1 has
# Insulin == 0 and every Outcome != 0 row in df_type2 has Insulin > 0, while
# Insulin remains a feature below. Each model can therefore partly read the
# label off the filter column -- consider dropping 'Insulin' from X. It is
# left in place here so the reported metrics stay reproducible.
df_type1 = df[(df['Outcome'] == 0) | (df['Insulin'] == 0)]
df_type2 = df[(df['Outcome'] == 0) | (df['Insulin'] > 0)]

# Features are every column except the target.
X_type1 = df_type1.drop(columns=['Outcome'])
y_type1 = df_type1['Outcome']

X_type2 = df_type2.drop(columns=['Outcome'])
y_type2 = df_type2['Outcome']

# Stratified 80/20 split so both partitions keep the subset's class ratio.
X_train_type1, X_test_type1, y_train_type1, y_test_type1 = train_test_split(
    X_type1, y_type1, test_size=0.2, random_state=42, stratify=y_type1
)
X_train_type2, X_test_type2, y_train_type2, y_test_type2 = train_test_split(
    X_type2, y_type2, test_size=0.2, random_state=42, stratify=y_type2
)

# Oversample the training partitions only; the test partitions are untouched.
# NOTE(review): resampling runs on the unscaled features. Scaling before
# SMOTE would change the synthetic samples and the metrics -- TODO confirm
# the intended order.
smote = SMOTE(random_state=42)
X_train_type1_balanced, y_train_type1_balanced = smote.fit_resample(X_train_type1, y_train_type1)
X_train_type2_balanced, y_train_type2_balanced = smote.fit_resample(X_train_type2, y_train_type2)

# One scaler per model. Previously a single StandardScaler was re-fitted for
# type 2, which left no fitted scaler matching log_reg_type1 once this code
# had run. Each scaler is fitted on its own (resampled) training data and
# then applied to the corresponding test data.
scaler_type1 = StandardScaler()
X_train_type1_scaled = scaler_type1.fit_transform(X_train_type1_balanced)
X_test_type1_scaled = scaler_type1.transform(X_test_type1)

scaler_type2 = StandardScaler()
X_train_type2_scaled = scaler_type2.fit_transform(X_train_type2_balanced)
X_test_type2_scaled = scaler_type2.transform(X_test_type2)

# Backward-compatible alias: `scaler` used to end up fitted on the type-2
# training data, so keep it pointing at that scaler for any later code.
scaler = scaler_type2

# Fit one logistic regression per subset and predict on its held-out test set.
log_reg_type1 = LogisticRegression()
log_reg_type1.fit(X_train_type1_scaled, y_train_type1_balanced)
y_pred_type1 = log_reg_type1.predict(X_test_type1_scaled)

log_reg_type2 = LogisticRegression()
log_reg_type2.fit(X_train_type2_scaled, y_train_type2_balanced)
y_pred_type2 = log_reg_type2.predict(X_test_type2_scaled)

def print_metrics(y_true, y_pred, model_name):
    """Print accuracy, precision and recall for one set of predictions.

    Args:
        y_true: Ground-truth labels, passed unchanged to the scoring functions.
        y_pred: Predicted labels, passed unchanged to the scoring functions.
        model_name: Label inserted into the header line of the report.

    Returns:
        None. The report is written to standard output: a header line, one
        line per metric formatted to two decimals, and a trailing blank line.
    """
    # Score in the same order the values are reported.
    accuracy, precision, recall = (
        scorer(y_true, y_pred)
        for scorer in (accuracy_score, precision_score, recall_score)
    )

    # A single print with a newline separator emits the same four lines; the
    # "\n" inside the last item plus print's own line ending gives the
    # trailing blank line.
    print(
        f"Метрики для {model_name}:",
        f"Accuracy: {accuracy:.2f}",
        f"Precision: {precision:.2f}",
        f"Recall: {recall:.2f}\n",
        sep="\n",
    )

# Report held-out test-set metrics for both models, type 1 first.
for _y_true, _y_pred, _label in (
    (y_test_type1, y_pred_type1, "Диабет 1 типа (Логистическая регрессия)"),
    (y_test_type2, y_pred_type2, "Диабет 2 типа (Логистическая регрессия)"),
):
    print_metrics(_y_true, _y_pred, _label)

Метрики для Диабет 1 типа (Логистическая регрессия):
Accuracy: 0.83
Precision: 0.57
Recall: 0.86

Метрики для Диабет 2 типа (Логистическая регрессия):
Accuracy: 0.78
Precision: 0.48
Recall: 0.85

