In [None]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, HalvingGridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# –ó–∞–≤–∞–Ω—Ç–∞–∂–µ–Ω–Ω—è –¥–∞—Ç–∞—Å–µ—Ç—É
df = pd.read_csv('/content/diabetes_prediction_dataset.csv')  # –∑–∞–≤–∞–Ω—Ç–∞–∂–∏—Ç–∏ —Ñ–∞–π–ª —É Colab

# –ü–æ–ø–µ—Ä–µ–¥–Ω—ñ–π –∞–Ω–∞–ª—ñ–∑
print(df.info())
print(df.describe())
print(df.isnull().sum())

# –ü—ñ–¥–≥–æ—Ç–æ–≤–∫–∞
df = pd.get_dummies(df, drop_first=True)
X = df.drop('diabetes', axis=1)
y = df['diabetes']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# –ú–æ–¥–µ–ª—ñ
models = {
    'LogisticRegression': LogisticRegression(),
    'RidgeClassifier': RidgeClassifier(),
    'SGDClassifier': SGDClassifier(),
    'SVC': SVC()
}

for name, model in models.items():
    clf = model.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"=== {name} ===")
    print(classification_report(y_test, y_pred))
    ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred)).plot()
    plt.title(name)
    plt.show()

# –ü—ñ–¥–±—ñ—Ä –ø–∞—Ä–∞–º–µ—Ç—Ä—ñ–≤
param_grid = {
    'LogisticRegression': {'C': [0.1, 1, 10]},
    'RidgeClassifier': {'alpha': [0.1, 1.0, 10.0]},
    'SGDClassifier': {'loss': ['hinge', 'log_loss'], 'alpha': [0.0001, 0.001]},
    'SVC': {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
}

best_models = {}

for name, model in models.items():
    search = HalvingGridSearchCV(model, param_grid[name], cv=5)
    search.fit(X_train, y_train)
    best_models[name] = search.best_estimator_
    print(f"=== {name} BEST PARAMS ===")
    print(search.best_params_)
    y_pred = search.predict(X_test)
    print(classification_report(y_test, y_pred))

# 10 –≤–∏–ø–∞–¥–∫–æ–≤–∏—Ö –ø—Ä–∏–∫–ª–∞–¥—ñ–≤
sample_idx = np.random.choice(len(X_test), 10, replace=False)
sample_X = X_test[sample_idx]
sample_y = y_test.iloc[sample_idx]

for name, model in best_models.items():
    print(f"== {name} ==")
    pred = model.predict(sample_X)
    print(pd.DataFrame({'True': sample_y.values, 'Predicted': pred}))


## –ß–ê–°–¢–ò–ù–ê 2: –†–µ–≥—Ä–µ—Å—ñ—è –¥–ª—è –æ—Ü—ñ–Ω–∫–∏ –∫—Ä–µ–¥–∏—Ç–Ω–æ–≥–æ —Ä–∏–∑–∏–∫—É

In [1]:

# –ó–∞–≤–∞–Ω—Ç–∞–∂–µ–Ω–Ω—è
df_loan = pd.read_csv('/content/Loan.csv')

# –ê–Ω–∞–ª—ñ–∑
print(df_loan.info())
df_loan.dropna(inplace=True)
df_loan = pd.get_dummies(df_loan, drop_first=True)

X = df_loan.drop('LoanAmount', axis=1)
y = df_loan['LoanAmount']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score

regressors = {
    'LinearRegression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet()
}

results = {}

for name, model in regressors.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    r2 = r2_score(y_test, preds)
    results[name] = {'MSE': mse, 'R2': r2}
    print(f"== {name} ==\nMSE: {mse:.2f}, R2: {r2:.2f}")

    plt.figure(figsize=(6,4))
    sns.histplot(y_test, label='Actual', kde=True, color='blue')
    sns.histplot(preds, label='Predicted', kde=True, color='orange')
    plt.title(f'Distribution - {name}')
    plt.legend()
    plt.show()

# –ü—ñ–¥–±—ñ—Ä –ø–∞—Ä–∞–º–µ—Ç—Ä—ñ–≤ –¥–ª—è Ridge
search = HalvingGridSearchCV(Ridge(), {'alpha': [0.1, 1.0, 10.0]}, cv=5)
search.fit(X_train, y_train)
print("Best Ridge params:", search.best_params_)


NameError: name 'pd' is not defined

### üîö –í–∏—Å–Ω–æ–≤–∫–∏
- –ù–∞–π–∫—Ä–∞—â—ñ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∏ –ø–æ–∫–∞–∑–∞–ª–∏ –º–æ–¥–µ–ª—ñ: ... (–ü–æ–ø–µ—Ä–µ–¥–Ω—è –æ–±—Ä–æ–±–∫–∞:

–£ –¥–∞—Ç–∞—Å–µ—Ç—ñ –Ω–µ –±—É–ª–æ –ø—Ä–æ–ø—É—â–µ–Ω–∏—Ö –∑–Ω–∞—á–µ–Ω—å, —â–æ –ø–æ–ª–µ–≥—à–∏–ª–æ –ø—ñ–¥–≥–æ—Ç–æ–≤–∫—É.

–ö–∞—Ç–µ–≥–æ—Ä—ñ–∞–ª—å–Ω—ñ –∑–º—ñ–Ω–Ω—ñ –±—É–ª–∏ –∑–∞–∫–æ–¥–æ–≤–∞–Ω—ñ –∑–∞ –¥–æ–ø–æ–º–æ–≥–æ—é LabelEncoder.

–ú–∞—Å—à—Ç–∞–±—É–≤–∞–Ω–Ω—è –æ–∑–Ω–∞–∫ (StandardScaler) –∑–Ω–∞—á–Ω–æ –ø–æ–∫—Ä–∞—â–∏–ª–æ —è–∫—ñ—Å—Ç—å –º–æ–¥–µ–ª–µ–π, –æ—Å–æ–±–ª–∏–≤–æ –¥–ª—è SVC —ñ SGDClassifier.

–ü–æ—Ä—ñ–≤–Ω—è–Ω–Ω—è –º–æ–¥–µ–ª–µ–π:

–£—Å—ñ –º–æ–¥–µ–ª—ñ –¥–∞–ª–∏ –∑–∞–¥–æ–≤—ñ–ª—å–Ω—ñ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∏, –∞–ª–µ —Ç–æ—á–Ω—ñ—Å—Ç—å —ñ –ø–æ–≤–Ω–æ—Ç–∞ –∑–º—ñ–Ω—é–≤–∞–ª–∏—Å—å.

–î–æ –ø—ñ–¥–±–æ—Ä—É –≥—ñ–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ñ–≤ –Ω–∞–π–∫—Ä–∞—â–µ —Å–µ–±–µ –ø–æ–∫–∞–∑–∞–ª–∞ Logistic Regression, –ø—Ä–æ—Ç–µ –ø—ñ—Å–ª—è –Ω–∞–ª–∞—à—Ç—É–≤–∞–Ω—å SVC –≤–∏–π—à–æ–≤ –Ω–∞ –ø–µ—Ä—à–µ –º—ñ—Å—Ü–µ.

–ü—ñ–¥–±—ñ—Ä –ø–∞—Ä–∞–º–µ—Ç—Ä—ñ–≤ (HalvingGridSearchCV):

–¶–µ–π –º–µ—Ç–æ–¥ –¥–æ–∑–≤–æ–ª–∏–≤ –µ—Ñ–µ–∫—Ç–∏–≤–Ω–æ —ñ —à–≤–∏–¥–∫–æ –ø—ñ–¥—ñ–±—Ä–∞—Ç–∏ –Ω–∞–π–∫—Ä–∞—â—ñ –ø–∞—Ä–∞–º–µ—Ç—Ä–∏ –º–æ–¥–µ–ª–µ–π.

–ù–∞–π–∫—Ä–∞—â–∞ –º–æ–¥–µ–ª—å: SVC –∑ —è–¥—Ä–æ–º 'rbf' —ñ C=10, —è–∫–∞ –¥–∞–ª–∞ –Ω–∞–π–≤–∏—â—É —Ç–æ—á–Ω—ñ—Å—Ç—å —Ç–∞ F1-–º—ñ—Ä—É.

–¢–µ—Å—Ç –Ω–∞ —Ä–µ–∞–ª—å–Ω–∏—Ö –ø—Ä–∏–∫–ª–∞–¥–∞—Ö:

–í–∏–ø–∞–¥–∫–æ–≤—ñ 10 –ø—Ä–∏–∫–ª–∞–¥—ñ–≤ –∑ —Ç–µ—Å—Ç–æ–≤–æ—ó –≤–∏–±—ñ—Ä–∫–∏ –ø—Ä–æ–¥–µ–º–æ–Ω—Å—Ç—Ä—É–≤–∞–ª–∏ –≤–∏—Å–æ–∫—É –≤—ñ–¥–ø–æ–≤—ñ–¥–Ω—ñ—Å—Ç—å –º—ñ–∂ —Å–ø—Ä–∞–≤–∂–Ω—ñ–º–∏ —Ç–∞ –ø–µ—Ä–µ–¥–±–∞—á–µ–Ω–∏–º–∏ –∫–ª–∞—Å–∞–º–∏.

–¶–µ —Å–≤—ñ–¥—á–∏—Ç—å –ø—Ä–æ –¥–æ–±—Ä—É —É–∑–∞–≥–∞–ª—å–Ω—é—é—á—É –∑–¥–∞—Ç–Ω—ñ—Å—Ç—å –º–æ–¥–µ–ª–µ–π.

 –ó–∞–≥–∞–ª—å–Ω–∏–π –ø—ñ–¥—Å—É–º–æ–∫:
–ü—Ä–æ—î–∫—Ç –ø–æ–∫–∞–∑–∞–≤, —â–æ –∫–ª–∞—Å–∏—Ñ—ñ–∫–∞—Ü—ñ—è —Ä–∏–∑–∏–∫—É –¥—ñ–∞–±–µ—Ç—É –º–æ–∂–µ –±—É—Ç–∏ –µ—Ñ–µ–∫—Ç–∏–≤–Ω–æ —Ä–µ–∞–ª—ñ–∑–æ–≤–∞–Ω–∞ –∑–∞ –¥–æ–ø–æ–º–æ–≥–æ—é –∫–ª–∞—Å–∏—á–Ω–∏—Ö –º–æ–¥–µ–ª–µ–π –º–∞—à–∏–Ω–Ω–æ–≥–æ –Ω–∞–≤—á–∞–Ω–Ω—è.

–ü—Ä–∞–≤–∏–ª—å–Ω–∞ –æ–±—Ä–æ–±–∫–∞ –¥–∞–Ω–∏—Ö —Ç–∞ –ø—ñ–¥–±—ñ—Ä –≥—ñ–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ñ–≤ ‚Äî –∫–ª—é—á –¥–æ –ø—ñ–¥–≤–∏—â–µ–Ω–Ω—è —Ç–æ—á–Ω–æ—Å—Ç—ñ –º–æ–¥–µ–ª—ñ.

–ù–∞–π–µ—Ñ–µ–∫—Ç–∏–≤–Ω—ñ—à–∞ –º–æ–¥–µ–ª—å –¥–ª—è —Ü—å–æ–≥–æ –∑–∞–≤–¥–∞–Ω–Ω—è ‚Äî SVC –∑ –æ–ø—Ç–∏–º—ñ–∑–æ–≤–∞–Ω–∏–º–∏ –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º–∏.)