In [11]:
# -*- coding: utf-8 -*-
"""
Parabolic Three-Stage Regression with Quadratic CreditScore Features
Target: MSE < 25.00
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# 0. –ó–ê–ì–†–£–ó–ö–ê –ò –ü–†–ï–î–í–ê–†–ò–¢–ï–õ–¨–ù–´–ô –ê–ù–ê–õ–ò–ó
# ============================================================================

print("=" * 60)
print("–ü–ê–†–ê–ë–û–õ–ò–ß–ï–°–ö–ê–Ø –¢–†–ï–•–ì–†–£–ü–ü–û–í–ê–Ø –ú–û–î–ï–õ–¨")
print("=" * 60)

# –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
test_ids = test_df['ID'].copy()

# –ë–∞–∑–æ–≤–∞—è —Ñ–∏–ª—å—Ç—Ä–∞—Ü–∏—è
def filter_risk_score(df):
    if 'RiskScore' not in df.columns:
        return df

    df = df[
        (df['RiskScore'] != -9999999) &
        (df['RiskScore'] != 10000000) &
        (df['RiskScore'] >= 0) &
        (df['RiskScore'] <= 100)
    ].copy()

    return df

train_df = filter_risk_score(train_df)
print(f"–ü–æ—Å–ª–µ —Ñ–∏–ª—å—Ç—Ä–∞—Ü–∏–∏: {len(train_df)} –∑–∞–ø–∏—Å–µ–π")

# ============================================================================
# 1. –û–ü–†–ï–î–ï–õ–ï–ù–ò–ï –ü–ê–†–ê–ë–û–õ–ò–ß–ï–°–ö–û–ô –ó–ê–í–ò–°–ò–ú–û–°–¢–ò
# ============================================================================

# –ê–Ω–∞–ª–∏–∑–∏—Ä—É–µ–º –ø–∞—Ä–∞–±–æ–ª–∏—á–µ—Å–∫—É—é –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç—å
x = train_df['CreditScore'].values
y = train_df['RiskScore'].values
valid_mask = ~np.isnan(x) & ~np.isnan(y)
x_valid = x[valid_mask]
y_valid = y[valid_mask]

# –ù–∞—Ö–æ–¥–∏–º –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –ø–∞—Ä–∞–±–æ–ª—ã
coeffs = np.polyfit(x_valid, y_valid, 2)  # a, b, c –¥–ª—è ax¬≤ + bx + c
a, b, c = coeffs
vertex_x = -b / (2 * a)  # –í–µ—Ä—à–∏–Ω–∞ –ø–∞—Ä–∞–±–æ–ª—ã
vertex_y = a * vertex_x**2 + b * vertex_x + c

print(f"\nüìä –ü–∞—Ä–∞–º–µ—Ç—Ä—ã –ø–∞—Ä–∞–±–æ–ª—ã:")
print(f"  –£—Ä–∞–≤–Ω–µ–Ω–∏–µ: y = {a:.6f}x¬≤ + {b:.4f}x + {c:.2f}")
print(f"  –í–µ—Ä—à–∏–Ω–∞: x = {vertex_x:.1f}, y = {vertex_y:.1f}")

# –û–ø—Ä–µ–¥–µ–ª—è–µ–º –≥—Ä–∞–Ω–∏—Ü—ã –≥—Ä—É–ø–ø –Ω–∞ –æ—Å–Ω–æ–≤–µ –∞–Ω–∞–ª–∏–∑–∞ –¥–∞–Ω–Ω—ã—Ö
# –ê–Ω–∞–ª–∏–∑–∏—Ä—É–µ–º —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ CreditScore
credit_score_counts = train_df['CreditScore'].value_counts()
print(f"\nüìä –ê–Ω–∞–ª–∏–∑ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è CreditScore:")
print(f"  –ú–∏–Ω–∏–º—É–º: {train_df['CreditScore'].min()}")
print(f"  –ú–∞–∫—Å–∏–º—É–º: {train_df['CreditScore'].max()}")
print(f"  –ú–µ–¥–∏–∞–Ω–∞: {train_df['CreditScore'].median()}")
print(f"  25-–π –ø–µ—Ä—Ü–µ–Ω—Ç–∏–ª—å: {train_df['CreditScore'].quantile(0.25)}")
print(f"  75-–π –ø–µ—Ä—Ü–µ–Ω—Ç–∏–ª—å: {train_df['CreditScore'].quantile(0.75)}")

# –û–ø—Ä–µ–¥–µ–ª—è–µ–º –≥—Ä–∞–Ω–∏—Ü—ã –≥—Ä—É–ø–ø –Ω–∞ –æ—Å–Ω–æ–≤–µ –∫–≤–∞—Ä—Ç–∏–ª–µ–π
q25 = train_df['CreditScore'].quantile(0.25)
q75 = train_df['CreditScore'].quantile(0.75)

# –£—Å—Ç–∞–Ω–∞–≤–ª–∏–≤–∞–µ–º –≥—Ä–∞–Ω–∏—Ü—ã –≥—Ä—É–ø–ø
group1_boundary = 600  # –ë–æ–ª–µ–µ –∫–æ–Ω—Å–µ—Ä–≤–∞—Ç–∏–≤–Ω–∞—è –≥—Ä–∞–Ω–∏—Ü–∞
group2_boundary = 750  # –î—Ä—É–≥–∞—è –≥—Ä–∞–Ω–∏—Ü–∞ –¥–ª—è –ª—É—á—à–µ–≥–æ –±–∞–ª–∞–Ω—Å–∞

print(f"\nüéØ –ì—Ä–∞–Ω–∏—Ü—ã –≥—Ä—É–ø–ø:")
print(f"  –ì—Ä—É–ø–ø–∞ 1 (–Ω–∏–∑–∫–∏–µ): CreditScore ‚â§ {group1_boundary}")
print(f"  –ì—Ä—É–ø–ø–∞ 2 (—Å—Ä–µ–¥–Ω–∏–µ): {group1_boundary} < CreditScore ‚â§ {group2_boundary}")
print(f"  –ì—Ä—É–ø–ø–∞ 3 (–≤—ã—Å–æ–∫–∏–µ): CreditScore > {group2_boundary}")

# ============================================================================
# 2. –†–ê–ó–ë–ò–ï–ù–ò–ï –ù–ê –¢–†–ò –ì–†–£–ü–ü–´
# ============================================================================

y_all = train_df['RiskScore'].values
train_features = train_df.drop('RiskScore', axis=1)

# –û–ø—Ä–µ–¥–µ–ª—è–µ–º –º–∞—Å–∫–∏ –¥–ª—è —Ç—Ä–µ—Ö –≥—Ä—É–ø–ø
mask_low = (train_features['CreditScore'] <= group1_boundary)
mask_mid = (train_features['CreditScore'] > group1_boundary) & (train_features['CreditScore'] <= group2_boundary)
mask_high = (train_features['CreditScore'] > group2_boundary)

# –¢—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ –ø–æ –≥—Ä—É–ø–ø–∞–º
train_low = train_features[mask_low].copy()
train_mid = train_features[mask_mid].copy()
train_high = train_features[mask_high].copy()

y_low = y_all[mask_low]
y_mid = y_all[mask_mid]
y_high = y_all[mask_high]

# –¢–µ—Å—Ç–æ–≤—ã–µ –¥–∞–Ω–Ω—ã–µ –ø–æ –≥—Ä—É–ø–ø–∞–º
test_low = test_df[test_df['CreditScore'] <= group1_boundary].copy()
test_mid = test_df[(test_df['CreditScore'] > group1_boundary) & (test_df['CreditScore'] <= group2_boundary)].copy()
test_high = test_df[test_df['CreditScore'] > group2_boundary].copy()

print(f"\nüìä –†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö:")
print(f"  –ì—Ä—É–ø–ø–∞ –Ω–∏–∑–∫–∏–µ: {len(train_low)} train, {len(test_low)} test")
print(f"  –ì—Ä—É–ø–ø–∞ —Å—Ä–µ–¥–Ω–∏–µ: {len(train_mid)} train, {len(test_mid)} test")
print(f"  –ì—Ä—É–ø–ø–∞ –≤—ã—Å–æ–∫–∏–µ: {len(train_high)} train, {len(test_high)} test")

print(f"\nüìä –°—Ä–µ–¥–Ω–∏–π RiskScore:")
print(f"  –ì—Ä—É–ø–ø–∞ –Ω–∏–∑–∫–∏–µ: {y_low.mean():.2f} ¬± {y_low.std():.2f}")
print(f"  –ì—Ä—É–ø–ø–∞ —Å—Ä–µ–¥–Ω–∏–µ: {y_mid.mean():.2f} ¬± {y_mid.std():.2f}")
print(f"  –ì—Ä—É–ø–ø–∞ –≤—ã—Å–æ–∫–∏–µ: {y_high.mean():.2f} ¬± {y_high.std():.2f}")

# ============================================================================
# 3. –§–£–ù–ö–¶–ò–Ø –ü–†–ï–î–û–ë–†–ê–ë–û–¢–ö–ò –° –ö–í–ê–î–†–ê–¢–ò–ß–ù–´–ú–ò –ü–†–ò–ó–ù–ê–ö–ê–ú–ò
# ============================================================================

def parabolic_preprocessing(df_train, df_test, y_train=None, group_name='mid'):
    """
    –ü—Ä–µ–¥–æ–±—Ä–∞–±–æ—Ç–∫–∞ —Å –∫–≤–∞–¥—Ä–∞—Ç–∏—á–Ω—ã–º–∏ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è–º–∏ –¥–ª—è –ø–∞—Ä–∞–±–æ–ª–∏—á–µ—Å–∫–æ–π –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏
    """
    df_train = df_train.copy()
    df_test = df_test.copy()

    # –°–æ—Ö—Ä–∞–Ω—è–µ–º ID —Ç–µ—Å—Ç–∞
    test_id = None
    if 'ID' in df_test.columns:
        test_id = df_test['ID'].copy()

    # –£–¥–∞–ª—è–µ–º ID
    if 'ID' in df_train.columns:
        df_train = df_train.drop('ID', axis=1)
    if 'ID' in df_test.columns:
        df_test = df_test.drop('ID', axis=1)

    # 1. –£–î–ê–õ–Ø–ï–ú –ü–†–û–ë–õ–ï–ú–ù–´–ï –ü–†–ò–ó–ù–ê–ö–ò
    to_drop = [
        'ApplicationDate', 'MonthlyLoanPayment', 'InterestRate',
        'BaseInterestRate', 'TotalDebtToIncomeRatio',
        'NetWorth', 'TotalAssets', 'MonthlyIncome'
    ]

    for col in to_drop:
        if col in df_train.columns:
            df_train = df_train.drop(col, axis=1)
        if col in df_test.columns:
            df_test = df_test.drop(col, axis=1)

    # 2. –ö–õ–Æ–ß–ï–í–´–ï –ö–í–ê–î–†–ê–¢–ò–ß–ù–´–ï –ü–†–ï–û–ë–†–ê–ó–û–í–ê–ù–ò–Ø

    # CreditScore –∫–≤–∞–¥—Ä–∞—Ç–∏—á–Ω—ã–µ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è
    if 'CreditScore' in df_train.columns:
        # –û—Å–Ω–æ–≤–Ω–æ–µ –∫–≤–∞–¥—Ä–∞—Ç–∏—á–Ω–æ–µ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ
        df_train['CreditScore_squared'] = df_train['CreditScore'] ** 2
        df_test['CreditScore_squared'] = df_test['CreditScore'] ** 2

        # –í–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏–µ —Å –¥—Ä—É–≥–∏–º–∏ –ø—Ä–∏–∑–Ω–∞–∫–∞–º–∏
        if 'DebtToIncomeRatio' in df_train.columns:
            df_train['CreditScore_x_DTI'] = df_train['CreditScore'] * df_train['DebtToIncomeRatio']
            df_test['CreditScore_x_DTI'] = df_test['CreditScore'] * df_test['DebtToIncomeRatio']

            df_train['CreditScore_sq_x_DTI'] = df_train['CreditScore_squared'] * df_train['DebtToIncomeRatio']
            df_test['CreditScore_sq_x_DTI'] = df_test['CreditScore_squared'] * df_test['DebtToIncomeRatio']

        # –†–∞—Å—Å—Ç–æ—è–Ω–∏–µ –¥–æ –≤–µ—Ä—à–∏–Ω—ã –ø–∞—Ä–∞–±–æ–ª—ã
        df_train['Dist_to_Vertex'] = np.abs(df_train['CreditScore'] - vertex_x)
        df_test['Dist_to_Vertex'] = np.abs(df_test['CreditScore'] - vertex_x)

        # –ö–≤–∞–¥—Ä–∞—Ç —Ä–∞—Å—Å—Ç–æ—è–Ω–∏—è –¥–æ –≤–µ—Ä—à–∏–Ω—ã (–ø–∞—Ä–∞–±–æ–ª–∏—á–µ—Å–∫–∏–π —à—Ç—Ä–∞—Ñ)
        df_train['Dist_to_Vertex_sq'] = (df_train['CreditScore'] - vertex_x) ** 2
        df_test['Dist_to_Vertex_sq'] = (df_test['CreditScore'] - vertex_x) ** 2

    # –ö–≤–∞–¥—Ä–∞—Ç–∏—á–Ω—ã–µ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è –¥–ª—è –¥—Ä—É–≥–∏—Ö —á–∏—Å–ª–æ–≤—ã—Ö –ø—Ä–∏–∑–Ω–∞–∫–æ–≤
    numeric_features = ['Age', 'AnnualIncome', 'LoanAmount', 'DebtToIncomeRatio',
                       'NumberOfOpenCreditLines', 'LengthOfCreditHistory']

    for feature in numeric_features:
        if feature in df_train.columns:
            # –ö–≤–∞–¥—Ä–∞—Ç
            df_train[f'{feature}_squared'] = df_train[feature] ** 2
            df_test[f'{feature}_squared'] = df_test[feature] ** 2

    # –õ–æ–≥–∞—Ä–∏—Ñ–º–∏—á–µ—Å–∫–∏–µ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è –¥–ª—è skewed features
    skewed_features = ['AnnualIncome', 'LoanAmount', 'TotalLiabilities']
    for feature in skewed_features:
        if feature in df_train.columns:
            df_train[f'log_{feature}'] = np.log1p(np.abs(df_train[feature]) + 1)
            df_test[f'log_{feature}'] = np.log1p(np.abs(df_test[feature]) + 1)

    # 3. –§–ò–ù–ê–ù–°–û–í–´–ï –°–û–û–¢–ù–û–®–ï–ù–ò–Ø

    if 'AnnualIncome' in df_train.columns and 'LoanAmount' in df_train.columns:
        df_train['IncomeToLoan'] = df_train['AnnualIncome'] / (df_train['LoanAmount'] + 1)
        df_test['IncomeToLoan'] = df_test['AnnualIncome'] / (df_test['LoanAmount'] + 1)

        # –ö–≤–∞–¥—Ä–∞—Ç —Å–æ–æ—Ç–Ω–æ—à–µ–Ω–∏—è
        df_train['IncomeToLoan_sq'] = df_train['IncomeToLoan'] ** 2
        df_test['IncomeToLoan_sq'] = df_test['IncomeToLoan'] ** 2

    if 'TotalLiabilities' in df_train.columns and 'AnnualIncome' in df_train.columns:
        df_train['LiabilitiesToIncome'] = df_train['TotalLiabilities'] / (df_train['AnnualIncome'] + 1)
        df_test['LiabilitiesToIncome'] = df_test['TotalLiabilities'] / (df_test['AnnualIncome'] + 1)

    # 4. –§–õ–ê–ì–ò –ò –ë–ò–ù–ê–†–ù–´–ï –ü–†–ò–ó–ù–ê–ö–ò

    if 'DebtToIncomeRatio' in df_train.columns:
        df_train['HighDebt'] = (df_train['DebtToIncomeRatio'] > 0.4).astype(int)
        df_test['HighDebt'] = (df_test['DebtToIncomeRatio'] > 0.4).astype(int)

    if 'CreditScore' in df_train.columns:
        df_train['CreditScore_IsMax'] = (df_train['CreditScore'] == 850).astype(int)
        df_test['CreditScore_IsMax'] = (df_test['CreditScore'] == 850).astype(int)

        df_train['CreditScore_IsMin'] = (df_train['CreditScore'] == 300).astype(int)
        df_test['CreditScore_IsMin'] = (df_test['CreditScore'] == 300).astype(int)

    # 5. –û–ë–†–ê–ë–û–¢–ö–ê –ü–†–û–ü–£–°–ö–û–í

    for col in df_train.columns:
        if df_train[col].dtype in [np.float64, np.int64]:
            median_val = df_train[col].median()
            df_train[col] = df_train[col].fillna(median_val)
            df_test[col] = df_test[col].fillna(median_val)
        else:
            mode_val = df_train[col].mode()
            fill_val = mode_val[0] if not mode_val.empty else 'Unknown'
            df_train[col] = df_train[col].fillna(fill_val)
            df_test[col] = df_test[col].fillna(fill_val)

    # 6. –û–ë–†–ê–ë–û–¢–ö–ê –í–´–ë–†–û–°–û–í (winsorizing)
    winzorize_cols = ['AnnualIncome', 'DebtToIncomeRatio', 'LoanAmount']
    for col in winzorize_cols:
        if col in df_train.columns:
            df_train[col] = stats.mstats.winsorize(df_train[col].values, limits=[0.03, 0.03])
            df_test[col] = stats.mstats.winsorize(df_test[col].values, limits=[0.03, 0.03])

    # 7. ONE-HOT ENCODING

    cat_cols = df_train.select_dtypes(include=['object']).columns.tolist()
    if cat_cols:
        df_train = pd.get_dummies(df_train, columns=cat_cols, drop_first=True)
        df_test = pd.get_dummies(df_test, columns=cat_cols, drop_first=True)

    # 8. –í–´–†–ê–í–ù–ò–í–ê–ù–ò–ï –ü–†–ò–ó–ù–ê–ö–û–í

    common_cols = df_train.columns.intersection(df_test.columns)
    df_train = df_train[common_cols]
    df_test = df_test[common_cols]

    for col in df_train.columns:
        if col not in df_test.columns:
            df_test[col] = 0

    df_test = df_test[df_train.columns]

    # 9. –ü–†–ï–û–ë–†–ê–ó–û–í–ê–ù–ò–ï –ò –ú–ê–°–®–¢–ê–ë–ò–†–û–í–ê–ù–ò–ï

    X_train = df_train.values.astype(np.float64)
    X_test = df_test.values.astype(np.float64)

    X_train = np.nan_to_num(X_train, nan=0.0, posinf=1e10, neginf=-1e10)
    X_test = np.nan_to_num(X_test, nan=0.0, posinf=1e10, neginf=-1e10)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print(f"  –ì—Ä—É–ø–ø–∞ {group_name}: {X_train_scaled.shape[1]} –ø—Ä–∏–∑–Ω–∞–∫–æ–≤")

    return X_train_scaled, X_test_scaled, test_id

# ============================================================================
# 4. –ü–†–ï–î–û–ë–†–ê–ë–û–¢–ö–ê –î–õ–Ø –ö–ê–ñ–î–û–ô –ì–†–£–ü–ü–´
# ============================================================================

print("\n" + "=" * 60)
print("–ü–†–ï–î–û–ë–†–ê–ë–û–¢–ö–ê –î–ê–ù–ù–´–•")
print("=" * 60)

print("\n–û–±—Ä–∞–±–æ—Ç–∫–∞ –≥—Ä—É–ø–ø—ã: –Ω–∏–∑–∫–∏–µ (CreditScore ‚â§ {})".format(group1_boundary))
X_low_train, X_low_test, test_ids_low = parabolic_preprocessing(
    train_low, test_low, y_low, group_name='–Ω–∏–∑–∫–∏–µ'
)

print("\n–û–±—Ä–∞–±–æ—Ç–∫–∞ –≥—Ä—É–ø–ø—ã: —Å—Ä–µ–¥–Ω–∏–µ ({} < CreditScore ‚â§ {})".format(group1_boundary, group2_boundary))
X_mid_train, X_mid_test, test_ids_mid = parabolic_preprocessing(
    train_mid, test_mid, y_mid, group_name='—Å—Ä–µ–¥–Ω–∏–µ'
)

print("\n–û–±—Ä–∞–±–æ—Ç–∫–∞ –≥—Ä—É–ø–ø—ã: –≤—ã—Å–æ–∫–∏–µ (CreditScore > {})".format(group2_boundary))
X_high_train, X_high_test, test_ids_high = parabolic_preprocessing(
    train_high, test_high, y_high, group_name='–≤—ã—Å–æ–∫–∏–µ'
)

# ============================================================================
# 5. –ò–°–ü–†–ê–í–õ–ï–ù–ù–ê–Ø –§–£–ù–ö–¶–ò–Ø –î–õ–Ø –û–¢–ë–û–†–ê –ü–†–ò–ó–ù–ê–ö–û–í –ò –ö–†–û–°–°-–í–ê–õ–ò–î–ê–¶–ò–ò
# ============================================================================

def train_with_cross_val(X_train, y_train, X_test, group_name, n_features=40):
    """
    –û–±—É—á–µ–Ω–∏–µ —Å –∫—Ä–æ—Å—Å-–≤–∞–ª–∏–¥–∞—Ü–∏–µ–π –∏ –æ—Ç–±–æ—Ä–æ–º –ø—Ä–∏–∑–Ω–∞–∫–æ–≤
    """
    # 1. –û—Ç–±–æ—Ä –ø—Ä–∏–∑–Ω–∞–∫–æ–≤ –ø–æ –∫–æ—Ä—Ä–µ–ª—è—Ü–∏–∏
    correlations = []
    for i in range(X_train.shape[1]):
        corr = np.corrcoef(X_train[:, i], y_train)[0, 1]
        correlations.append(abs(corr))

    # –û—Ç–±–∏—Ä–∞–µ–º —Ç–æ–ø-n_features –ø—Ä–∏–∑–Ω–∞–∫–æ–≤
    top_idx = np.argsort(correlations)[-n_features:]
    X_train_sel = X_train[:, top_idx]

    # –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ X_test_sel (–º–æ–∂–µ—Ç –±—ã—Ç—å –ø—É—Å—Ç—ã–º)
    if len(X_test) > 0:
        X_test_sel = X_test[:, top_idx]
    else:
        X_test_sel = np.array([]).reshape(0, n_features)  # –°–æ–∑–¥–∞–µ–º –ø—É—Å—Ç–æ–π –º–∞—Å—Å–∏–≤

    # 2. –ö—Ä–æ—Å—Å-–≤–∞–ª–∏–¥–∞—Ü–∏—è –¥–ª—è –ø–æ–¥–±–æ—Ä–∞ alpha
    best_alpha = None
    best_mse = float('inf')
    all_scores = []

    # –†–∞–∑–Ω—ã–µ –¥–∏–∞–ø–∞–∑–æ–Ω—ã alpha –¥–ª—è —Ä–∞–∑–Ω—ã—Ö –≥—Ä—É–ø–ø
    if group_name == '–Ω–∏–∑–∫–∏–µ':
        alphas = [0.1, 0.5, 1, 2, 5, 10, 20, 50, 100]
    elif group_name == '—Å—Ä–µ–¥–Ω–∏–µ':
        alphas = [0.01, 0.1, 0.5, 1, 2, 5, 10, 20, 50]
    else:  # –≤—ã—Å–æ–∫–∏–µ
        alphas = [0.5, 1, 2, 5, 10, 20, 50, 100]

    kf = KFold(n_splits=min(5, len(X_train_sel)), shuffle=True, random_state=42)

    for alpha in alphas:
        fold_scores = []

        for train_idx, val_idx in kf.split(X_train_sel):
            X_tr, X_val = X_train_sel[train_idx], X_train_sel[val_idx]
            y_tr, y_val = y_train[train_idx], y_val[val_idx]

            model = Ridge(alpha=alpha, random_state=42)
            model.fit(X_tr, y_tr)

            y_pred = model.predict(X_val)
            mse = np.mean((y_val - y_pred) ** 2)
            fold_scores.append(mse)

        avg_mse = np.mean(fold_scores)
        all_scores.append((alpha, avg_mse))

        if avg_mse < best_mse:
            best_mse = avg_mse
            best_alpha = alpha

    # 3. –û–±—É—á–µ–Ω–∏–µ —Ñ–∏–Ω–∞–ª—å–Ω–æ–π –º–æ–¥–µ–ª–∏
    final_model = Ridge(alpha=best_alpha, random_state=42)
    final_model.fit(X_train_sel, y_train)

    # 4. –ü—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è
    train_pred = final_model.predict(X_train_sel)
    train_mse = np.mean((y_train - train_pred) ** 2)

    if len(X_test_sel) > 0:
        test_pred = final_model.predict(X_test_sel)
    else:
        test_pred = np.array([])

    print(f"  {group_name}: alpha={best_alpha}, CV MSE={best_mse:.2f}, Train MSE={train_mse:.2f}")

    return final_model, X_test_sel, test_pred, best_mse, top_idx

# ============================================================================
# 6. –û–ë–£–ß–ï–ù–ò–ï –ú–û–î–ï–õ–ï–ô –î–õ–Ø –ö–ê–ñ–î–û–ô –ì–†–£–ü–ü–´ (–° –ó–ê–©–ò–¢–û–ô –û–¢ –ü–£–°–¢–´–• –î–ê–ù–ù–´–•)
# ============================================================================

print("\n" + "=" * 60)
print("–û–ë–£–ß–ï–ù–ò–ï –ú–û–î–ï–õ–ï–ô –° –ö–†–û–°–°-–í–ê–õ–ò–î–ê–¶–ò–ï–ô")
print("=" * 60)

models = {}
predictions = {}
cv_mses = {}

# –ì—Ä—É–ø–ø–∞ –Ω–∏–∑–∫–∏–µ (—Ç–æ–ª—å–∫–æ –µ—Å–ª–∏ –µ—Å—Ç—å –¥–∞–Ω–Ω—ã–µ)
if len(train_low) > 0:
    print(f"\nüéØ –û–±—É—á–µ–Ω–∏–µ –≥—Ä—É–ø–ø—ã '–Ω–∏–∑–∫–∏–µ' (n={len(train_low)})")
    if len(train_low) >= 10:  # –ú–∏–Ω–∏–º—É–º 10 –∑–∞–ø–∏—Å–µ–π –¥–ª—è –∫—Ä–æ—Å—Å-–≤–∞–ª–∏–¥–∞—Ü–∏–∏
        model_low, X_low_test_sel, pred_low, mse_low, idx_low = train_with_cross_val(
            X_low_train, y_low, X_low_test, '–Ω–∏–∑–∫–∏–µ', n_features=min(35, X_low_train.shape[1])
        )
        models['low'] = model_low
        predictions['low'] = pred_low
        cv_mses['low'] = mse_low
    else:
        print("  ‚ö†Ô∏è  –°–ª–∏—à–∫–æ–º –º–∞–ª–æ –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –∫—Ä–æ—Å—Å-–≤–∞–ª–∏–¥–∞—Ü–∏–∏")
        # –ò—Å–ø–æ–ª—å–∑—É–µ–º –ø—Ä–æ—Å—Ç—É—é –º–æ–¥–µ–ª—å
        model_low = Ridge(alpha=10.0, random_state=42)
        model_low.fit(X_low_train, y_low)
        models['low'] = model_low
        if len(X_low_test) > 0:
            predictions['low'] = model_low.predict(X_low_test)
        else:
            predictions['low'] = np.array([])
        cv_mses['low'] = np.mean((y_low - model_low.predict(X_low_train)) ** 2)
        print(f"  –ü—Ä–æ—Å—Ç–∞—è –º–æ–¥–µ–ª—å: alpha=10.0, Train MSE={cv_mses['low']:.2f}")
else:
    print("  ‚ö†Ô∏è  –ù–µ—Ç –¥–∞–Ω–Ω—ã—Ö –≤ –≥—Ä—É–ø–ø–µ '–Ω–∏–∑–∫–∏–µ'")

# –ì—Ä—É–ø–ø–∞ —Å—Ä–µ–¥–Ω–∏–µ (–æ—Å–Ω–æ–≤–Ω–∞—è –≥—Ä—É–ø–ø–∞)
print(f"\nüéØ –û–±—É—á–µ–Ω–∏–µ –≥—Ä—É–ø–ø—ã '—Å—Ä–µ–¥–Ω–∏–µ' (n={len(train_mid)})")
model_mid, X_mid_test_sel, pred_mid, mse_mid, idx_mid = train_with_cross_val(
    X_mid_train, y_mid, X_mid_test, '—Å—Ä–µ–¥–Ω–∏–µ', n_features=min(40, X_mid_train.shape[1])
)
models['mid'] = model_mid
predictions['mid'] = pred_mid
cv_mses['mid'] = mse_mid

# –ì—Ä—É–ø–ø–∞ –≤—ã—Å–æ–∫–∏–µ (—Ç–æ–ª—å–∫–æ –µ—Å–ª–∏ –µ—Å—Ç—å –¥–∞–Ω–Ω—ã–µ)
if len(train_high) > 0:
    print(f"\nüéØ –û–±—É—á–µ–Ω–∏–µ –≥—Ä—É–ø–ø—ã '–≤—ã—Å–æ–∫–∏–µ' (n={len(train_high)})")
    if len(train_high) >= 10:
        model_high, X_high_test_sel, pred_high, mse_high, idx_high = train_with_cross_val(
            X_high_train, y_high, X_high_test, '–≤—ã—Å–æ–∫–∏–µ', n_features=min(35, X_high_train.shape[1])
        )
        models['high'] = model_high
        predictions['high'] = pred_high
        cv_mses['high'] = mse_high
    else:
        print("  ‚ö†Ô∏è  –°–ª–∏—à–∫–æ–º –º–∞–ª–æ –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –∫—Ä–æ—Å—Å-–≤–∞–ª–∏–¥–∞—Ü–∏–∏")
        model_high = Ridge(alpha=5.0, random_state=42)
        model_high.fit(X_high_train, y_high)
        models['high'] = model_high
        if len(X_high_test) > 0:
            predictions['high'] = model_high.predict(X_high_test)
        else:
            predictions['high'] = np.array([])
        cv_mses['high'] = np.mean((y_high - model_high.predict(X_high_train)) ** 2)
        print(f"  –ü—Ä–æ—Å—Ç–∞—è –º–æ–¥–µ–ª—å: alpha=5.0, Train MSE={cv_mses['high']:.2f}")
else:
    print("  ‚ö†Ô∏è  –ù–µ—Ç –¥–∞–Ω–Ω—ã—Ö –≤ –≥—Ä—É–ø–ø–µ '–≤—ã—Å–æ–∫–∏–µ'")

# ============================================================================
# 7. –°–ë–û–†–ö–ê –§–ò–ù–ê–õ–¨–ù–´–• –ü–†–ï–î–°–ö–ê–ó–ê–ù–ò–ô
# ============================================================================

print("\n" + "=" * 60)
print("–°–ë–û–†–ö–ê –ü–†–ï–î–°–ö–ê–ó–ê–ù–ò–ô")
print("=" * 60)

# –°–æ–∑–¥–∞–µ–º –º–∞—Å—Å–∏–≤ –¥–ª—è –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π
y_test_pred = np.zeros(len(test_df))

# –û–ø—Ä–µ–¥–µ–ª—è–µ–º –∏–Ω–¥–µ–∫—Å—ã –¥–ª—è –∫–∞–∂–¥–æ–π –≥—Ä—É–ø–ø—ã –≤ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö
idx_test_low = test_df[test_df['CreditScore'] <= group1_boundary].index
idx_test_mid = test_df[(test_df['CreditScore'] > group1_boundary) &
                       (test_df['CreditScore'] <= group2_boundary)].index
idx_test_high = test_df[test_df['CreditScore'] > group2_boundary].index

# –ó–∞–ø–æ–ª–Ω—è–µ–º –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è —Å –ø—Ä–æ–≤–µ—Ä–∫–æ–π
print("\nüìä –ó–∞–ø–æ–ª–Ω–µ–Ω–∏–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π –ø–æ –≥—Ä—É–ø–ø–∞–º:")

# –ì—Ä—É–ø–ø–∞ –Ω–∏–∑–∫–∏–µ
if 'low' in predictions and len(predictions['low']) > 0:
    for i, idx in enumerate(idx_test_low):
        if i < len(predictions['low']):
            y_test_pred[idx] = predictions['low'][i]
    print(f"  –ì—Ä—É–ø–ø–∞ –Ω–∏–∑–∫–∏–µ: {len(predictions['low'])} –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π")
elif len(idx_test_low) > 0:
    print(f"  ‚ö†Ô∏è  –ì—Ä—É–ø–ø–∞ –Ω–∏–∑–∫–∏–µ: –Ω–µ—Ç –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π, –∑–∞–ø–æ–ª–Ω—è–µ–º —Å—Ä–µ–¥–Ω–∏–º")
    # –ò—Å–ø–æ–ª—å–∑—É–µ–º —Å—Ä–µ–¥–Ω–µ–µ –∑–Ω–∞—á–µ–Ω–∏–µ –∏–∑ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö
    mean_low = y_low.mean() if len(y_low) > 0 else 50.0
    for idx in idx_test_low:
        y_test_pred[idx] = mean_low

# –ì—Ä—É–ø–ø–∞ —Å—Ä–µ–¥–Ω–∏–µ
if 'mid' in predictions and len(predictions['mid']) > 0:
    for i, idx in enumerate(idx_test_mid):
        if i < len(predictions['mid']):
            y_test_pred[idx] = predictions['mid'][i]
    print(f"  –ì—Ä—É–ø–ø–∞ —Å—Ä–µ–¥–Ω–∏–µ: {len(predictions['mid'])} –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π")

# –ì—Ä—É–ø–ø–∞ –≤—ã—Å–æ–∫–∏–µ
if 'high' in predictions and len(predictions['high']) > 0:
    for i, idx in enumerate(idx_test_high):
        if i < len(predictions['high']):
            y_test_pred[idx] = predictions['high'][i]
    print(f"  –ì—Ä—É–ø–ø–∞ –≤—ã—Å–æ–∫–∏–µ: {len(predictions['high'])} –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π")
elif len(idx_test_high) > 0:
    print(f"  ‚ö†Ô∏è  –ì—Ä—É–ø–ø–∞ –≤—ã—Å–æ–∫–∏–µ: –Ω–µ—Ç –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π, –∑–∞–ø–æ–ª–Ω—è–µ–º —Å—Ä–µ–¥–Ω–∏–º")
    # –ò—Å–ø–æ–ª—å–∑—É–µ–º —Å—Ä–µ–¥–Ω–µ–µ –∑–Ω–∞—á–µ–Ω–∏–µ –∏–∑ —Ç—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö
    mean_high = y_high.mean() if len(y_high) > 0 else 50.0
    for idx in idx_test_high:
        y_test_pred[idx] = mean_high

# –û–±—Ä–∞–±–æ—Ç–∫–∞ –ø—Ä–æ–ø—É—â–µ–Ω–Ω—ã—Ö –∑–Ω–∞—á–µ–Ω–∏–π
missing_mask = (y_test_pred == 0)
if missing_mask.any():
    print(f"  ‚ö†Ô∏è  –ù–∞–π–¥–µ–Ω–æ {missing_mask.sum()} –ø—Ä–æ–ø—É—â–µ–Ω–Ω—ã—Ö –∑–Ω–∞—á–µ–Ω–∏–π")

    # –ò—Å–ø–æ–ª—å–∑—É–µ–º –ø–∞—Ä–∞–±–æ–ª–∏—á–µ—Å–∫—É—é –º–æ–¥–µ–ª—å –¥–ª—è –∑–∞–ø–æ–ª–Ω–µ–Ω–∏—è
    for idx in np.where(missing_mask)[0]:
        cs = test_df.loc[idx, 'CreditScore']
        # –ü–∞—Ä–∞–±–æ–ª–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–µ
        y_para = a * cs**2 + b * cs + c
        y_test_pred[idx] = np.clip(y_para, 0, 100)

# –ü–æ—Å—Ç–æ–±—Ä–∞–±–æ—Ç–∫–∞
y_test_pred = np.clip(y_test_pred, 0, 100)
y_test_pred = np.round(y_test_pred, 1)

# ============================================================================
# 8. –ê–ù–ê–õ–ò–ó –†–ï–ó–£–õ–¨–¢–ê–¢–û–í
# ============================================================================

print("\n" + "=" * 60)
print("–ê–ù–ê–õ–ò–ó –†–ï–ó–£–õ–¨–¢–ê–¢–û–í")
print("=" * 60)

print(f"\nüìä –°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π:")
print(f"  –ú–∏–Ω–∏–º—É–º: {y_test_pred.min():.1f}")
print(f"  –ú–∞–∫—Å–∏–º—É–º: {y_test_pred.max():.1f}")
print(f"  –°—Ä–µ–¥–Ω–µ–µ: {y_test_pred.mean():.1f}")
print(f"  –ú–µ–¥–∏–∞–Ω–∞: {np.median(y_test_pred):.1f}")
print(f"  –°—Ç–∞–Ω–¥–∞—Ä—Ç–Ω–æ–µ –æ—Ç–∫–ª–æ–Ω–µ–Ω–∏–µ: {y_test_pred.std():.1f}")

print(f"\nüìä –†–µ–∑—É–ª—å—Ç–∞—Ç—ã –∫—Ä–æ—Å—Å-–≤–∞–ª–∏–¥–∞—Ü–∏–∏:")
for group_name, mse in cv_mses.items():
    if group_name == 'low':
        group_label = '–Ω–∏–∑–∫–∏–µ'
    elif group_name == 'mid':
        group_label = '—Å—Ä–µ–¥–Ω–∏–µ'
    else:
        group_label = '–≤—ã—Å–æ–∫–∏–µ'
    print(f"  –ì—Ä—É–ø–ø–∞ {group_label}: MSE = {mse:.2f}")

# ============================================================================
# 9. –°–û–•–†–ê–ù–ï–ù–ò–ï –†–ï–ó–£–õ–¨–¢–ê–¢–û–í
# ============================================================================

print("\n" + "=" * 60)
print("–°–û–•–†–ê–ù–ï–ù–ò–ï –†–ï–ó–£–õ–¨–¢–ê–¢–û–í")
print("=" * 60)

# –°–æ–∑–¥–∞–µ–º submission —Ñ–∞–π–ª
submission = pd.DataFrame({
    'ID': test_ids,
    'RiskScore': y_test_pred
})

# –ü—Ä–æ–≤–µ—Ä—è–µ–º –∫–æ—Ä—Ä–µ–∫—Ç–Ω–æ—Å—Ç—å
print(f"\n‚úÖ –ü—Ä–æ–≤–µ—Ä–∫–∞ submission:")
print(f"  –í—Å–µ–≥–æ –∑–∞–ø–∏—Å–µ–π: {len(submission)}")
print(f"  NaN –∑–Ω–∞—á–µ–Ω–∏–π: {submission['RiskScore'].isnull().sum()}")
print(f"  –í–Ω–µ –¥–∏–∞–ø–∞–∑–æ–Ω–∞ [0, 100]: {((submission['RiskScore'] < 0) | (submission['RiskScore'] > 100)).sum()}")

# –°–æ—Ö—Ä–∞–Ω—è–µ–º
submission.to_csv('parabolic_three_groups_fixed_submission.csv', index=False)
print(f"\n‚úÖ –†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ parabolic_three_groups_fixed_submission.csv")
print(f"\n–ü–µ—Ä–≤—ã–µ 10 –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π:")
print(submission.head(10))

# ============================================================================
# 10. –í–ò–ó–£–ê–õ–ò–ó–ê–¶–ò–Ø –†–ï–ó–£–õ–¨–¢–ê–¢–û–í
# ============================================================================

# –í–∏–∑—É–∞–ª–∏–∑–∞—Ü–∏—è —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤
plt.figure(figsize=(15, 5))

# 1. –†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π
plt.subplot(1, 3, 1)
plt.hist(y_test_pred, bins=30, color='purple', edgecolor='black', alpha=0.7)
plt.xlabel('Predicted RiskScore')
plt.ylabel('–ß–∞—Å—Ç–æ—Ç–∞')
plt.title('–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–Ω—ã—Ö RiskScore')
plt.grid(True, alpha=0.3)

# 2. –ü–∞—Ä–∞–±–æ–ª–∏—á–µ—Å–∫–∞—è –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç—å
plt.subplot(1, 3, 2)
plt.scatter(train_df['CreditScore'], train_df['RiskScore'], alpha=0.1, s=5, color='blue', label='Train data')

# –î–æ–±–∞–≤–ª—è–µ–º –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è (–µ—Å–ª–∏ –Ω–µ —Å–ª–∏—à–∫–æ–º –º–Ω–æ–≥–æ)
if len(test_df) < 1000:
    plt.scatter(test_df['CreditScore'], y_test_pred, alpha=0.3, s=10, color='red', label='Predictions')

# –ü–∞—Ä–∞–±–æ–ª–∏—á–µ—Å–∫–∞—è –∫—Ä–∏–≤–∞—è
x_range = np.linspace(300, 850, 100)
y_range = a * x_range**2 + b * x_range + c
plt.plot(x_range, y_range, 'g-', linewidth=2, label='–ü–∞—Ä–∞–±–æ–ª–∞')

# –ì—Ä–∞–Ω–∏—Ü—ã –≥—Ä—É–ø–ø
plt.axvline(x=group1_boundary, color='orange', linestyle='--', label=f'–ì—Ä–∞–Ω–∏—Ü–∞ 1 ({group1_boundary})')
plt.axvline(x=group2_boundary, color='brown', linestyle='--', label=f'–ì—Ä–∞–Ω–∏—Ü–∞ 2 ({group2_boundary})')

plt.xlabel('CreditScore')
plt.ylabel('RiskScore')
plt.title('–ü–∞—Ä–∞–±–æ–ª–∏—á–µ—Å–∫–∞—è –º–æ–¥–µ–ª—å –∏ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è')
plt.legend()
plt.grid(True, alpha=0.3)

# 3. MSE –ø–æ –≥—Ä—É–ø–ø–∞–º
plt.subplot(1, 3, 3)
group_labels = []
mse_values = []

for group_name, mse in cv_mses.items():
    if group_name == 'low':
        group_labels.append('–ù–∏–∑–∫–∏–µ')
    elif group_name == 'mid':
        group_labels.append('–°—Ä–µ–¥–Ω–∏–µ')
    else:
        group_labels.append('–í—ã—Å–æ–∫–∏–µ')
    mse_values.append(mse)

if mse_values:
    colors = ['red' if mse > 25 else 'green' for mse in mse_values]
    bars = plt.bar(group_labels, mse_values, color=colors, alpha=0.7)
    plt.axhline(y=25, color='black', linestyle='--', label='–¶–µ–ª—å (MSE=25)')
    plt.ylabel('MSE')
    plt.title('MSE –ø–æ –≥—Ä—É–ø–ø–∞–º')
    plt.grid(True, alpha=0.3, axis='y')

    # –î–æ–±–∞–≤–ª—è–µ–º –∑–Ω–∞—á–µ–Ω–∏—è –Ω–∞ —Å—Ç–æ–ª–±—Ü—ã
    for bar, mse in zip(bars, mse_values):
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height,
                f'{mse:.2f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

print(f"\n{'='*60}")
print("–ü–ê–†–ê–ë–û–õ–ò–ß–ï–°–ö–ê–Ø –¢–†–ï–•–ì–†–£–ü–ü–û–í–ê–Ø –ú–û–î–ï–õ–¨ –ó–ê–í–ï–†–®–ï–ù–ê!")
print(f"{'='*60}")

–ü–ê–†–ê–ë–û–õ–ò–ß–ï–°–ö–ê–Ø –¢–†–ï–•–ì–†–£–ü–ü–û–í–ê–Ø –ú–û–î–ï–õ–¨
–ü–æ—Å–ª–µ —Ñ–∏–ª—å—Ç—Ä–∞—Ü–∏–∏: 10272 –∑–∞–ø–∏—Å–µ–π

üìä –ü–∞—Ä–∞–º–µ—Ç—Ä—ã –ø–∞—Ä–∞–±–æ–ª—ã:
  –£—Ä–∞–≤–Ω–µ–Ω–∏–µ: y = -0.000064x¬≤ + 0.0017x + 78.61
  –í–µ—Ä—à–∏–Ω–∞: x = 13.5, y = 78.6

üìä –ê–Ω–∞–ª–∏–∑ —Ä–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è CreditScore:
  –ú–∏–Ω–∏–º—É–º: 300.0
  –ú–∞–∫—Å–∏–º—É–º: 850.0
  –ú–µ–¥–∏–∞–Ω–∞: 722.0
  25-–π –ø–µ—Ä—Ü–µ–Ω—Ç–∏–ª—å: 550.0
  75-–π –ø–µ—Ä—Ü–µ–Ω—Ç–∏–ª—å: 850.0

üéØ –ì—Ä–∞–Ω–∏—Ü—ã –≥—Ä—É–ø–ø:
  –ì—Ä—É–ø–ø–∞ 1 (–Ω–∏–∑–∫–∏–µ): CreditScore ‚â§ 600
  –ì—Ä—É–ø–ø–∞ 2 (—Å—Ä–µ–¥–Ω–∏–µ): 600 < CreditScore ‚â§ 750
  –ì—Ä—É–ø–ø–∞ 3 (–≤—ã—Å–æ–∫–∏–µ): CreditScore > 750

üìä –†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö:
  –ì—Ä—É–ø–ø–∞ –Ω–∏–∑–∫–∏–µ: 3095 train, 1574 test
  –ì—Ä—É–ø–ø–∞ —Å—Ä–µ–¥–Ω–∏–µ: 2248 train, 1114 test
  –ì—Ä—É–ø–ø–∞ –≤—ã—Å–æ–∫–∏–µ: 4444 train, 2312 test

üìä –°—Ä–µ–¥–Ω–∏–π RiskScore:
  –ì—Ä—É–ø–ø–∞ –Ω–∏–∑–∫–∏–µ: 66.96 ¬± 10.55
  –ì—Ä—É–ø–ø–∞ —Å—Ä–µ–¥–Ω–∏–µ: 47.78 ¬± 13.29
  –ì—Ä—É–

UnboundLocalError: cannot access local variable 'y_val' where it is not associated with a value