In [5]:
import pandas as pd
import numpy as np
import random

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
import pickle

def generate_data(n=1000):
    data = {
        'Age': np.random.randint(18, 65, n),
        'Gender': np.random.choice(['Мужчина', 'Женщина'], n),
        'Income': np.random.normal(250000, 75000, n).astype(int),
        'Spouse_income': np.random.choice([0, 100000, 150000, 200000], n, p=[0.6, 0.2, 0.15, 0.05]),
        'Marital_status': np.random.choice(['Женат/Замужем', 'Не женат/не замужем'], n),
        'Work_experience': np.random.randint(0, 40, n),
        'Children': np.random.randint(0, 4, n),
        'Loan_amount': np.random.randint(100000, 2000000, n),
        'Loan_term_months': np.random.choice([6, 12, 24, 36, 48, 60], n, p=[0.05, 0.1, 0.25, 0.3, 0.2, 0.1]),
        'Has_mortgage': np.random.choice([0, 1], n),
        'Has_car': np.random.choice([0, 1], n),
        'Credit_history': np.random.choice(['Хорошая', 'Плохая', 'Нет'], n, p=[0.7, 0.2, 0.1]),
        'Education': np.random.choice(['Среднее', 'Высшее', 'Магистратура', 'Другое'], n),
        'Region': np.random.choice(['Алматы', 'Астана', 'Шымкент', 'Другой'], n),
        'Employment_type': np.random.choice(['Сотрудник', 'Фрилансер', 'Безработный'], n),
        'Housing_type': np.random.choice(['Собственное', 'Аренда'], n),
        'Current_loans': np.random.randint(0, 5, n),
    }

    df = pd.DataFrame(data)

    df['adjusted_income'] = df['Income'] + df['Spouse_income'] - df['Children'] * 30000
    df['Monthly_payment'] = (df['Loan_amount'] / df['Loan_term_months']).astype(int)
    df['Payment_to_income_ratio'] = df['Monthly_payment'] / (df['adjusted_income'] + 1)

    def calc_target(row):
        score = 0
        score += (row['adjusted_income'] > 200000)
        score += (row['Credit_history'] == 'Хорошая')
        score += (row['Work_experience'] > 5)
        score += (row['Has_mortgage'] == 0)
        score += (row['Current_loans'] == 0)
        score += (row['Employment_type'] == 'Сотрудник')
        score += (row['Housing_type'] == 'Собственное')
        score += (row['Loan_amount'] < 1000000)
        score += (row['Payment_to_income_ratio'] < 0.3)  # Новое условие
        return 1 if score >= 5 else 0

    df['Target'] = df.apply(calc_target, axis=1)
    return df

df = generate_data(2000)

categorical_cols = [
    'Gender', 'Credit_history', 'Education', 'Region',
    'Marital_status', 'Employment_type', 'Housing_type'
]

encoders = {}
for col in categorical_cols:
    enc = LabelEncoder()
    df[col] = enc.fit_transform(df[col])
    encoders[col] = enc

X = df.drop(['Target', 'Income'], axis=1)
y = df['Target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train, y_train)

acc = model.score(X_test, y_test)
print(f"🎯 Accuracy модели: {acc:.2f}")

import os
os.makedirs("model_v2", exist_ok=True)

with open("model_v2/xgb_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("model_v2/encoders.pkl", "wb") as f:
    pickle.dump(encoders, f)


🎯 Accuracy модели: 0.98


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
