<a href="https://colab.research.google.com/github/Glorc12/Snegrs-ISP-22/blob/main/%D0%9F%D0%B5%D1%80%D0%B2%D0%BE%D0%BD%D0%B0%D1%87%D0%B0%D0%BB%D1%8C%D0%BD%D0%B0%D1%8F%20%D0%BC%D0%BE%D0%B4%D0%B5%D0%BB%D1%8C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# ---------------------------------------------------------------------------
# Baseline model: preprocess tabular features, fit a random forest on an
# 80/20 stratified split of train.csv, report hold-out ROC-AUC, and write the
# hold-out scores to submission_file.csv.
# ---------------------------------------------------------------------------
data = pd.read_csv('/content/train.csv')

# The identifier, the label and the report date are excluded from the features.
X = data.drop(columns=['client_id', 'target', 'report_date'])
y = data['target']

numeric_candidates = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object', 'category']).columns

# Cast categoricals to plain strings so the encoder sees one uniform type.
# NOTE(review): astype(str) turns missing values into the literal string
# 'nan', so they are encoded as their own category and the 'most_frequent'
# imputer below has nothing left to fill. Confirm this is the intent.
X[categorical_features] = X[categorical_features].astype(str)

# Split before deciding which numeric columns are usable, so the decision is
# based on the training rows only (the rows the imputer is fitted on).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# SimpleImputer(strategy='median') cannot compute a statistic for a column
# with no observed values: it emits a warning on every transform and skips
# the column. Leave such columns out of the numeric feature list instead;
# the resulting feature matrix is the same, without the warnings.
has_observed_value = X_train.notna().any()
numeric_features = [
    column for column in numeric_candidates if has_observed_value[column]
]

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Columns not listed in either group are dropped by ColumnTransformer's
# default remainder='drop'.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    # n_jobs=-1 builds the trees on all available cores; random_state is
    # fixed, so this only affects speed.
    ('classifier', RandomForestClassifier(
        random_state=42,
        n_estimators=100,
        class_weight='balanced',
        n_jobs=-1,
    ))
])

model.fit(X_train, y_train)

# Probability of the positive class on the hold-out rows.
test_preds = model.predict_proba(X_test)[:, 1]
test_score = roc_auc_score(y_test, test_preds)
print(f"ROC-AUC на тестовых данных: {test_score:.4f}")

# client_id was dropped from X, so look it up in the original frame using the
# hold-out row index.
submission = pd.DataFrame({'client_id': data.loc[X_test.index, 'client_id'], 'score': test_preds})
submission.to_csv('submission_file.csv', index=False)
print("Результаты сохранены в submission_file.csv")

  data = pd.read_csv('/content/train.csv')
 'col781' 'col782' 'col783' 'col784' 'col785' 'col786' 'col787' 'col788'
 'col789' 'col790' 'col791' 'col792' 'col1069' 'col1173' 'col1655'
 'col1656' 'col1657' 'col1658' 'col1659' 'col1660' 'col1661' 'col1662'
 'col1663' 'col1673' 'col1674' 'col1675' 'col1679' 'col1680' 'col1681'
 'col1703' 'col1704' 'col1705' 'col1727' 'col1728' 'col1729' 'col1751'
 'col1752' 'col1753' 'col1775' 'col1776' 'col1777' 'col1799' 'col1800'
 'col1801' 'col2247' 'col2248' 'col2249' 'col2250' 'col2251' 'col2252'
 'col2253' 'col2254' 'col2255' 'col2256' 'col2257' 'col2258' 'col2259'
 'col2260' 'col2261' 'col2262' 'col2263' 'col2264' 'col2265' 'col2266'
 'col2267' 'col2268' 'col2269' 'col2270' 'col2535' 'col2536' 'col2537'
 'col2538' 'col2539' 'col2540' 'col2541' 'col2542']. At least one non-missing value is needed for imputation with strategy='median'.
 'col781' 'col782' 'col783' 'col784' 'col785' 'col786' 'col787' 'col788'
 'col789' 'col790' 'col791' 'col792' 'col10

ROC-AUC на тестовых данных: 0.9523
Результаты сохранены в submission_file.csv
