In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
import scikitplot as skplt
from collections import Counter

In [None]:
# Load the dataset from a CSV file located relative to the working directory.
df = pd.read_csv(filepath_or_buffer='combined_file.csv')

In [None]:
# Features are every column except 'outflow'; 'outflow' itself is the target.
# Both are converted to plain NumPy arrays, so column names are not carried along.
X = df.drop(columns=['outflow']).values
y = df.loc[:, 'outflow'].values

# Hold out 10% of the rows as the test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=10,
    test_size=0.1,
)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Wrap each split (features + labels) in a CatBoost Pool.
train_data = Pool(data=X_train, label=y_train)
test_data = Pool(data=X_test, label=y_test)

In [None]:
# Carve a validation set out of the training split and use it as eval_set.
# When an eval_set is supplied CatBoost defaults use_best_model to True, i.e. the
# final number of trees is picked by the eval metric on that set. Passing the test
# pool here would tune the model on the test data and make every test metric
# computed afterwards optimistically biased.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=10
)
fit_data = Pool(X_fit, y_fit)
val_data = Pool(X_val, y_val)

# class_weights=[1, 10] up-weights the second class relative to the first.
# verbose=500 logs every 500th iteration instead of all 5000.
model = CatBoostClassifier(class_weights=[1, 10],
                           learning_rate=0.01,
                           eval_metric='F1',
                           iterations=5000,
                           verbose=500)
model.fit(fit_data, eval_set=val_data)

In [None]:
# Plot the fitted model's feature importances, least important at the bottom.
feature_importance = model.feature_importances_
sorted_idx = np.argsort(feature_importance)

# The model was trained on df without the 'outflow' column, so importance index i
# refers to the i-th column of that reduced frame. Taking names from df.columns
# (which still contains 'outflow') mislabels every bar positioned after the target
# column unless 'outflow' happens to be the last column.
feature_names = np.array(df.drop('outflow', axis=1).columns)

fig = plt.figure(figsize=(12, 6))
plt.barh(range(len(sorted_idx)), feature_importance[sorted_idx], align='center')
plt.yticks(range(len(sorted_idx)), feature_names[sorted_idx])
plt.xlabel('Importance')
plt.title('Feature Importance')
plt.show()

In [None]:
# Predict class labels for the test features
predictions = model.predict(X_test, prediction_type='Class')
print("Predicted classes:", predictions)

# Predict per-class probabilities for the test features
probabilities = model.predict(X_test, prediction_type='Probability')
print("Predicted probabilities:", probabilities)

In [None]:
# Second column of the probability matrix (presumably the positive 'outflow' class;
# confirm against model.classes_).
probabilities[:, 1]

In [None]:
# ROC curves on the held-out test set, from true labels and per-class probabilities.
y_true = y_test
y_probas = probabilities
# plot_roc replaces the deprecated plot_roc_curve; with default arguments it draws
# the same per-class, micro- and macro-average curves.
skplt.metrics.plot_roc(y_true, y_probas)
plt.show()