### Метод логистической регрессии для прогнозирования возвратов кредитов

Подключим необходимые модули

In [None]:
from google.colab import drive
import pandas as pd
import sklearn

drive.mount('/content/drive')

Mounted at /content/drive


Загрузим заранее предобработанный датасет

In [None]:
df = pd.read_csv("/content/drive/MyDrive/bank_churners_preprocessed.csv")
df

Unnamed: 0.1,Unnamed: 0,Attrition_Flag,Customer_Age,Dependent_count,Education_Level,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,...,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio,c_F,c_M,c_Divorced,c_Married,c_Single,c_Unknown
0,0,0,45,3,2,3,0,39,5,1,...,1144,42,1.625,0.061,0,1,0,1,0,0
1,1,0,49,5,5,1,0,44,6,1,...,1291,33,3.714,0.105,1,0,0,0,1,0
2,2,0,51,3,5,4,0,36,4,1,...,1887,20,2.333,0.000,0,1,0,1,0,0
3,3,0,40,4,2,1,0,34,3,4,...,1171,20,2.333,0.760,1,0,0,0,0,1
4,4,0,40,3,1,3,0,21,5,1,...,816,28,2.500,0.000,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10122,10122,0,50,2,5,2,0,40,3,2,...,15476,117,0.857,0.462,0,1,0,0,1,0
10123,10123,1,41,2,0,2,0,25,4,2,...,8764,69,0.683,0.511,0,1,1,0,0,0
10124,10124,1,44,1,2,1,0,36,5,3,...,10291,60,0.818,0.000,1,0,0,1,0,0
10125,10125,1,30,2,5,2,0,36,4,3,...,8395,62,0.722,0.000,0,1,0,0,0,1


Удалим ненужную колонку

In [None]:
df.drop(columns = ["Unnamed: 0"], inplace=True)

Подключим модули для обучения модели

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, r2_score
from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso, Ridge

Подготовим данные для обучения

In [None]:
y = df['Attrition_Flag']
X = df.drop(columns = ['Attrition_Flag'])
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2)

Попробуем обучить модель

In [None]:
LogRM = LogisticRegression(max_iter=1000)
LogRM.fit(X_train, y_train)
print(classification_report(y_test, LogRM.predict(X_test)))
print(confusion_matrix(y_test, LogRM.predict(X_test)))

              precision    recall  f1-score   support

           0       0.91      0.97      0.94      1701
           1       0.77      0.53      0.62       325

    accuracy                           0.90      2026
   macro avg       0.84      0.75      0.78      2026
weighted avg       0.89      0.90      0.89      2026

[[1649   52]
 [ 154  171]]


Модель логистической регрессии показывает неплохие результаты

Попробуем подобрать параметры и скалировать данные

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
LogRM = LogisticRegression(penalty='l2', C=1.0, solver='lbfgs', max_iter=1000)
LogRM.fit(X_train_scaled, y_train)
print(classification_report(y_test, LogRM.predict(X_test_scaled)))
print(confusion_matrix(y_test, LogRM.predict(X_test_scaled)))

              precision    recall  f1-score   support

           0       0.92      0.97      0.95      1701
           1       0.78      0.58      0.67       325

    accuracy                           0.91      2026
   macro avg       0.85      0.78      0.81      2026
weighted avg       0.90      0.91      0.90      2026

[[1649   52]
 [ 136  189]]


Точность модели немного улучшилась



---



### Метод линейной регрессии для прогнозирования возвратов кредитов

Попробуем обучить модель линейной регрессии

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

LinRM = LinearRegression()
LinRM.fit(X_train_scaled, y_train)
print(f'Точность модели: {LinRM.score(X_test_scaled, y_test)}')

Точность модели: 0.3597066298574185


Результат плохой, попробуем провести регуляризацию

In [None]:
ridge = Ridge().fit(X_train_scaled, y_train)
print(ridge.score(X_test_scaled, y_test))

0.3597005373485861


In [None]:
lasso = Lasso().fit(X_train_scaled, y_train)
print(lasso.score(X_test_scaled, y_test))

-6.965508578815616e-07
