In [1]:
import os

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Location of the ';'-separated dataset. The original absolute path is kept as
# the default so existing runs behave the same; set the ELECTRICAL_GRID_CSV
# environment variable to run the notebook on another machine without editing it.
DATA_PATH = os.environ.get(
    'ELECTRICAL_GRID_CSV',
    '/Users/andrejkleonskij/Data Science Git/Electrical_Grid_Stability.csv',
)

electrical_grid = pd.read_csv(DATA_PATH, sep=';')
print('Размер датасета:', electrical_grid.shape)

# Last expression of the cell: rich display of the first rows
electrical_grid.head()

Размер датасета: (10000, 13)


Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stability
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,1
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0


In [3]:
# Check how the rows are distributed between the classes of the target column
class_counts = electrical_grid['stability'].value_counts()
print('Соотношение классов:\n', class_counts)

Соотношение классов:
 0    6380
1    3620
Name: stability, dtype: int64


In [4]:
# Separate the target variable (y) from the feature matrix (X):
# y is the 'stability' column, X is every other column of the frame
y = electrical_grid['stability']
X = electrical_grid.drop(columns='stability')

In [5]:
# Split the data into a training set and a hold-out set:
# 20% of the rows are held out, and random_state pins the shuffle
# so the split is the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [6]:
# Create a logistic regression classifier (liblinear solver) and train it
# on the training split; fit() returns the estimator itself.
# NOTE(review): StandardScaler is imported in this notebook but is not applied
# in the cells shown — confirm that fitting on unscaled features is intended.
model = LogisticRegression(solver='liblinear').fit(X_train, y_train)

# Hard class labels produced by the trained model for the hold-out rows
predictions = model.predict(X_test)

# Predicted probabilities for the hold-out rows; column index 1 is kept so a
# custom decision threshold can be applied later
probabilities = model.predict_proba(X_test)[:, 1]



In [7]:
# Report quality metrics for the labels returned by model.predict().
# Each entry pairs the output template with the metric function it reports;
# the trailing '\n' on the last template leaves a blank line after the report.
metric_lines = (
    ('Accuracy: {:.2f}', accuracy_score),
    ('Precision: {:.2f}', precision_score),
    ('Recall: {:.2f}', recall_score),
    ('F1: {:.2f}\n', f1_score),
)

print('Метрики при автоматическом прогнозе с помощью predict')
for template, metric in metric_lines:
    print(template.format(metric(y_test, predictions)))

Метрики при автоматическом прогнозе с помощью predict
Accuracy: 0.82
Precision: 0.78
Recall: 0.70
F1: 0.73



In [8]:
# Decision threshold applied to the predicted probabilities
threshold = 0.4

# Label a row 1 when its probability is at least the threshold, otherwise 0.
# A vectorized comparison replaces the per-element Python loop; the labels are
# the same, but custom_predictions is now an integer array instead of a list.
custom_predictions = (probabilities >= threshold).astype(int)

# Report the same four metrics for the custom-threshold labels
print('Метрики для прогноза с кастомным порогом')
print('Accuracy for custom: {:.2f}'.format(accuracy_score(y_test, custom_predictions)))
print('Precision for custom: {:.2f}'.format(precision_score(y_test, custom_predictions)))
print('Recall for custom: {:.2f}'.format(recall_score(y_test, custom_predictions)))
print('F1 for custom: {:.2f}'.format(f1_score(y_test, custom_predictions)))

Метрики для прогноза с кастомным порогом
Accuracy for custom: 0.81
Precision for custom: 0.71
Recall for custom: 0.77
F1 for custom: 0.74
