In [4]:
import os

import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score,f1_score, precision_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [5]:
def print_all_metrics(y_true, y_pred, y_proba, title='Метрики классификации'):
    """Print a titled report of classification metrics.

    The title is printed first, followed by one tab-indented line per metric,
    each formatted with two decimal places: accuracy, precision, recall and F1
    (computed from ``y_true`` and ``y_pred``) and ROC AUC (computed from
    ``y_true`` and ``y_proba``).

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_proba: Scores handed to ``roc_auc_score``; the callers in this
            notebook pass column 1 of ``predict_proba``.
        title: Heading printed above the metric lines.

    Returns:
        None. The report is written to standard output.
    """
    print(title)
    # (label, metric function, predictions that metric consumes), in print order.
    report_rows = (
        ('Accuracy', accuracy_score, y_pred),
        ('Precision', precision_score, y_pred),
        ('Recall', recall_score, y_pred),
        ('F1', f1_score, y_pred),
        ('ROC_AUC', roc_auc_score, y_proba),
    )
    for label, metric, estimate in report_rows:
        # Compute and print one metric at a time so output appears line by line.
        print('\t{}: {:.2f}'.format(label, metric(y_true, estimate)))

In [7]:
# Location of the dataset. Set the ELECTRICAL_GRID_CSV environment variable to
# point at your own copy of the file; when it is unset, the original author's
# local path is used so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    'ELECTRICAL_GRID_CSV',
    '/Users/andrejkleonskij/Data Science Git/Electrical_Grid_Stability.csv',
)
# The file uses ';' as its field separator.
electrical_grid = pd.read_csv(DATA_PATH, sep=';')

In [13]:
# Print the table's (rows, columns) size, then preview its first five rows.
print((len(electrical_grid.index), len(electrical_grid.columns)))
electrical_grid.head(5)

(10000, 13)


Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stability
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,1
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0


In [14]:
# Count how many rows fall into each class of the target column.
electrical_grid.loc[:, 'stability'].value_counts()

0    6380
1    3620
Name: stability, dtype: int64

In [17]:
# Separate the target column from the feature columns.
y = electrical_grid['stability']
X = electrical_grid.drop(columns=['stability'])

In [18]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [19]:
# Fit the scaler on the training split only. Fitting on the full X would let
# test-set statistics (mean and variance) leak into preprocessing, which makes
# the held-out metrics reported further down optimistic.
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler()

In [23]:
# Standardize both splits with the already-fitted scaler.
X_train_st, X_test_st = (scaler.transform(split) for split in (X_train, X_test))

In [24]:
# Train logistic regression on the standardized training data.
# The fit call stays as the last expression so the cell still displays the estimator.
lr_model = LogisticRegression(random_state=0)
lr_model.fit(X=X_train_st, y=y_train)

LogisticRegression(random_state=0)

In [25]:
# Score the test split: column 1 of predict_proba is kept as the probability of
# class 1, and predict supplies the hard labels.
lr_probabilities = lr_model.predict_proba(X_test_st)[:, 1]
lr_predictions = lr_model.predict(X_test_st)


In [26]:
# Report the logistic-regression scores on the held-out test split.
print_all_metrics(
    y_true=y_test,
    y_pred=lr_predictions,
    y_proba=lr_probabilities,
    title='Метрики для модели логистической регрессии:',
)

Метрики для модели логистической регрессии:
	Accuracy: 0.81
	Precision: 0.77
	Recall: 0.70
	F1: 0.73
	ROC_AUC: 0.89


In [31]:
# Decision tree: fit on the standardized training split, then score the test split.
tree_model = DecisionTreeClassifier(random_state=0).fit(X_train_st, y_train)
# Column 1 of predict_proba is kept as the probability of class 1.
tree_probabilities = tree_model.predict_proba(X_test_st)[:, 1]
tree_predictions = tree_model.predict(X_test_st)
print_all_metrics(
    y_true=y_test,
    y_pred=tree_predictions,
    y_proba=tree_probabilities,
    title='Метрики для модели дерева решений:',
)

Метрики для модели дерева решений:
	Accuracy: 0.83
	Precision: 0.77
	Recall: 0.77
	F1: 0.77
	ROC_AUC: 0.82


In [28]:
# Random forest of 100 trees: fit on the standardized training split, then
# score the test split.
rf_model = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train_st, y_train)
# Column 1 of predict_proba is kept as the probability of class 1.
rf_probabilities = rf_model.predict_proba(X_test_st)[:, 1]
rf_predictions = rf_model.predict(X_test_st)
print_all_metrics(
    y_true=y_test,
    y_pred=rf_predictions,
    y_proba=rf_probabilities,
    title='Метрики для модели случайного леса:',
)

Метрики для модели случайного леса:
	Accuracy: 0.92
	Precision: 0.93
	Recall: 0.84
	F1: 0.88
	ROC_AUC: 0.98


In [30]:
# Gradient boosting with 100 estimators: fit on the standardized training split,
# then score the test split.
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=0).fit(X_train_st, y_train)
# Column 1 of predict_proba is kept as the probability of class 1.
gb_probabilities = gb_model.predict_proba(X_test_st)[:, 1]
gb_predictions = gb_model.predict(X_test_st)
print_all_metrics(
    y_true=y_test,
    y_pred=gb_predictions,
    y_proba=gb_probabilities,
    title='Метрики для модели градиентного бустинга:',
)

Метрики для модели градиентного бустинга:
	Accuracy: 0.93
	Precision: 0.94
	Recall: 0.85
	F1: 0.89
	ROC_AUC: 0.98
