In [3]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv("energy.csv")

In [5]:
df.isna().sum() 

tau1     0
tau2     0
tau3     0
tau4     0
p1       0
p2       0
p3       0
p4       0
g1       0
g2       0
g3       0
g4       0
stab     0
stabf    0
dtype: int64

In [6]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [7]:
data_df = df.drop(columns=['stabf'])

In [10]:
normalised_df = pd.DataFrame(scaler.fit_transform(data_df),columns=data_df.columns)
X = normalised_df.drop(columns='stab')
y = df[ 'stabf' ]

In [11]:
from sklearn.model_selection import train_test_split 
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size= 0.2 , random_state= 1 ) 
y_train.value_counts() 

unstable    5092
stable      2908
Name: stabf, dtype: int64

RANDOM FOREST

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [14]:
clf=RandomForestClassifier(n_estimators=100,random_state=1)

In [15]:
clf.fit(x_train,y_train)

RandomForestClassifier(random_state=1)

In [16]:
rf_predictions=clf.predict(x_test)

In [17]:
from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score,confusion_matrix  
cnf_mat = confusion_matrix(y_true=y_test, y_pred=rf_predictions, labels=[ 'stable' , 'unstable' ]) 
cnf_mat

array([[ 625,   87],
       [  55, 1233]], dtype=int64)

In [18]:
accuracy = accuracy_score(y_true=y_test, y_pred=rf_predictions) 
print( 'Accuracy: {}' .format(round(accuracy,4 )))

Accuracy: 0.929


In [19]:
precision = precision_score(y_true=y_test, y_pred=rf_predictions, pos_label= 'stable' ) 
print( 'Precision: {}' .format(round(precision,4 )))

Precision: 0.9191


In [20]:
recall = recall_score(y_true=y_test, y_pred=rf_predictions, pos_label= 'stable' ) 
print( 'Recall: {}' .format(round(recall* 100), 2 ))

Recall: 88.0


In [21]:
f1 = f1_score(y_true=y_test, y_pred=rf_predictions, pos_label= 'stable' ) 
print( 'F1: {}' .format(round(f1* 100 ), 2 ))

F1: 90.0


Extra Trees Classifier

In [23]:
from sklearn.ensemble import ExtraTreesClassifier

In [24]:
extra_tree_forest = ExtraTreesClassifier(random_state=1)

In [25]:
extra_tree_forest.fit(x_train,y_train)

ExtraTreesClassifier(random_state=1)

In [26]:
etf_predictions=extra_tree_forest.predict(x_test)

In [27]:
cnf_mat = confusion_matrix(y_true=y_test, y_pred=etf_predictions, labels=[ 'stable' , 'unstable' ]) 
cnf_mat

array([[ 606,  106],
       [  38, 1250]], dtype=int64)

In [28]:
accuracy = accuracy_score(y_true=y_test, y_pred=etf_predictions) 
print( 'Accuracy: {}' .format(round(accuracy* 100 ), 2 ))

Accuracy: 93.0


In [29]:
precision = precision_score(y_true=y_test, y_pred=etf_predictions, pos_label= 'stable' ) 
print( 'Precision: {}' .format(round(precision* 100 ), 2 ))

Precision: 94.0


In [30]:
recall = recall_score(y_true=y_test, y_pred=etf_predictions, pos_label= 'stable' ) 
print( 'Recall: {}' .format(round(recall* 100 ), 2 ))

Recall: 85.0


In [31]:
f1 = f1_score(y_true=y_test, y_pred=etf_predictions, pos_label= 'stable' ) 
print( 'F1: {}' .format(round(f1* 100 ), 2 ))

F1: 89.0


extreme boosting model

In [32]:
from xgboost import XGBClassifier

In [33]:
xgb_model = XGBClassifier(random_state=1)

In [34]:
xgb_model.fit(x_train,y_train)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=12, num_parallel_tree=1, random_state=1,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [35]:
xgb_predictions = xgb_model.predict(x_test)

In [36]:
cnf_mat = confusion_matrix(y_true=y_test, y_pred=xgb_predictions, labels=[ 'stable' , 'unstable' ]) 
cnf_mat

array([[ 648,   64],
       [  45, 1243]], dtype=int64)

In [37]:
accuracy = accuracy_score(y_true=y_test, y_pred=xgb_predictions) 
print( 'Accuracy: {}' .format(round(accuracy,4), 4 ))

Accuracy: 0.9455


In [38]:
precision = precision_score(y_true=y_test, y_pred=xgb_predictions, pos_label= 'stable' ) 
print( 'Precision: {}' .format(round(precision* 100 ,4), 2 ))

Precision: 93.5065


In [39]:
recall = recall_score(y_true=y_test, y_pred=xgb_predictions, pos_label= 'stable' ) 
print( 'Recall: {}' .format(round(recall* 100 ), 2 ))

Recall: 91.0


In [40]:
f1 = f1_score(y_true=y_test, y_pred=xgb_predictions, pos_label= 'stable' ) 
print( 'F1: {}' .format(round(f1* 100 ), 2 ))

F1: 92.0


light gradient boosting model

In [41]:
from lightgbm import LGBMClassifier

In [42]:
lgb_model = LGBMClassifier(random_state=1)

In [43]:
lgb_model.fit(x_train,y_train)

LGBMClassifier(random_state=1)

In [44]:
lgb_predictions = lgb_model.predict(x_test)

In [45]:
cnf_mat = confusion_matrix(y_true=y_test, y_pred=etf_predictions, labels=[ 'stable' , 'unstable' ]) 
cnf_mat

array([[ 606,  106],
       [  38, 1250]], dtype=int64)

In [46]:
accuracy = accuracy_score(y_true=y_test, y_pred=lgb_predictions) 
print( 'Accuracy: {}' .format(round(accuracy,4 )))

Accuracy: 0.94


In [47]:
precision = precision_score(y_true=y_test, y_pred=lgb_predictions, pos_label= 'stable' ) 
print( 'Precision: {}' .format(round(precision* 100 ), 2 ))

Precision: 93.0


In [48]:
recall = recall_score(y_true=y_test, y_pred=lgb_predictions, pos_label= 'stable' ) 
print( 'Recall: {}' .format(round(recall* 100 ), 2 ))

Recall: 90.0


In [49]:
f1 = f1_score(y_true=y_test, y_pred=lgb_predictions, pos_label= 'stable' ) 
print( 'F1: {}' .format(round(f1* 100 ), 2 ))

F1: 91.0


Randomized Search CV

In [51]:
from sklearn.model_selection import RandomizedSearchCV

In [52]:
model_params = {

    'n_estimators': [300,100,1000,500],
    # normally distributed max_features, with mean .25 stddev 0.1, bounded between 0 and 1
    'max_features':['auto','','','log2'],
    # uniform distribution from 0.01 to 0.2 (0.01 + 0.199)
    'min_samples_split': [5,7,2,2],
    'min_samples_leaf':[6,4,8,8],
}

In [53]:
random_search = RandomizedSearchCV(extra_tree_forest,model_params,cv=5, n_iter=10, scoring = 'accuracy', n_jobs = -1, verbose = 1,random_state = 1
                           )

In [54]:
random_search.fit(x_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


30 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Legion\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Legion\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 450, in fit
    trees = Parallel(
  File "C:\Users\Legion\anaconda3\lib\site-packages\joblib\parallel.py", line 1029, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Legion\anaconda3\lib\site-packages\joblib\parallel.py", line 847, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Legion\a

RandomizedSearchCV(cv=5, estimator=ExtraTreesClassifier(random_state=1),
                   n_jobs=-1,
                   param_distributions={'max_features': ['auto', '', '',
                                                         'log2'],
                                        'min_samples_leaf': [6, 4, 8, 8],
                                        'min_samples_split': [5, 7, 2, 2],
                                        'n_estimators': [300, 100, 1000, 500]},
                   random_state=1, scoring='accuracy', verbose=1)

In [55]:
random_search.best_params_

{'n_estimators': 500,
 'min_samples_split': 2,
 'min_samples_leaf': 4,
 'max_features': 'auto'}

New Extra Trees

In [56]:
new_extra_tree_forest = ExtraTreesClassifier(n_estimators= 500,
 min_samples_split= 2,
 min_samples_leaf= 4,
 max_features= 'auto')

In [57]:
new_extra_tree_forest.fit(x_train,y_train)

ExtraTreesClassifier(min_samples_leaf=4, n_estimators=500)

In [58]:
netf_predictions=new_extra_tree_forest.predict(x_test)

In [59]:
accuracy = accuracy_score(y_true=y_test, y_pred=netf_predictions) 
print( 'Accuracy: {}' .format(round(accuracy,4 ), 2 ))

Accuracy: 0.9195


In [60]:
new_extra_tree_forest.feature_importances_

array([0.13476192, 0.1399828 , 0.13083176, 0.13102438, 0.01412499,
       0.01757522, 0.01735723, 0.0178999 , 0.09163493, 0.10076954,
       0.10403399, 0.10000334])