In [2]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score, precision_score, f1_score, confusion_matrix

In [3]:
# Read the CSV straight from the UCI repository URL and preview the first rows.
DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00471/Data_for_UCI_named.csv"
data = pd.read_csv(DATA_URL)
data.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [4]:
# Drop the numeric "stab" column so that only the categorical label "stabf" remains.
# NOTE(review): in the preview rows the sign of "stab" coincides with "stabf"
# (negative -> stable), so keeping it would presumably leak the target - confirm.
# errors='ignore' keeps this cell safe to re-run: `data` is rebound here, so a second
# execution would otherwise raise KeyError because "stab" has already been removed.
data = data.drop(columns='stab', errors='ignore')

In [6]:
# Target: the "stabf" label column. Features: every other column of `data`.
y = data['stabf']
X = data.drop(columns=['stabf'])

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [8]:
# Encode the target as integers: 1 for 'stable', 0 for every other label.
# The encoding is rebuilt from the untouched string labels in `y`, looked up through
# the row index carried by each split, rather than from y_train / y_test themselves.
# Re-encoding the already-converted splits would compare integers against 'stable'
# and silently turn every label into 0 if this cell were executed a second time.
# Assumes the row index of `y` is unique (read_csv is called without index_col).
y_train = (y.loc[y_train.index] == 'stable').astype(int)
y_test = (y.loc[y_test.index] == 'stable').astype(int)

In [9]:
from sklearn.preprocessing import StandardScaler

# Standardize the training features. The scaler is fitted on the training split
# only; the labels are not passed because StandardScaler does not use them.
scaler = StandardScaler().fit(X_train)
x_train_normalized = scaler.transform(X_train)


In [10]:
# Scale the test features with the existing `scaler`; only transform() is called
# here, so the scaler is not re-fitted on X_test.
x_test_normalized = scaler.transform(X_test)

### Modeling

In [12]:
# Fit a RandomForestClassifier (seeded with random_state=1) on the scaled training data.
clf = RandomForestClassifier(random_state=1).fit(x_train_normalized, y_train)

# Predict class labels for the scaled test features.
predictions = clf.predict(x_test_normalized)

In [13]:
# Accuracy of the random forest predictions against the test labels.
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')


Accuracy: 0.9295


### Train ExtraTreesClassifier

In [14]:
# Fit an ExtraTreesClassifier (seeded with random_state=1) on the scaled training data.
clf = ExtraTreesClassifier(random_state=1).fit(x_train_normalized, y_train)

# Predict class labels for the scaled test features.
predictions = clf.predict(x_test_normalized)

In [15]:
# Accuracy of the extra-trees predictions against the test labels.
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')

Accuracy: 0.9285


### Train XGBoost

In [16]:
# Fit an XGBClassifier (seeded with random_state=1) on the scaled training data.
clf = XGBClassifier(random_state=1).fit(x_train_normalized, y_train)

# Predict class labels for the scaled test features.
predictions = clf.predict(x_test_normalized)

In [18]:
# Accuracy of the XGBoost predictions against the test labels.
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')


Accuracy: 0.9455


### LightGBM Classifier

In [19]:
# Fit an LGBMClassifier (seeded with random_state=1) on the scaled training data.
clf = LGBMClassifier(random_state=1).fit(x_train_normalized, y_train)

# Predict class labels for the scaled test features.
predictions = clf.predict(x_test_normalized)

In [20]:
# Accuracy of the LightGBM predictions against the test labels.
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')


Accuracy: 0.9395
