In [4]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from xgboost import XGBClassifier, XGBRegressor

In [5]:
# Load the augmented smart-grid stability dataset from the working directory.
data = pd.read_csv(
    "./smart_grid_stability_augmented.csv"
)

In [6]:
# Show the loaded frame; the notebook renders a truncated head/tail view.
data

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,0.049860,unstable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,2.930406,2.376523,9.487627,6.187797,3.343416,-1.449106,-0.658054,-1.236256,0.601709,0.813512,0.779642,0.608385,0.023892,unstable
59996,3.392299,2.954947,1.274827,6.894759,4.349512,-0.952437,-1.663661,-1.733414,0.502079,0.285880,0.567242,0.366120,-0.025803,stable
59997,2.364034,8.776391,2.842030,1.008906,4.299976,-0.943884,-1.380719,-1.975373,0.487838,0.149286,0.986505,0.145984,-0.031810,stable
59998,9.631511,2.757071,3.994398,7.821347,2.514755,-0.649915,-0.966330,-0.898510,0.365246,0.889118,0.587558,0.818391,0.037789,unstable


In [7]:
# Summarize column dtypes and non-null counts to check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tau1    60000 non-null  float64
 1   tau2    60000 non-null  float64
 2   tau3    60000 non-null  float64
 3   tau4    60000 non-null  float64
 4   p1      60000 non-null  float64
 5   p2      60000 non-null  float64
 6   p3      60000 non-null  float64
 7   p4      60000 non-null  float64
 8   g1      60000 non-null  float64
 9   g2      60000 non-null  float64
 10  g3      60000 non-null  float64
 11  g4      60000 non-null  float64
 12  stab    60000 non-null  float64
 13  stabf   60000 non-null  object 
dtypes: float64(13), object(1)
memory usage: 6.4+ MB


### PREPROCESSING


In [8]:
def preprocess_inputs(df, task='classification'):
    """Split the grid-stability frame into train/test features and target.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns plus both target columns,
        'stab' and 'stabf'. The caller's frame is not modified.
    task : {'classification', 'regression'}, default 'classification'
        'classification' uses 'stabf' as the target; 'regression' uses
        'stab'. Whichever target is not selected is still removed from the
        features so it cannot leak into the model.

    Returns
    -------
    X_train, X_test, y_train, y_test
        A shuffled 70/30 split produced with a fixed random_state of 1.

    Raises
    ------
    ValueError
        If `task` is not one of the supported values.
    KeyError
        If 'stab' or 'stabf' is missing from `df`.
    """
    target_by_task = {'classification': 'stabf', 'regression': 'stab'}

    # Fail fast with a clear message; previously an unknown task fell through
    # both branches and crashed later with an UnboundLocalError on X / y.
    if task not in target_by_task:
        raise ValueError(
            "task must be one of {}, got {!r}".format(sorted(target_by_task), task)
        )

    y = df[target_by_task[task]].copy()

    # Both target columns are dropped regardless of task. drop() returns a new
    # frame, so no up-front copy of the whole input is needed.
    X = df.drop(list(target_by_task.values()), axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.7, shuffle=True, random_state=1
    )

    return X_train, X_test, y_train, y_test

In [9]:
# Keep the whole (X_train, X_test, y_train, y_test) tuple in one variable for inspection.
x = preprocess_inputs(data, task='classification')

In [10]:
# Inspect the 4-tuple (X_train, X_test, y_train, y_test) returned by preprocess_inputs.
x

(           tau1      tau2      tau3      tau4        p1        p2        p3  \
 51782  4.306374  7.111600  2.698565  1.667149  4.036521 -0.661915 -1.721545   
 53781  8.196407  3.969140  8.062070  2.770065  2.307235 -0.759482 -0.980846   
 55123  9.030050  7.459977  7.533218  2.704361  2.297036 -0.888640 -0.907252   
 35823  0.615170  1.946848  6.944750  3.811661  4.115997 -1.014799 -1.428489   
 48869  9.689491  6.842392  6.715817  2.300154  2.394670 -0.603920 -1.174640   
 ...         ...       ...       ...       ...       ...       ...       ...   
 50057  2.918050  8.634376  1.492789  3.289806  3.928822 -0.735709 -1.580769   
 32511  7.833738  4.576594  8.163828  8.902948  3.853314 -1.528091 -0.853671   
 5192   5.085242  1.790031  2.486392  2.848192  3.159670 -0.892898 -1.025461   
 12172  2.103806  7.834493  0.678007  6.619317  4.946095 -1.320590 -1.724641   
 33003  8.809894  8.863281  3.041964  9.466062  4.212257 -0.755499 -1.782605   
 
              p4        g1        g2  

### CLASSIFICATION TASK

In [11]:
# Classification split: the target is the 'stabf' label column.
X_train, X_test, y_train, y_test = preprocess_inputs(
    data,
    task='classification',
)

In [12]:
# Inspect the training features from the classification split.
X_train

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
51782,4.306374,7.111600,2.698565,1.667149,4.036521,-0.661915,-1.721545,-1.653061,0.502570,0.357268,0.784432,0.283608
53781,8.196407,3.969140,8.062070,2.770065,2.307235,-0.759482,-0.980846,-0.566906,0.656218,0.339355,0.631997,0.181889
55123,9.030050,7.459977,7.533218,2.704361,2.297036,-0.888640,-0.907252,-0.501144,0.520756,0.668365,0.979940,0.962761
35823,0.615170,1.946848,6.944750,3.811661,4.115997,-1.014799,-1.428489,-1.672709,0.363591,0.136974,0.089364,0.896053
48869,9.689491,6.842392,6.715817,2.300154,2.394670,-0.603920,-1.174640,-0.616110,0.500522,0.102504,0.747755,0.052535
...,...,...,...,...,...,...,...,...,...,...,...,...
50057,2.918050,8.634376,1.492789,3.289806,3.928822,-0.735709,-1.580769,-1.612345,0.315826,0.762099,0.817872,0.278033
32511,7.833738,4.576594,8.163828,8.902948,3.853314,-1.528091,-0.853671,-1.471552,0.689789,0.084526,0.738146,0.947283
5192,5.085242,1.790031,2.486392,2.848192,3.159670,-0.892898,-1.025461,-1.241311,0.473917,0.418512,0.326253,0.982488
12172,2.103806,7.834493,0.678007,6.619317,4.946095,-1.320590,-1.724641,-1.900864,0.723385,0.223536,0.745005,0.339882


### REGRESSION TASK

In [13]:
# Regression split: the target is the numeric 'stab' column.
# This rebinds X_train / X_test / y_train / y_test from the classification split.
X_train, X_test, y_train, y_test = preprocess_inputs(
    data,
    task='regression',
)

In [14]:
# Inspect the training features from the regression split.
X_train

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
51782,4.306374,7.111600,2.698565,1.667149,4.036521,-0.661915,-1.721545,-1.653061,0.502570,0.357268,0.784432,0.283608
53781,8.196407,3.969140,8.062070,2.770065,2.307235,-0.759482,-0.980846,-0.566906,0.656218,0.339355,0.631997,0.181889
55123,9.030050,7.459977,7.533218,2.704361,2.297036,-0.888640,-0.907252,-0.501144,0.520756,0.668365,0.979940,0.962761
35823,0.615170,1.946848,6.944750,3.811661,4.115997,-1.014799,-1.428489,-1.672709,0.363591,0.136974,0.089364,0.896053
48869,9.689491,6.842392,6.715817,2.300154,2.394670,-0.603920,-1.174640,-0.616110,0.500522,0.102504,0.747755,0.052535
...,...,...,...,...,...,...,...,...,...,...,...,...
50057,2.918050,8.634376,1.492789,3.289806,3.928822,-0.735709,-1.580769,-1.612345,0.315826,0.762099,0.817872,0.278033
32511,7.833738,4.576594,8.163828,8.902948,3.853314,-1.528091,-0.853671,-1.471552,0.689789,0.084526,0.738146,0.947283
5192,5.085242,1.790031,2.486392,2.848192,3.159670,-0.892898,-1.025461,-1.241311,0.473917,0.418512,0.326253,0.982488
12172,2.103806,7.834493,0.678007,6.619317,4.946095,-1.320590,-1.724641,-1.900864,0.723385,0.223536,0.745005,0.339882


In [15]:
# Inspect the regression training target (the 'stab' column).
y_train

51782   -0.012548
53781    0.032672
55123    0.069865
35823   -0.044868
48869    0.020140
           ...   
50057   -0.028760
32511    0.075413
5192     0.003594
12172   -0.042775
33003    0.055588
Name: stab, Length: 42000, dtype: float64

In [16]:
# Fit an XGBoost regressor with library-default hyperparameters on the training split.
reg = XGBRegressor()
reg.fit(X=X_train, y=y_train)
print("Regressor trained.")

Regressor trained.


In [17]:
# Report the regressor's score on the held-out test split, to five decimals.
print(
    "Regression Test R^2 Score: {:.5f}".format(
        reg.score(X_test, y_test)
    )
)

Regression Test R^2 Score: 0.96223
