### Perceptron classifier 

In [1]:
import numpy as np
import pandas as pd

data_web_address = "e:\Create Your Own Sophisticated Model with Neural Networks\Section 04\pima-indians-diabetes.data"

column_names = ['pregnancy_x',
'plasma_con',
'blood_pressure',
'skin_mm',
'insulin',
'bmi',
'pedigree_func',
'age',
'target']

feature_names = column_names[:-1]

all_data = pd.read_csv(data_web_address , names=column_names) 

X = all_data[feature_names]
y = all_data['target']

In [2]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,stratify=y)

In [3]:
from sklearn.preprocessing import StandardScaler 
scaler = StandardScaler()
scaler.fit(X_train) 
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
from sklearn.linear_model import Perceptron
pr = Perceptron()
pr.fit(X_train_scaled, y_train)



Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      max_iter=None, n_iter=None, n_jobs=1, penalty=None, random_state=0,
      shuffle=True, tol=None, verbose=0, warm_start=False)

In [5]:
from sklearn.model_selection import cross_val_score, StratifiedKFold
skf = StratifiedKFold(n_splits=3)
cross_val_score(pr, X_train_scaled, y_train, cv=skf,scoring='roc_auc').mean()



0.7660967880249991

In [6]:
from sklearn.metrics import accuracy_score, roc_auc_score

print("Classification accuracy : ", accuracy_score(y_test, pr.predict(X_test_scaled)))
print("ROC-AUC Score : ",roc_auc_score(y_test, pr.predict(X_test_scaled)))

Classification accuracy :  0.6753246753246753
ROC-AUC Score :  0.5711111111111111


In [7]:
from sklearn.model_selection import GridSearchCV

param_dist = {'alpha': [0.1,0.01,0.001,0.0001], 
 'penalty': [None, 'l2','l1','elasticnet'],
 'random_state': [7],
 'class_weight':['balanced',None],'eta0': [0.25,0.5,0.75,1.0], 
 'warm_start':[True,False], 'n_iter':[50]}

gs_perceptron = GridSearchCV(pr, param_dist, scoring='roc_auc',cv=skf).fit(X_train_scaled, y_train)

































In [8]:
gs_perceptron.best_params_

{'alpha': 0.1,
 'class_weight': 'balanced',
 'eta0': 0.25,
 'n_iter': 50,
 'penalty': None,
 'random_state': 7,
 'warm_start': True}

In [9]:
gs_perceptron.best_score_

0.7885313901434369

In [10]:
best_perceptron = gs_perceptron.best_estimator_
best_perceptron

Perceptron(alpha=0.1, class_weight='balanced', eta0=0.25, fit_intercept=True,
      max_iter=None, n_iter=50, n_jobs=1, penalty=None, random_state=7,
      shuffle=True, tol=None, verbose=0, warm_start=True)

In [11]:
from sklearn.ensemble import BaggingClassifier 
param_dist = {
 'max_samples': [0.5,1.0],
 'max_features' : [0.5,1.0],
 'oob_score' : [True, False],
 'n_estimators': [100],
 'n_jobs':[-1],
 'base_estimator__alpha': [0.001,0.002],
 'base_estimator__penalty': [None, 'l2','l1','elasticnet'], }

ensemble_estimator = BaggingClassifier(base_estimator = best_perceptron)
bag_perceptrons = GridSearchCV(ensemble_estimator, param_dist,scoring='roc_auc',cv=skf,n_jobs=-1).fit(X_train_scaled, y_train)

In [12]:
bag_perceptrons.best_score_

0.826292971797525

In [13]:
bag_perceptrons.best_params_

{'base_estimator__alpha': 0.002,
 'base_estimator__penalty': None,
 'max_features': 1.0,
 'max_samples': 0.5,
 'n_estimators': 100,
 'n_jobs': -1,
 'oob_score': True}

### Multilayer perceptron 

In [3]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_california_housing
cali_housing = fetch_california_housing()

X = cali_housing.data
y = cali_housing.target

In [4]:
bins = np.arange(6)
binned_y = np.digitize(y, bins)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,stratify=binned_y)

In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [6]:
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPRegressor

param_grid = {'alpha': [10,1,0.1,0.01],
               'hidden_layer_sizes' : [(50,50,50),(50,50,50,50,50)],
               'activation': ['relu','logistic'],
               'solver' : ['adam']
               }

pre_gs_inst = RandomizedSearchCV(MLPRegressor(random_state=7),
                                  param_distributions = param_grid,
                                  cv=3,
                                  n_iter=15,
                                  random_state=7)
pre_gs_inst.fit(X_train_scaled, y_train)

pre_gs_inst.best_score_

0.7787210409367319

In [12]:
pre_gs_inst.best_params_

{'activation': 'relu',
 'alpha': 0.01,
 'hidden_layer_sizes': (50, 50, 50),
 'solver': 'adam'}

### Stacking with a neural network 

In [13]:
from __future__ import division
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
from sklearn.datasets import fetch_california_housing
 
#From within an ipython notebook
%matplotlib inline
 
cali_housing = fetch_california_housing()
 
X = cali_housing.data
y = cali_housing.target

bins = np.arange(6)
binned_y = np.digitize(y, bins)

In [14]:
from sklearn.model_selection import train_test_split
X_train_prin, X_test_prin, y_train_prin, y_test_prin = train_test_split(X, y,test_size=0.2,stratify=binned_y,random_state=7)

binned_y_train_prin = np.digitize(y_train_prin, bins)

X_1, X_stack, y_1, y_stack = train_test_split(X_train_prin,y_train_prin,test_size=0.33,stratify=binned_y_train_prin,random_state=7 )

In [15]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
 
mlp_pipe = Pipeline(steps=[('scale', StandardScaler()), ('neural_net', MLPRegressor())])
 
 
param_grid = {'neural_net__alpha': [0.02,0.01,0.005],
               'neural_net__hidden_layer_sizes' : [(50,50,50)],
               'neural_net__activation': ['relu'],
               'neural_net__solver' : ['adam']
               }
 
neural_net_gs = GridSearchCV(mlp_pipe, param_grid = param_grid,cv=3, n_jobs=-1)
neural_net_gs.fit(X_1, y_1)

GridSearchCV(cv=3, error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('scale', StandardScaler(copy=True, with_mean=True, with_std=True)), ('neural_net', MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learnin...=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False))]),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'neural_net__alpha': [0.02, 0.01, 0.005], 'neural_net__hidden_layer_sizes': [(50, 50, 50)], 'neural_net__activation': ['relu'], 'neural_net__solver': ['adam']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [16]:
neural_net_gs.best_params_

{'neural_net__activation': 'relu',
 'neural_net__alpha': 0.02,
 'neural_net__hidden_layer_sizes': (50, 50, 50),
 'neural_net__solver': 'adam'}

In [17]:
neural_net_gs.best_score_

0.7764852103006747

In [18]:
nn_best = neural_net_gs.best_estimator_
import pickle
 
f = open('nn_best.save', 'wb')
pickle.dump(nn_best, f, protocol = pickle.HIGHEST_PROTOCOL)
f.close()

In [19]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingRegressor
 
 
param_grid = {'learning_rate': [0.1,0.05,0.03,0.01],
               'loss': ['huber'],
               'max_depth': [5,7,10],
               'max_features': [0.4,0.6,0.8,1.0],
               'min_samples_leaf': [2,3,5],
               'n_estimators': [100],
               'warm_start': [True], 'random_state':[7]
               }
 
boost_gs = RandomizedSearchCV(GradientBoostingRegressor(), param_distributions = param_grid,cv=3, n_jobs=-1,n_iter=25)
boost_gs.fit(X_1, y_1)

RandomizedSearchCV(cv=3, error_score='raise',
          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False),
          fit_params=None, iid=True, n_iter=25, n_jobs=-1,
          param_distributions={'learning_rate': [0.1, 0.05, 0.03, 0.01], 'loss': ['huber'], 'max_depth': [5, 7, 10], 'max_features': [0.4, 0.6, 0.8, 1.0], 'min_samples_leaf': [2, 3, 5], 'n_estimators': [100], 'warm_start': [True], 'random_state': [7]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=0)

In [20]:
boost_gs.best_score_

0.8247794120705623

In [21]:
boost_gs.best_params_

{'learning_rate': 0.1,
 'loss': 'huber',
 'max_depth': 10,
 'max_features': 0.4,
 'min_samples_leaf': 2,
 'n_estimators': 100,
 'random_state': 7,
 'warm_start': True}

In [22]:
gbt_inst = GradientBoostingRegressor(**{'learning_rate': 0.1,
 'loss': 'huber',
 'max_depth': 10,
 'max_features': 0.4,
 'min_samples_leaf': 5,
 'n_estimators': 4000,
 'warm_start': True, 'random_state':7}).fit(X_1, y_1)

In [23]:
def pickle_func(filename, saved_object):
    import pickle
 
    f = open(filename, 'wb')
    pickle.dump(saved_object, f, protocol = pickle.HIGHEST_PROTOCOL)
    f.close()
     
    return None
 
pickle_func('grad_boost.save', gbt_inst)

In [24]:
from sklearn.ensemble import BaggingRegressor,GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV
 
 
param_dist = {
     'max_samples': [0.5,1.0],
     'max_features' : [0.5,1.0],
     'oob_score' : [True, False],
     'base_estimator__min_samples_leaf': [4,5],
     'n_estimators': [20]}
 
 

single_estimator = GradientBoostingRegressor(**{'learning_rate': 0.1,
 'loss': 'huber',
 'max_depth': 10,
 'max_features': 0.4,
 'n_estimators': 20,
 'warm_start': True, 'random_state':7})
 
ensemble_estimator = BaggingRegressor(base_estimator = single_estimator)
 
 
pre_gs_inst_bag = RandomizedSearchCV(ensemble_estimator,
  param_distributions = param_dist,
  cv=3,
  n_iter = 5,
  n_jobs=-1)
 
pre_gs_inst_bag.fit(X_1, y_1)

  warn("Some inputs do not have OOB scores. "


RandomizedSearchCV(cv=3, error_score='raise',
          estimator=BaggingRegressor(base_estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='huber', max_depth=10,
             max_features=0.4, max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
       ...n_estimators=10, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False),
          fit_params=None, iid=True, n_iter=5, n_jobs=-1,
          param_distributions={'max_samples': [0.5, 1.0], 'max_features': [0.5, 1.0], 'oob_score': [True, False], 'base_estimator__min_samples_leaf': [4, 5], 'n_estimators': [20]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=0)

In [25]:
pre_gs_inst_bag.best_score_

0.7803598528400342

In [26]:
pre_gs_inst_bag.best_params_

{'base_estimator__min_samples_leaf': 5,
 'max_features': 1.0,
 'max_samples': 1.0,
 'n_estimators': 20,
 'oob_score': True}

In [27]:
pickle_func('bag_gbm.save', pre_gs_inst_bag.best_estimator_)

In [28]:
def pickle_load_func(filename):
    f = open(filename, 'rb')
    to_return = pickle.load(f)
    f.close()
    
    return to_return

neural_net = pickle_load_func('nn_best.save')
gbt = pickle_load_func('grad_boost.save')
bag_gbm = pickle_load_func('bag_gbm.save')

In [29]:
def handle_X_set(X_train_set_in):
    X_train_set = X_train_set_in.copy()
    
    y_pred_nn = neural_net.predict(X_train_set)
    y_pred_gbt = gbt.predict(X_train_set)
    y_pred_bag = bag_gbm.predict(X_train_set)
    
    
    preds_df = pd.DataFrame(columns = ['nn', 'gbt','bag'])

    preds_df['nn'] = y_pred_nn
    preds_df['gbt'] = y_pred_gbt
    preds_df['bag'] = y_pred_bag
 
    return preds_df

def predict_from_X_set(X_train_set_in):
    X_train_set = X_train_set_in.copy()    
    return final_etr.predict(handle_X_set(X_train_set)) 

preds_df = handle_X_set(X_stack)
print (preds_df.corr())

           nn       gbt       bag
nn   1.000000  0.956668  0.972124
gbt  0.956668  1.000000  0.981021
bag  0.972124  0.981021  1.000000


In [30]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import RandomizedSearchCV
 
param_dist = {'max_features' : ['sqrt','log2',1.0],
  'min_samples_leaf' : [1, 2, 3, 7, 11],
  'n_estimators': [50, 100],
  'oob_score': [True, False]}
 
pre_gs_inst = RandomizedSearchCV(ExtraTreesRegressor(warm_start=True,bootstrap=True,random_state=7),
  param_distributions = param_dist,
  cv=3,
  n_iter = 15,random_state=7)
 
pre_gs_inst.fit(preds_df.values, y_stack)

pre_gs_inst.best_params_

{'max_features': 1.0,
 'min_samples_leaf': 11,
 'n_estimators': 100,
 'oob_score': False}

In [31]:
final_etr = ExtraTreesRegressor(**{'max_features': 1.0,
  'min_samples_leaf': 11,
  'n_estimators': 3000,
  'oob_score': False, 'random_state':7}).fit(preds_df.values, y_stack)

In [32]:
from sklearn.model_selection import cross_val_score

cross_val_score(final_etr, preds_df.values, y_stack, cv=3).mean()

0.8230070770139922

In [33]:
y_pred = predict_from_X_set(X_test_prin)
 
from sklearn.metrics import r2_score, mean_absolute_error
 
print("R-squared",r2_score(y_test_prin, y_pred))
print("MAE : ",mean_absolute_error(y_test_prin, y_pred))
print("MAPE : ",(np.abs(y_test_prin- y_pred)/y_test_prin).mean())

R-squared 0.84151210593999
MAE :  0.30161814300186257
MAPE :  0.167117001053763


In [34]:
def handle_X_set_sp(X_train_set_in):
    X_train_set = X_train_set_in.copy()
    
    y_pred_nn = neural_net.predict(X_train_set)
    y_pred_gbt = gbt.predict(X_train_set)
    y_pred_bag = bag_gbm.predict(X_train_set)
    
    #only change in function: include input vectors in training dataframe
    preds_df = pd.DataFrame(X_train_set, columns = cali_housing.feature_names)
    
    preds_df['nn'] = y_pred_nn
    preds_df['gbt'] = y_pred_gbt
    preds_df['bag'] = y_pred_bag
 
    return preds_df

def predict_from_X_set_sp(X_train_set_in):
    X_train_set = X_train_set_in.copy()

    #change final estimator's name to final_etr_sp and use handle_X_set_sp within this function
    return final_etr_sp.predict(handle_X_set_sp(X_train_set))

In [35]:
preds_df_sp = handle_X_set_sp(X_stack)
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import RandomizedSearchCV

param_dist = {'max_features' : ['sqrt','log2',1.0],
 'min_samples_leaf' : [1, 2, 3, 7, 11],
 'n_estimators': [50, 100],
 'oob_score': [True, False]}

pre_gs_inst_2 = RandomizedSearchCV(ExtraTreesRegressor(warm_start=True,bootstrap=True,random_state=7),
 param_distributions = param_dist,
 cv=3,
 n_iter = 15,random_state=7)

pre_gs_inst_2.fit(preds_df_sp.values, y_stack)
pre_gs_inst_2.best_params_

{'max_features': 'log2',
 'min_samples_leaf': 1,
 'n_estimators': 50,
 'oob_score': True}

In [36]:
final_etr_sp = ExtraTreesRegressor(**{'max_features': 'log2',
 'min_samples_leaf': 2,
 'n_estimators': 3000,
 'oob_score': False,'random_state':7}).fit(preds_df_sp.values, y_stack)

In [37]:
from sklearn.model_selection import cross_val_score
cross_val_score(final_etr_sp, preds_df_sp.values, y_stack, cv=3).mean()

0.8316741223480201

In [38]:
y_pred = predict_from_X_set_sp(X_test_prin)

from sklearn.metrics import r2_score, mean_absolute_error

print("R-squared",r2_score(y_test_prin, y_pred))
print("MAE : ",mean_absolute_error(y_test_prin, y_pred))
print("MAPE : ",(np.abs(y_test_prin- y_pred)/y_test_prin).mean())

R-squared 0.8480678238895286
MAE :  0.29327627005582657
MAPE :  0.16167257265737162


### Creating a simple estimator 

In [39]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer

bc = load_breast_cancer() 

new_feature_names = ['_'.join(ele.split()) for ele in bc.feature_names]

X = pd.DataFrame(bc.data,columns = new_feature_names)
y = bc.target

In [40]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=7, stratify = y)

In [41]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import Ridge
 
class RidgeClassifier(BaseEstimator, ClassifierMixin):
    
    """A Classifier made from Ridge Regression"""
    
    def __init__(self,alpha=0):
        self.alpha = alpha
        
    def fit(self, X, y = None):
        #pass along the alpha parameter to the internal ridge estimator and perform a fit using it
        self.ridge_regressor = Ridge(alpha = self.alpha) 
        self.ridge_regressor.fit(X, y)
        
        #save the seen class labels
        self.class_labels = np.unique(y)
        
        return self
    
    def predict(self, X_test):
        #store the results of the internal ridge regressor estimator
        results = self.ridge_regressor.predict(X_test)
        
        #find the nearest class label
        return np.array([self.class_labels[np.abs(self.class_labels - x).argmin()] for x in results])

In [42]:
r_classifier = RidgeClassifier(1.5)     
r_classifier.fit(X_train, y_train)
r_classifier.score(X_test, y_test)

0.9574468085106383

In [43]:
from sklearn.model_selection import GridSearchCV

param_grid = {'alpha': [0,0.5,1.0,1.5,2.0]}
gs_rc = GridSearchCV(RidgeClassifier(), param_grid, cv = 3).fit(X_train, y_train)

gs_rc.grid_scores_



[mean: 0.94751, std: 0.00399, params: {'alpha': 0},
 mean: 0.95801, std: 0.01010, params: {'alpha': 0.5},
 mean: 0.96063, std: 0.01140, params: {'alpha': 1.0},
 mean: 0.96063, std: 0.01140, params: {'alpha': 1.5},
 mean: 0.96063, std: 0.01140, params: {'alpha': 2.0}]

In [44]:
r_classifier.score(X_test, y_test)

0.9574468085106383

In [45]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
lr.fit(X_train,y_train)
lr.score(X_test,y_test)

0.9521276595744681

In [46]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

from sklearn.base import BaseEstimator, ClassifierMixin


from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import Ridge

class GEEClassifier(BaseEstimator, ClassifierMixin):
    
    """A Classifier made from statsmodels' Generalized Estimating Equations
    
    documentation available at: http://www.statsmodels.org/dev/gee.html
       
    """
    
    def __init__(self,group_by_feature):
        self.group_by_feature = group_by_feature
          
    def fit(self, X, y = None):
        #Same settings as the documentation's example: 
        self.fam = sm.families.Poisson()
        self.ind = sm.cov_struct.Exchangeable()
        
        #Auxiliary function: only used in this method within the class
        def expand_X(X, y, desired_group): 
            X_plus = X.copy()
            X_plus['y'] = y
    
            #roughly make ten groups
            X_plus[desired_group + '_group'] = (X_plus[desired_group] * 10)//10
    
            return X_plus
        
        #save the seen class labels
        self.class_labels = np.unique(y)
        
        dataframe_feature_names = X.columns
        not_group_by_features = [x for x in dataframe_feature_names if x != self.group_by_feature]
        
        formula_in = 'y ~ ' + ' + '.join(not_group_by_features)
        
        data = expand_X(X,y,self.group_by_feature)
        self.mod = smf.gee(formula_in, 
                           self.group_by_feature + "_group", 
                           data, 
                           cov_struct=self.ind, 
                           family=self.fam)
        
        self.res = self.mod.fit()
        
        return self
    
    def predict(self, X_test):
        #store the results of the internal GEE regressor estimator
        results = self.res.predict(X_test)
        
        #find the nearest class label
        return np.array([self.class_labels[np.abs(self.class_labels - x).argmin()] for x in results])
        
    def print_fit_summary(self):
        print(res.summary())
        return self

  from pandas.core import datetools


In [47]:
gee_classifier = GEEClassifier('mean_concavity')     
gee_classifier.fit(X_train, y_train)
gee_classifier.score(X_test, y_test)

0.9468085106382979

In [48]:
import numpy as np
import pandas as pd

data_web_address = "e:\Create Your Own Sophisticated Model with Neural Networks\Section 04\pima-indians-diabetes.data"

column_names = ['pregnancy_x', 
                'plasma_con', 
                'blood_pressure', 
                'skin_mm', 
                'insulin', 
                'bmi', 
                'pedigree_func', 
                'age', 
                'target']

feature_names = column_names[:-1]
all_data = pd.read_csv(data_web_address , names=column_names)


X = all_data[feature_names]
y = all_data['target']


from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7,stratify=y)

In [49]:
gee_classifier = GEEClassifier('blood_pressure')     
gee_classifier.fit(X_train, y_train)
gee_classifier.score(X_test, y_test)

0.8051948051948052

In [50]:
r_classifier = RidgeClassifier()
r_classifier.fit(X_train, y_train)
r_classifier.score(X_test, y_test)

0.7662337662337663

In [51]:
import pickle

f = open('rc_inst.save','wb')
pickle.dump(r_classifier, f, protocol = pickle.HIGHEST_PROTOCOL)
f.close()

In [52]:
import pickle

f = open('rc_inst.save','rb')
r_classifier = pickle.load(f)
f.close()

In [53]:
r_classifier.fit(X_train, y_train)
r_classifier.score(X_test, y_test)

0.7662337662337663