In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore', DeprecationWarning)
%matplotlib inline 
%load_ext memory_profiler
from sklearn.metrics import make_scorer
from scipy.special import expit
import time
import math
import random
from memory_profiler import memory_usage
from sklearn import metrics as mt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold



from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

target_classifier = 'PC'
df = pd.read_csv('responses.csv', sep=",")

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler


In [8]:
# remove rows whose target classfier value is NaN
df_cleaned_classifier = df[np.isfinite(df[target_classifier])]
# change NaN number values to the mean
df_imputed = df_cleaned_classifier.fillna(df.mean())
# get categorical features
object_features = list(df_cleaned_classifier.select_dtypes(include=['object']).columns)
# one hot encode categorical features
one_hot_df = pd.concat([pd.get_dummies(df_imputed[col],prefix=col) for col in object_features], axis=1)
# drop object features from imputed dataframe
df_imputed_dropped = df_imputed.drop(object_features, 1)
frames = [df_imputed_dropped, one_hot_df]
# concatenate both frames by columns
df_fixed = pd.concat(frames, axis=1)

In [9]:
# Research on Cost Matrix
# http://www.ibm.com/support/knowledgecenter/SSEPGG_11.1.0/com.ibm.im.model.doc/c_cost_matrix.html

cost_matrix = np.matrix([[0,1,2,3,4],
[1,0,1,2,3],
[3,1,0,1,2],
[5,3,1,0,1],
[7,5,2,1,0]])

def get_confusion_costTot(confusion_matrix, cost_matrix):
    score = np.sum(confusion_matrix*cost_matrix)
    return score

confusion_scorer = make_scorer(get_confusion_costTot, greater_is_better=False)
confusion_scorer

make_scorer(get_confusion_costTot, greater_is_better=False)

In [10]:
from sklearn.model_selection import ShuffleSplit

# we want to predict the X and y data as follows:
if target_classifier in df_fixed:
    y = df_fixed[target_classifier].values # get the labels we want
    del df_fixed[target_classifier] # get rid of the class label
    X = df_fixed.values # use everything else to predict!

X = X/5
num_folds = 10

cv_object = StratifiedKFold(n_splits= num_folds, random_state=None, shuffle=True)
cv_object.split(X,y)

print(cv_object)

StratifiedKFold(n_splits=10, random_state=None, shuffle=True)


In [11]:
for train_indices, test_indices in cv_object.split(X,y): 
        # I will create new variables here so that it is more obvious what 
        # the code is doing (you can compact this syntax and avoid duplicating memory,
        # but it makes this code less readable)
        X_train = (X[train_indices])
        y_train = y[train_indices]

    #     print(X_train)
    #     print(y_train)

        X_test = (X[test_indices])
        y_test = y[test_indices]

In [30]:



clf = MLPClassifier(hidden_layer_sizes=(50, ), 
                            activation='relu', # type of non-linearity, every layer
                            solver='sgd', 
                            alpha=1e-4, # L2 penalty
                            batch_size= 'auto', # min of 200, num_samples
                            learning_rate='constant', # adapt learning? only for sgd
                            learning_rate_init=0.1, # only SGD
                            power_t=0.0,    # only SGD with inverse scaling of learning rate
                            max_iter=75, # stopping criteria
                            shuffle=True, 
                            random_state=1, 
                            tol=0, # for stopping
                            verbose=False, 
                            warm_start=False, 
                            momentum=0.9, # only SGD
                            nesterovs_momentum=False, # only SGD
                            early_stopping=False, 
                            validation_fraction=0.0, # only if early_stop is true
                            beta_1=0.9, # adam decay rate of moment
                            beta_2=0.999, # adam decay rate of moment
                            epsilon=1e-08) # adam numerical stabilizer
    



In [31]:
from sklearn.ensemble import BaggingClassifier

class MyEnsemble():
    
    def __init__(self, c, num_c, max_s, v):
        self.Ensemble = BaggingClassifier(base_estimator= c,
                                    n_estimators = num_c,
                                     max_samples = max_s,
                                     verbose = v)
    def predict(self, X):
        return self.Ensemble.predict(X)
    
    def fit(self, X,y):
        self.Ensemble.fit(X,y)
        
    def predict_proba(self, X):
        return self.Ensemble.predict_proba(X)

In [45]:
num_instances = 20


ensemble = MyEnsemble(clf, num_instances,100,False)

ensemble.fit(X_train,y_train)
y_hat=ensemble.predict(X_test)
print(y_hat)


print(X_train.shape)

[ 4.  4.  4.  4.  3.  4.  4.  4.  4.  4.  3.  4.  4.  4.  4.  4.  3.  4.
  4.  4.  4.  3.  4.  4.  4.  3.  4.  4.  4.  3.  4.  4.  4.  4.  3.  4.
  4.  3.  3.  3.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  4.  3.  4.  4.
  4.  4.  4.  3.  4.  3.  4.  4.  4.  4.  4.  3.  4.  3.  4.  3.  4.  3.
  4.  3.  4.  4.  3.  4.  4.  4.  4.  4.  4.  4.  4.  3.  4.  4.  4.  4.
  4.  4.  4.  4.  4.  4.  4.  4.]
(906, 172)


In [42]:
ensemble.predict_proba(X_test)

array([[ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12101987,  0.20351702,  0.26593818,  0.20539675,  0.20412817],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.26594029,  0.20537988,  0.2041301 ],
       [ 0.12103032,  0.20351941,  0.2

In [29]:
 acc = mt.accuracy_score(y_test,y_hat)
            #         lr_clf_accuracies.append(acc)
            #         cost_accuracies.append([acc])

conf = mt.confusion_matrix(y_test,y_hat)
print(conf)

[[ 0 13  0  0  0]
 [ 0 20  0  0  0]
 [ 0 25  0  0  0]
 [ 0 20  0  0  0]
 [ 0 20  0  0  0]]


In [None]:
confidence = ensemble.

In [18]:
score = get_confusion_costTot(conf, cost_matrix)

In [19]:
score

980

In [None]:
# Great! Its seems we are also maxing out the F1 score, lets go with C==1e-3
from sklearn.metrics import roc_curve, auc
from scipy import interp

K = 4

if sklearn_version < '0.18':
    from sklearn.cross_validation import StratifiedKFold
    kfold = StratifiedKFold(y=y_train, 
                            n_folds=K,
                            random_state=1)
else:
    from sklearn.model_selection import StratifiedKFold
    kfold = StratifiedKFold(n_splits=K,
                            random_state=1).split(X_train, y_train)


mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []

for i, (train, test) in enumerate(kfold):
    probas = pipe_lr.fit(X_train[train],
                         y_train[train]).predict_proba(X_train[test])

    perclass_mean_tpr = 0.0
    roc_auc = 0
    classes = np.unique(y_train[train])
    # get the mean fpr and tpr, per class
    for j in classes:
        fpr, tpr, thresholds = roc_curve(y_train[test],
                                         probas[:, j],
                                         pos_label=j)
        perclass_mean_tpr += interp(mean_fpr, fpr, tpr)
        perclass_mean_tpr[0] = 0.0
        roc_auc += auc(fpr, tpr)
        
    perclass_mean_tpr /= len(classes)
    roc_auc /= len(classes)
    mean_tpr += perclass_mean_tpr
    plt.plot(mean_fpr,perclass_mean_tpr,'--',lw=1,label='Mean Class ROC fold %d (area = %0.2f)'
                   % (i+1, roc_auc))
    
mean_tpr /= K
mean_auc = auc(mean_fpr, mean_tpr)
plt.plot(mean_fpr,perclass_mean_tpr,'k-',lw=2,label='Total Mean ROC (area = %0.2f)'
                   % (mean_auc))
plt.legend(loc='best')
plt.grid()