In [1]:
import warnings
from matplotlib import MatplotlibDeprecationWarning
from scipy.linalg import LinAlgWarning
from sklearn.exceptions import ConvergenceWarning
from sklearn.exceptions import FitFailedWarning

warnings.filterwarnings(action='ignore', category=ConvergenceWarning)
warnings.filterwarnings(action='ignore', category=FitFailedWarning)
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=LinAlgWarning)
warnings.filterwarnings(action='ignore', category=MatplotlibDeprecationWarning)
warnings.filterwarnings(action='ignore', category=UserWarning)

# Wielowarstwowa sieć neuronowa

(*Multilayer perceptron*, *feedforward neural network*)



**Uwaga:** "Input layer" pomimo tego, że ma w nazwie słowo "warstwa", to tak naprawdę to nie jest żadna warstwa sieci... To są po prostu dane wejściowe... Niestety przyjęło się literaturze nazywanie tego w ten sposób, co jest mylące :(


Sieci uczy sie metodą spadku gradientu (pewnymi wariantami tej metody). Uczenie wykorzystuje algorytm **propagacji wstecznej** (https://en.wikipedia.org/wiki/Backpropagation).

<br>

<br>

<br>

**Uwaga!** Sieci neuronowe absolutnie zawsze wymagają zestandaryzowanych danych! Niezależnie od tego czy wykorzystujemy regularyzację czy nie i niezależnie od typu sieci!

<br>

<br>

### Fakt matematyczny: jednowarstwową siecią możemy otrzymać dowolny kształt. 

Co z tego wynika? To, że (teoretycznie) zawsze wystarczy sieć jednowarstwowa (odpowiednio duża). W praktyce rzeczywiście z reguły wystarcza jedna warstwa, ale mimo wszystko zawsze warto sprawdzić czy 2 (lub 3) nie zadziałają przypadkiem lepiej. Przy czym jeżeli dla dwóch wartsw jest gorzej, to nie ma sensu sprawdzać dla większej ilości.

In [2]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score


from sklearn.linear_model import LogisticRegression 
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis

# Zad
* Wczytaj zbiór danych
* Podziel dane na train test
* Wykonaj uczenie modeli (dobierz najlepsze parametry)
    * LogisticRegression
    * LinearSVC
    * SVC
    * KNeighborsClassifier
    * DecisionTreeClassifier
    * RandomForestClassifier
    * BaggingClassifier
    * ExtraTreesClassifier
    * AdaBoostClassifier
    * GradientBoostingClassifier
    * VotingClassifier
    * xgboost.XGBClassifier
* Porównaj wyniki na zbiorze uczącym    

In [3]:
dataset = np.loadtxt('data/pima-indians-diabetes.csv', delimiter=",")

X = dataset[:,0:8]
Y = dataset[:,8]

print(X.shape)
print(np.mean(Y))

seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed, shuffle=True)

from sklearn.model_selection import StratifiedKFold

seed=123
kfold = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)

(768, 8)
0.3489583333333333


In [4]:
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from xgboost import XGBClassifier

In [5]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', LinearSVC(C=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100]
}

grid_1 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_1.fit(X_train, y_train)
grid_1.best_params_

{'classifier__C': 0.01, 'preprocessing': StandardScaler()}

In [6]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', SVC(C=1, kernel='rbf', gamma=0.001))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100],
            'classifier__gamma': [0.001, 0.01, 0.1, 1, 10, 100]
}

grid_2 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_2.fit(X_train, y_train)
grid_2.best_params_

{'classifier__C': 10,
 'classifier__gamma': 0.001,
 'preprocessing': StandardScaler()}

In [7]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', LogisticRegression(penalty='l2', C=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100],
            'classifier__penalty': ['l1', 'l2', 'elasticnet']
}

grid_3 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_3.fit(X_train, y_train)
grid_3.best_params_

{'classifier__C': 100, 'classifier__penalty': 'l2', 'preprocessing': None}

In [8]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', KNeighborsClassifier(n_neighbors=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}

grid_4 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_4.fit(X_train, y_train)
grid_4.best_params_

{'classifier__n_neighbors': 9, 'preprocessing': None}

In [9]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', DecisionTreeClassifier(max_depth=1, max_features=1, min_samples_leaf=1, min_samples_split=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__max_features': [None, 'auto', 'log2', 'sqrt'],
            'classifier__min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
}

grid_5 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_5.fit(X_train, y_train)
grid_5.best_params_

{'classifier__criterion': 'entropy',
 'classifier__max_depth': 5,
 'classifier__max_features': 'log2',
 'classifier__min_samples_leaf': 9,
 'classifier__min_samples_split': 9,
 'preprocessing': None}

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', RandomForestClassifier(n_estimators=1, max_depth=1, max_features=1, min_samples_leaf=1, min_samples_split=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__max_features': [None, 'auto', 'log2', 'sqrt'],
            'classifier__min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
            'classifier__n_estimators': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}

grid_6 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_6.fit(X_train, y_train)
grid_6.best_params_

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion='entropy', max_depth=1, random_state=0, max_features=None, min_samples_leaf=1, min_samples_split=2), n_estimators=10, random_state=0))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__base_estimator__criterion': ['gini', 'entropy'],
            'classifier__base_estimator__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__base_estimator__max_features': [None, 'auto', 'log2', 'sqrt'],
            'classifier__base_estimator__min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__base_estimator__min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
            'classifier__n_estimators': [1, 10, 1007
}

grid_7 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_7.fit(X_train, y_train)
grid_7.best_params_

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', ExtraTreeClassifier(max_depth=1, max_features=1, min_samples_leaf=1, min_samples_split=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__max_features': [None, 'auto', 'log2', 'sqrt'],
            'classifier__min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
}

grid_8 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_8.fit(X_train, y_train)
grid_8.best_params_

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', AdaBoostClassifier(base_estimator=DecisionTreeClassifier(criterion='entropy', max_depth=1, random_state=0, max_features=None, min_samples_leaf=1, min_samples_split=2), n_estimators=10, random_state=0))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__base_estimator__criterion': ['gini', 'entropy'],
            'classifier__base_estimator__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__base_estimator__max_features': [None, 'auto', 'log2', 'sqrt'],
            'classifier__base_estimator__min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__base_estimator__min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
            'classifier__n_estimators': [1, 10, 100]
}

grid_9 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_9.fit(X_train, y_train)
grid_9.best_params_

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', GradientBoostingClassifier(learning_rate=0.1, max_depth=1, max_features=1, min_samples_leaf=1, min_samples_split=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__criterion': ['friedman_mse', 'mse', 'mae'],
            'classifier__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__max_features': [None, 'auto', 'log2', 'sqrt'],
            'classifier__min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
            'classifier__n_estimators': [1, 10, 100]
}

grid_10 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_10.fit(X_train, y_train)
grid_10.best_params_

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=1, min_child_weight=1, missing=None, n_estimators=10, n_jobs=1, nthread=-1, objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=0, silent=True, subsample=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__base_score': [0.5],
            'classifier__booster': ['gbtree'],
            'classifier__colsample_bylevel': [1],
            'classifier__colsample_bytree': [1],
            'classifier__gamma': [0],
            'classifier__learning_rate': [0.1],
            'classifier__max_delta_step': [0],
            'classifier__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__min_child_weight': [1],
            'classifier__n_estimators': [1, 10, 100],
            'classifier__subsample': [1]
}

grid_11 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_11.fit(X_train, y_train)
grid_11.best_params_

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', XGBClassifier(nthread=-1,base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=1, min_child_weight=1, missing=None, n_estimators=10, n_jobs=1, objective='reg:squarederror', random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=0, silent=True, subsample=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__base_score': [0.5],
            'classifier__booster': ['gbtree'],
            'classifier__colsample_bylevel': [1],
            'classifier__colsample_bytree': [1],
            'classifier__gamma': [0],
            'classifier__learning_rate': [0.1],
            'classifier__max_delta_step': [0],
            'classifier__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'classifier__min_child_weight': [1],
            'classifier__n_estimators': [1, 10, 100],
            'classifier__subsample': [1]
}

grid_12 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_12.fit(X_train, y_train)
grid_12.best_params_

In [None]:
from sklearn.ensemble import VotingClassifier

voting_clf = VotingClassifier(estimators=[('rf', grid_1.best_estimator_),
                                          ('et', grid_2.best_estimator_),
                                          ('ada', grid_3.best_estimator_),
                                          ('gb', grid_4.best_estimator_),
                                          ('xgb', grid_5.best_estimator_),
                                          ('knn', grid_6.best_estimator_),
                                          ('lr', grid_7.best_estimator_),
                                          ('svm', grid_8.best_estimator_),
                                          ('gbc', grid_9.best_estimator_),
                                          ('xgbc', grid_10.best_estimator_),
                                          ('xgbr', grid_11.best_estimator_),
                                          ('xgbrf', grid_12.best_estimator_)],
                              voting='soft',
                              weights=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [None]:
from sklearn import  metrics

models = []
models.append(('SVM linear', grid_1.best_estimator_))
models.append(('SVM rbf', grid_2.best_estimator_))
models.append(('LR', grid_3.best_estimator_))
models.append(('KNN', grid_4.best_estimator_))
models.append(('DecisionTreeClassifier', grid_5.best_estimator_))
models.append(('BaggingClassifier', grid_6.best_estimator_))
models.append(('RandomForestClassifier', grid_7.best_estimator_))
models.append(('ExtraTreesClassifier', grid_8.best_estimator_))
models.append(('AdaBoostClassifier', grid_9.best_estimator_))
models.append(('GradientBoostingClassifier', grid_10.best_estimator_))
models.append(('XGBClassifier', grid_11.best_estimator_))
models.append(('XGBClassifier r2', grid_12.best_estimator_))
models.append(('voting_clf', voting_clf))

precision_score = []
recall_score = []
f1_score = []
accuracy_score = []
roc_auc_score = []
for name, model in models:
    print(name)
    print("precision_score: {}".format(metrics.precision_score(y_test , model.predict(X_test)) ))
    print("recall_score: {}".format( metrics.recall_score(y_test , model.predict(X_test)) ))
    print("f1_score: {}".format( metrics.f1_score(y_test , model.predict(X_test)) ))
    print("accuracy_score: {}".format( metrics.accuracy_score(y_test , model.predict(X_test)) ))
    
    if (name == 'SVM linear'):
        print("roc_auc_score: {}".format( metrics.roc_auc_score(y_test , model.decision_function(X_test)) ))            
    else:
        print("roc_auc_score: {}".format( metrics.roc_auc_score(y_test , model.predict_proba(X_test)[:,1]) ))
    
    precision_score.append(metrics.precision_score(y_test , model.predict(X_test)))
    recall_score.append(metrics.recall_score(y_test , model.predict(X_test)))
    f1_score.append( metrics.f1_score(y_test , model.predict(X_test)))
    accuracy_score.append(metrics.accuracy_score(y_test , model.predict(X_test)))
    if (name == 'SVM linear'):
        roc_auc_score.append(metrics.roc_auc_score(y_test , model.decision_function(X_test)))        
    else:    
        roc_auc_score.append(metrics.roc_auc_score(y_test , model.predict_proba(X_test)[:,1]))

In [None]:
import pandas as pd
d = {'precision_score': precision_score, 
     'recall_score': recall_score, 
     'f1_score': f1_score,
     'accuracy_score' : accuracy_score,
     'roc_auc_score' : roc_auc_score
    }
df = pd.DataFrame(data=d)
df.insert(loc=0, column='Method', value=['SVM linear'])#,'SVM rbf','LR','KNN', 'DecisionTreeClassifier','BaggingClassifier','RandomForestClassifier','ExtraTreesClassifier', 'AdaBoostClassifier','GradientBoostingClassifier','XGBClassifier','XGBClassifier r', 'voting'])
df

# MLPClassifier

Dodajmy model sieci neuronowej

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
model = MLPClassifier((20,10))
model.fit(X_train, y_train)

y_pred = model.predict_proba(X_test)[:,1]
predictions = y_pred.round()

accuracy = metrics.accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0), "AUC: ", metrics.roc_auc_score(y_score=y_pred,y_true=y_test))

# Zad
Wykonaj Walidację krzyżową

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', MLPClassifier())])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__hidden_layer_sizes': [(20,10)],
            'classifier__learning_rate_init': [0.001],#, 0.01, 0.1],
            'classifier__max_iter': [100],
            'classifier__batch_size': [8, 16,32],
}

grid_2 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_2.fit(X_train, y_train)
grid_2.best_params_

In [None]:
metrics.accuracy_score(y_test, grid_2.best_estimator_.predict(X_test))

In [None]:
from sklearn import  metrics


models = []
models.append(('SVM linear', grid_1.best_estimator_))
models.append(('SVM rbf', grid_2.best_estimator_))
models.append(('LR', grid_3.best_estimator_))
models.append(('KNN', grid_4.best_estimator_))
models.append(('DecisionTreeClassifier', grid_5.best_estimator_))
models.append(('BaggingClassifier', grid_6.best_estimator_))
models.append(('RandomForestClassifier', grid_7.best_estimator_))
models.append(('ExtraTreesClassifier', grid_8.best_estimator_))
models.append(('AdaBoostClassifier', grid_9.best_estimator_))
models.append(('GradientBoostingClassifier', grid_10.best_estimator_))
models.append(('XGBClassifier', grid_11.best_estimator_))
models.append(('XGBClassifier r2', grid_12.best_estimator_))
models.append(('voting_clf', voting_clf))
models.append(('MLP', grid_14.best_estimator_))

precision_score = []
recall_score = []
f1_score = []
accuracy_score = []
roc_auc_score = []
for name, model in models:
    print(name)
    print("precision_score: {}".format(metrics.precision_score(y_test , model.predict(X_test)) ))
    print("recall_score: {}".format( metrics.recall_score(y_test , model.predict(X_test)) ))
    print("f1_score: {}".format( metrics.f1_score(y_test , model.predict(X_test)) ))
    print("accuracy_score: {}".format( metrics.accuracy_score(y_test , model.predict(X_test)) ))
    
    if (name == 'SVM linear'):
        print("roc_auc_score: {}".format( metrics.roc_auc_score(y_test , model.decision_function(X_test)) ))            
    else:
        print("roc_auc_score: {}".format( metrics.roc_auc_score(y_test , model.predict_proba(X_test)[:,1]) ))
    
    precision_score.append(metrics.precision_score(y_test , model.predict(X_test)))
    recall_score.append(metrics.recall_score(y_test , model.predict(X_test)))
    f1_score.append( metrics.f1_score(y_test , model.predict(X_test)))
    accuracy_score.append(metrics.accuracy_score(y_test , model.predict(X_test)))
    if (name == 'SVM linear'):
        roc_auc_score.append(metrics.roc_auc_score(y_test , model.decision_function(X_test)))        
    else:    
        roc_auc_score.append(metrics.roc_auc_score(y_test , model.predict_proba(X_test)[:,1]))

In [None]:
import pandas as pd
d = {'precision_score': precision_score, 
     'recall_score': recall_score, 
     'f1_score': f1_score,
     'accuracy_score' : accuracy_score,
     'roc_auc_score' : roc_auc_score
    }
df = pd.DataFrame(data=d)
df.insert(loc=0, column='Method', value=['SVM linear','SVM rbf','LR','KNN', 'DecisionTreeClassifier','BaggingClassifier','RandomForestClassifier','ExtraTreesClassifier', 'AdaBoostClassifier','GradientBoostingClassifier','XGBClassifier','XGBClassifier r', 'voting', 'MLP'])
df

# Wczytaj dane treningowe i testowe

In [None]:
# Wczytaj dane treningowe i testowe

import pandas as pd

train_set = pd.read_csv('Dane/adult/adult.data', sep=", ",header = None)
test_set = pd.read_csv('Dane/adult/adult.test', sep=", ",skiprows = 1, header = None) # Make sure to skip a row for the test set


col_labels = ['age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status', 'occupation', 
              'relationship', 'race', 'sex', 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
             'wage_class']
train_set.columns = col_labels
test_set.columns = col_labels

train = train_set.replace('?', np.nan).dropna()
test = test_set.replace('?', np.nan).dropna()



dataset = pd.concat([train,test])

dataset['wage_class'] = dataset.wage_class.replace({'<=50K.': 0,'<=50K':0, '>50K.':1, '>50K':1})

dataset.drop(["fnlwgt"],axis=1,inplace=True)

dataset.drop(["education"],axis=1,inplace=True)

x = dataset.groupby('native_country')["wage_class"].mean()

d = dict(pd.cut(x[x.index!=" United-States"],5,labels=range(5)))

dataset['native_country'] = dataset['native_country'].replace(d)

dataset = pd.get_dummies(dataset,drop_first=True)

train = dataset.iloc[:train.shape[0]]
test = dataset.iloc[train.shape[0]:]

X_train = train.drop("wage_class",axis=1)
y_train = train.wage_class

X_test = test.drop("wage_class",axis=1)
y_test = test.wage_class

# from sklearn.preprocessing import StandardScaler
# sc = StandardScaler()
# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)

# print(X_train.shape)
# X_test.shape

In [None]:
print(X_train.shape)
print(X_test.shape)

# Zad
Porównaj wyniki sieci na:
* oryginalnych danych 
* na wystandaryzowanych

In [None]:
model = MLPClassifier((20,10))
model.fit(X_train, y_train)

y_pred = model.predict_proba(X_test)[:,1]
predictions = y_pred.round()

accuracy = metrics.accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0), "AUC: ", metrics.roc_auc_score(y_score=y_pred,y_true=y_test))

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

print(X_train.shape)
X_test.shape

model = MLPClassifier((20,10))
model.fit(X_train, y_train)

y_pred = model.predict_proba(X_test)[:,1]
predictions = y_pred.round()

accuracy = metrics.accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0), "AUC: ", metrics.roc_auc_score(y_score=y_pred,y_true=y_test))