<a href="https://colab.research.google.com/github/Marcel-Milosz/Dane/blob/Code-Review/Milosz_Marcel_07_niestacj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import make_scorer
from sklearn.metrics import classification_report

# Seed shared by every randomized step in this notebook.
# np.random.seed() returns None, so the integer must be kept in its own
# variable; otherwise every `random_state=seed` below is `random_state=None`.
seed = 15
np.random.seed(seed)

# Both CSV files are read without a header row; the labels are taken from
# column 0 of the label file as a plain NumPy array.
train_X = pd.read_csv("/content/drive/MyDrive/Projekt_ML_26.06.2021/train_data.csv", header=None)
train_label_df = pd.read_csv("/content/drive/MyDrive/Projekt_ML_26.06.2021/train_labels.csv", header=None)
train_label = train_label_df[0].values

# NOTE(review): test_size=0.97 leaves only 3% of the rows for training --
# presumably to keep the grid search fast; confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    train_X,
    train_label,
    test_size=0.97,
    shuffle=True,
    random_state=seed,
)

# Standard scaler: statistics are learned on the training split only and then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

# PCA down to two whitened components. Like the scaler, the projection is
# fitted on the training split only; the test split must be passed through
# transform() so it is expressed in the same component space the models are
# trained on (calling fit_transform() on it would refit PCA on the test data).
pca = PCA(n_components=2, whiten=True, random_state=seed)
X_train_std_pca = pca.fit_transform(X_train_std)
X_test_std_pca = pca.transform(X_test_std)

# Candidate classifiers. Each instance is offered to the search as a possible
# value of the pipeline's "classifier" step.
log_reg_model = LogisticRegression()
sgd_model = SGDClassifier()
svc_model = SVC()

# Single-step pipeline; the SVC() here is only a placeholder, since every grid
# entry below replaces the "classifier" step with one of the instances above.
pipe = Pipeline([("classifier", SVC())])

# One parameter grid per classifier family. Keys of the form
# "classifier__<param>" set <param> on whichever estimator occupies the step.
# NOTE(review): the LogisticRegression grid crosses every solver with every
# penalty, including pairs the solvers do not support (e.g. 'lbfgs' with
# 'l1'); those candidates are expected to fail to fit -- confirm and prune.
SearchS = [
    {
        "classifier": [log_reg_model],
        "classifier__solver": ['lbfgs', 'sag', 'saga'],
        "classifier__penalty": ['l1', 'l2', 'elasticnet', None],
        "classifier__class_weight": ["balanced", None],
        "classifier__C": np.logspace(0, 4, 10),
        "classifier__multi_class": ['ovr'],
    },
    {
        "classifier": [sgd_model],
        "classifier__penalty": ['l1', 'l2', 'elasticnet'],
        "classifier__class_weight": [None, "balanced"],
        "classifier__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1],
        "classifier__loss": ['hinge', 'log', 'perceptron'],
    },
    {
        "classifier": [svc_model],
        "classifier__kernel": ["linear", "rbf", "poly"],
        "classifier__class_weight": [None, "balanced"],
        "classifier__gamma": [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1],
        "classifier__C": np.logspace(0, 4, 10),
    },
]

# Exhaustive search over all grids, scored with 3-fold stratified,
# shuffled cross-validation on the PCA-reduced training split.
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
gridsearch = GridSearchCV(
    estimator=pipe,
    param_grid=SearchS,
    cv=skf,
    verbose=0,
    n_jobs=-1,
)
best_model = gridsearch.fit(X_train_std_pca, y_train)

# PCA diagnostics and the classifier chosen by the grid search.
print(pca.explained_variance_ratio_)
print(pca.singular_values_)
print(best_model.best_estimator_.get_params()["classifier"])

# Bare expressions in the middle of a cell (or in a script) display nothing,
# so the search results are printed explicitly.
print(gridsearch.best_params_)
print(gridsearch.best_score_)

# Evaluate the estimator the search actually selected instead of a hard-coded
# LogisticRegression whose hyperparameters are unrelated to the search result.
# GridSearchCV refits the best candidate on the full training split by
# default, so best_estimator_ is ready to predict.
model = gridsearch.best_estimator_
preds = model.predict(X_test_std_pca)

print(classification_report(y_test, preds))

[0.00066959 0.00066762]
[158.19854389 157.96561516]
SVC(C=1291.5496650148827, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       362
           1       0.90      1.00      0.95      3276

    accuracy                           0.90      3638
   macro avg       0.45      0.50      0.47      3638
weighted avg       0.81      0.90      0.85      3638



  _warn_prf(average, modifier, msg_start, len(result))
