# Classification Models
### Classifying the Financial Impact on Countries That Take In Refugees
---
Imports

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.metrics as metrics

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2, so this
# import fails on newer versions; metrics.ConfusionMatrixDisplay is the replacement.
from sklearn.metrics import plot_confusion_matrix, mean_squared_error, f1_score, confusion_matrix
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    BaggingClassifier,
    GradientBoostingClassifier,
    StackingClassifier,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA

In [3]:
# Set pandas' display limits to 999 columns and 999 rows so wide/long frames
# are shown in full instead of being truncated.
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_rows", 999)

## Classification Pipelines
---

In [None]:
# Candidate classifiers to compare. Each one is wrapped in a Pipeline so the
# loop below can fit/score them through one interface; only KNN adds a
# StandardScaler step in front of the estimator.
# Every estimator that accepts a seed uses random_state=42 so that re-running
# the notebook reproduces the same scores.
pipelines = [
    ('LOGREG', Pipeline([('LR', LogisticRegression(max_iter=1000, random_state=42))])),
    ('DECISION TREE', Pipeline([('TREE', DecisionTreeClassifier(random_state=42))])),
    ('BAGGED TREE', Pipeline([('BAG', BaggingClassifier(random_state=42))])),
    ('RANDOM FOREST', Pipeline([('RAND', RandomForestClassifier(random_state=42))])),
    ('ADABOOST', Pipeline([('ADA', AdaBoostClassifier(random_state=42))])),
    ('KNN', Pipeline([('sc', StandardScaler()), ('KNN', KNeighborsClassifier())])),
]

# NOTE(review): Z_train / Z_test / y_train / y_test are not created in this
# cell; they must already exist from an earlier cell — confirm that a
# fresh-kernel "Run All" defines them before this point.
for pipe_name, model in pipelines:
    print(pipe_name)

    # Fit on the training split, then report accuracy on train and test.
    model.fit(Z_train, y_train)
    trainscore = model.score(Z_train, y_train)
    testscore = model.score(Z_test, y_test)
    # cross_val_score fits clones of the pipeline, so the fitted `model`
    # above is left untouched. Default CV settings are used.
    crossval = cross_val_score(model, Z_train, y_train).mean()

    y_pred_train = model.predict(Z_train)
    y_pred_test = model.predict(Z_test)

    # NOTE(review): f1_score defaults to average='binary', which assumes a
    # two-class target — TODO confirm y is binary.
    f1_train = metrics.f1_score(y_train, y_pred_train)
    f1_test = metrics.f1_score(y_test, y_pred_test)

    # Confusion matrix on the held-out split, labelled with the fitted classes.
    cm = confusion_matrix(y_test, y_pred_test, labels=model.classes_)
    disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm,
                                          display_labels=model.classes_)
    # disp.plot() opens its own figure when no `ax` is passed, so the title
    # has to be set on the axes it drew on (disp.ax_) *after* plotting;
    # calling plt.title() beforehand would title a different, empty axes.
    disp.plot()
    disp.ax_.set_title(pipe_name)
    # Render now so each matrix appears next to its model's printed scores.
    plt.show()

    print(f'Train: {trainscore}, Test: {testscore}, CV: {crossval}')
    print (f'f1 - Train = {f1_train}')
    print (f'f1 - Test = {f1_test}')
    print (' ')
    print (' ')

## Stacking
---

In [None]:
# Level-1 (base) learners whose predictions feed the final estimator.
# StackingClassifier only accepts classifiers — a regressor such as
# LinearRegression is rejected with a ValueError — so logistic regression is
# used both as a base learner and as the final estimator.
# random_state=42 keeps the results reproducible across runs.
level1_models = [
    ('GRAD', GradientBoostingClassifier(random_state=42)),
    ('ADA', AdaBoostClassifier(random_state=42)),
    ('LR', LogisticRegression(max_iter=1000, random_state=42)),
]

stack = StackingClassifier(
    estimators=level1_models,
    final_estimator=LogisticRegression(max_iter=1000, random_state=42),
)

# NOTE(review): this cell uses X_train / X_test while the pipeline comparison
# loop uses Z_train / Z_test — neither is defined in this cell; confirm which
# feature matrices are intended and that they exist on a fresh-kernel run.
stack.fit(X_train, y_train)
trainscore = stack.score(X_train, y_train)
testscore = stack.score(X_test, y_test)
crossval = cross_val_score(stack, X_train, y_train).mean()

y_pred_train = stack.predict(X_train)
y_pred_test = stack.predict(X_test)

# Both values are mean *squared* error so they match their names and the
# printed labels (the test value was previously a mean absolute error).
# NOTE(review): MSE on predicted class labels assumes numeric labels — TODO confirm.
mse_train = metrics.mean_squared_error(y_train, y_pred_train)
mse_test = metrics.mean_squared_error(y_test, y_pred_test)

print(f'Train: {trainscore}, Test: {testscore}, CV: {crossval}')
print (f'Mean Squared Error - Train = {mse_train}')
print (f'Mean Squared Error - Test = {mse_test}')