# Seed NumPy's global RNG (with 1) and TensorFlow's (with 2) before any other
# project code is imported or run.
# NOTE(review): the top-level `tensorflow.set_random_seed` looks like the
# TensorFlow 1.x spelling — confirm against the pinned TensorFlow version.
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)

In [15]:
#Import all the files to be shared among all notebooks 
import utils
import preprocessing
import data_visualization
import feature_engineering
from ML_algorithms import *
import pandas as pd
from seaborn import countplot
import numpy as np

In [39]:
# Load the dataset through the project's utils helper; the cells below all
# operate on (and rebind) `df`.
df = utils.get_dataset()

In [40]:
# Feature preparation. Each helper receives the current frame and its return
# value is rebound to `df`, so the calls must run in this order.
# What each helper does is defined in the project modules, not here; the
# descriptions below are read off the helper names — verify before relying on them.
df = preprocessing.encode_education(df)  # presumably encodes the Education column
df = preprocessing.one_hot_encoding(df,columns = ["Marital_Status"])  # applied to the Marital_Status column only
df = preprocessing.encode_days_as_costumer(df)  # presumably derives customer tenure in days
df = feature_engineering.drop_useless_columns(df)  # presumably removes columns judged uninformative

In [41]:
# Impute missing income values — presumably with a KNN-based imputer, going by
# the helper name; verify in preprocessing.
df = preprocessing.impute_income_KNN(df)

In [31]:
# Drop these five columns from the frame (axis=1 selects columns).
# NOTE(review): presumably they are superseded by the encoded/engineered
# features created above — confirm before changing the list.
df = df.drop(["Kidhome","Teenhome", "Education","Year_Birth","Dt_Customer"], axis=1)

In [33]:
# Integer passed as `random_state` to the train/test split and to the
# resamplers further down.
# NOTE(review): this rebinds the name `seed`, which the top of the file imports
# from numpy.random as a function; after this line `seed(...)` is no longer callable.
seed = 0

In [None]:
# Split `df` into train and test features/labels via the project helper.
# test_size=0.2 presumably holds out 20% of the rows; random_state=seed is
# passed so the split can be repeated — verify both in utils.data_split.
X_train, X_test, y_train, y_test = utils.data_split(df, test_size=0.2, random_state=seed)

In [None]:
# Replace both feature sets with the versions returned by the project's
# min-max helper.
# NOTE(review): presumably the scaler is fit on X_train only and then applied
# to X_test — confirm in preprocessing.Min_Max_Train.
X_train, X_test = preprocessing.Min_Max_Train(X_train, X_test)

#### a list of available initializations:
    
    * uniform
    * lecun_uniform: Uniform initialization scaled by the square root of the number of inputs (LeCun 98).
    * normal
    * identity: Use with square 2D layers (shape[0] == shape[1]).
    * orthogonal: Use with square 2D layers (shape[0] == shape[1]).
    * zero
    * one
    * glorot_normal: Gaussian initialization scaled by fan_in + fan_out (Glorot 2010)
    * glorot_uniform
    * he_normal: Gaussian initialization scaled by fan_in (He et al., 2014)
    * he_uniform
    
####  a list of available optimizers:

    * adam
    * sgd
    * adagrad
    * adadelta
    * nadam
    * rmsprop
    * adamax
    
#### a list of available loss functions:

    * binary_crossentropy
    * mean_squared_error
    * categorical_hinge
    * kld
    
    



In [None]:
# Baseline network with the "uniform" initializer, tracking accuracy.
# input_dim is read from the data instead of being hard-coded, so the model
# always matches the number of feature columns actually present in X_train.
model = KerasNN(X_train, X_test, y_train, y_test, input_dim=X_train.shape[1],init="uniform", metrics=["accuracy"])

In [None]:
import itertools

# Hyper-parameter grid: every combination of output-layer initializer,
# optimizer, loss and seed is trained once.
initializations_list = ["uniform","lecun_uniform","zero","one","glorot_normal","glorot_uniform","he_normal","he_uniform"]
optimizers_list = ["adam","rmsprop"]
loss_list = ["binary_crossentropy","mean_squared_error"]
seeds = [0,1,2,3,4]

# product() iterates in the same order as the original four nested loops
# (initializer outermost, seed innermost) and leaves the same loop names bound.
for i, j, n, z in itertools.product(initializations_list, optimizers_list, loss_list, seeds):
    # input_dim follows the data rather than a hard-coded column count.
    model = KerasNN(X_train, X_test, y_train, y_test, input_dim=X_train.shape[1],optimizer=j,loss=n,init=i,random_state=z)
    print("The initialization used is:", i)
    print("The optimizer used is:", j)
    print("The loss used is:", n)
    print("Seed:", z)
    print("-----------------------------------------------------")

    # The seed is stored with the result so the five runs per configuration
    # can be told apart in the results file.
    output_list = [i, j, n, z, utils.NN_evaluation(model, X_test, y_test)]
    # Append one record per line; the context manager closes the file even if
    # the write fails.
    with open("Keras_Neural_Net_Results.txt", "a") as output:
        output.write(str(output_list) + "\n")

## Oversampling on NN

In [None]:
from imblearn import over_sampling

In [None]:
# Random over-sampling of the training set only.
# `sampling_strategy` replaces the deprecated `ratio` keyword; a float is the
# requested minority/majority ratio after resampling.
ros = over_sampling.RandomOverSampler(random_state=seed, sampling_strategy=0.5)
resamp_x, resamp_y= ros.fit_resample(X_train, y_train)

In [None]:
# Train on the resampled training data and evaluate on X_test/y_test, which
# are not resampled. input_dim is taken from the data: resampling changes the
# number of rows, not the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

In [None]:
# ADASYN over-sampling of the training set only.
# `sampling_strategy` replaces the deprecated `ratio` keyword; a float is the
# requested minority/majority ratio after resampling.
ada = over_sampling.ADASYN(random_state=seed, sampling_strategy=0.7)
resamp_x, resamp_y= ada.fit_resample(X_train, y_train)

In [None]:
# Train on the ADASYN-resampled data and evaluate on X_test/y_test, which are
# not resampled. input_dim is taken from the data: resampling changes the
# number of rows, not the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

In [None]:
# SMOTE over-sampling of the training set only.
# `sampling_strategy` replaces the deprecated `ratio` keyword; a float is the
# requested minority/majority ratio after resampling.
smo = over_sampling.SMOTE(random_state=seed, sampling_strategy=0.5)
resamp_x, resamp_y= smo.fit_resample(X_train, y_train)

In [None]:
# Train on the SMOTE-resampled data and evaluate on X_test/y_test, which are
# not resampled. input_dim is taken from the data: resampling changes the
# number of rows, not the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

## Undersampling on NN

In [None]:
from imblearn import under_sampling

In [None]:
# Random under-sampling of the training set only; `seed` (set earlier in the
# notebook) is passed as random_state. No sampling ratio is given, so the
# sampler's own default applies.
rus = under_sampling.RandomUnderSampler(random_state=seed)
resamp_x, resamp_y= rus.fit_resample(X_train, y_train)

In [None]:
# Train on the randomly under-sampled data and evaluate on X_test/y_test,
# which are not resampled. The variable keeps its historical name
# `oversampling_model` even though this section under-samples.
# input_dim is taken from the data: resampling changes the number of rows, not
# the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

In [None]:
# Tomek-links cleaning of the training set only.
# No random_state is passed: imbalanced-learn deprecated (and later removed)
# that argument for this sampler.
tom = under_sampling.TomekLinks()
resamp_x, resamp_y= tom.fit_resample(X_train, y_train)

In [None]:
# Train on the Tomek-links-cleaned data and evaluate on X_test/y_test, which
# are not resampled. The variable keeps its historical name
# `oversampling_model` even though this section under-samples.
# input_dim is taken from the data: resampling changes the number of rows, not
# the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

In [None]:
# AllKNN cleaning of the training set only, using 5 neighbours.
# No random_state is passed: imbalanced-learn deprecated (and later removed)
# that argument for this sampler.
aknn = under_sampling.AllKNN(n_neighbors=5)
resamp_x, resamp_y= aknn.fit_resample(X_train, y_train)

In [None]:
# Train on the AllKNN-cleaned data and evaluate on X_test/y_test, which are
# not resampled. The variable keeps its historical name `oversampling_model`
# even though this section under-samples.
# input_dim is taken from the data: resampling changes the number of rows, not
# the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

In [None]:
# Edited-nearest-neighbours cleaning of the training set only, using 3 neighbours.
# No random_state is passed: imbalanced-learn deprecated (and later removed)
# that argument for this sampler.
enn = under_sampling.EditedNearestNeighbours(n_neighbors=3)
resamp_x, resamp_y= enn.fit_resample(X_train, y_train)

In [None]:
# Train on the ENN-cleaned data and evaluate on X_test/y_test, which are not
# resampled. The variable keeps its historical name `oversampling_model` even
# though this section under-samples.
# input_dim is taken from the data: resampling changes the number of rows, not
# the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

## Combined Methods

In [None]:
from imblearn import combine

In [None]:
# SMOTE + Tomek links applied to the training set only.
# `sampling_strategy` replaces the deprecated `ratio` keyword; a float is the
# requested minority/majority ratio for the over-sampling step.
smotom = combine.SMOTETomek(random_state=seed, sampling_strategy=0.8)
resamp_x, resamp_y= smotom.fit_resample(X_train, y_train)

In [None]:
# Train on the SMOTETomek-resampled data and evaluate on X_test/y_test, which
# are not resampled. input_dim is taken from the data: resampling changes the
# number of rows, not the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

In [None]:
# SMOTE + edited nearest neighbours applied to the training set only.
# `sampling_strategy` replaces the deprecated `ratio` keyword; a float is the
# requested minority/majority ratio for the over-sampling step.
smotenn = combine.SMOTEENN(random_state=seed, sampling_strategy=0.8)
resamp_x, resamp_y= smotenn.fit_resample(X_train, y_train)

In [None]:
# Train on the SMOTEENN-resampled data and evaluate on X_test/y_test, which
# are not resampled. input_dim is taken from the data: resampling changes the
# number of rows, not the number of feature columns.
oversampling_model = KerasNN(resamp_x, X_test, resamp_y, y_test, input_dim=resamp_x.shape[1],init="he_normal")

utils.NN_evaluation(oversampling_model, X_test, y_test)

In [None]:
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB, ComplementNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.preprocessing import MinMaxScaler

In [None]:
def Gaussian_NB(X_train, X_test, y_train, y_test):
    """
    Fit a Gaussian Naive Bayes classifier on min-max scaled features, print its
    test metrics and return the profit share.

    Parameters:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels used for the metrics.

    Returns:
        Whatever ``utils.profit_share(y_pred, y_test)`` returns for the test
        predictions.

    NOTE(review): the utils metric helpers are called as (y_pred, y_test),
    whereas scikit-learn's own metrics take (y_true, y_pred). Confirm the
    helpers expect this order — for precision/recall a swap exchanges the two.
    """
    scaler = MinMaxScaler()
    # Fit the scaler on the training data only, then reuse it for the test data
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    y_pred = gnb.predict(X_test)

    print("Accuracy {:1.2f}".format(utils.calculate_accuracy(y_pred, y_test)))
    print("Area under the curve {:1.2f}".format(utils.calculate_auc(y_pred, y_test)))
    print("Precision {:1.2f}".format(utils.calculate_precision_score(y_pred, y_test)))
    print("Recall {:1.2f}".format(utils.calculate_recall_score(y_pred, y_test)))
    # Computed once and reused for both the report line and the return value.
    profit = utils.profit_share(y_pred, y_test)
    print("Profit Share {:1.2f}".format(profit))
    return profit

In [None]:
def Multinomial_NB(X_train, X_test, y_train, y_test):
    """
    Fit a Multinomial Naive Bayes classifier on min-max scaled features and
    return the profit share of its test predictions.

    Parameters:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels used for scoring.

    Returns:
        Whatever ``utils.profit_share(y_pred, y_test)`` returns. The fitted
        model itself is not returned (the former trailing ``return mnb`` was
        unreachable).
    """
    scaler = MinMaxScaler()
    # Fit the scaler on the training data only, then reuse it for the test data
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    mnb = MultinomialNB()
    mnb.fit(X_train, y_train)
    y_pred = mnb.predict(X_test)
    return utils.profit_share(y_pred, y_test)

In [None]:
def Complement_NB(X_train, X_test, y_train, y_test):
    """
    Fit a Complement Naive Bayes classifier and return the profit share of its
    test predictions.

    Parameters:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels used for scoring.

    Returns:
        Whatever ``utils.profit_share(y_pred, y_test)`` returns.

    NOTE(review): unlike the other *_NB helpers in this file, the features are
    used as passed in, without min-max scaling. ComplementNB is documented for
    non-negative features, so callers presumably pass already-scaled data —
    confirm.
    """
    cnb = ComplementNB()
    cnb.fit(X_train, y_train)
    y_pred = cnb.predict(X_test)
    return utils.profit_share(y_pred, y_test)

In [None]:
def Bernoulli_NB(X_train, X_test, y_train, y_test):
    """
    Score a Bernoulli Naive Bayes classifier by profit share.

    Both feature sets are min-max scaled with a scaler fitted on the training
    features; the classifier is fitted on the scaled training data and its
    predictions for the scaled test data are passed, together with ``y_test``,
    to ``utils.profit_share``, whose result is returned.
    """
    # The scaler learns its ranges from the training features only.
    min_max = MinMaxScaler()
    min_max.fit(X_train)
    train_scaled = min_max.transform(X_train)
    test_scaled = min_max.transform(X_test)

    classifier = BernoulliNB()
    classifier.fit(train_scaled, y_train)
    return utils.profit_share(classifier.predict(test_scaled), y_test)

In [None]:
def KerasNN(X_train, X_test, y_train, y_test, input_dim=32, n_layers=4, optimizer="rmsprop", loss="binary_crossentropy", init="uniform", metrics=["accuracy"], random_state=42):
    """
    Build, compile and fit a small fully connected Keras network with a single
    sigmoid output unit.

    The model is a Sequential stack of Dense layers: one 6-unit ReLU layer that
    takes ``input_dim`` inputs, ``n_layers - 2`` further 6-unit ReLU layers, and
    a 1-unit sigmoid output layer. It is trained for 100 epochs with verbose=0.

    Parameters:
        X_train, y_train: data the model is fitted on.
        X_test, y_test: accepted for signature symmetry with the other model
            helpers; not used inside this function.
        input_dim: number of input features; must match the columns of X_train.
        n_layers: total number of Dense layers (values below 2 still yield the
            input and output layers).
        optimizer, loss: passed straight to ``model.compile``.
        init: initializer for the output layer only; the hidden layers are
            built without an explicit initializer.
            NOTE(review): if the initializer was meant to apply to every layer,
            it has to be passed to the hidden Dense layers as well.
        metrics: metrics passed to ``model.compile``.
        random_state: seed for NumPy's global RNG.
            NOTE(review): whether this alone makes the Keras weight
            initialisation reproducible depends on the backend — confirm.

    Returns:
        The fitted Keras model.
    """
    np.random.seed(random_state)

    model = models.Sequential()
    model.add(layers.Dense(6, activation="relu", input_dim=input_dim))
    for _ in range(n_layers - 2):
        model.add(layers.Dense(6, activation="relu"))
    # Keras 2 names this argument `kernel_initializer`; `init` is the Keras 1 spelling.
    model.add(layers.Dense(1, activation="sigmoid", kernel_initializer=init))
    # Hand compile() its own copy so the shared default list can never be
    # mutated through the model.
    model.compile(optimizer, loss, metrics=None if metrics is None else list(metrics))

    # Re-randomise the freshly initialised weights via the project helper
    # (see utils.shuffle_weights for the exact behaviour).
    utils.shuffle_weights(model, model.get_weights())

    model.fit(X_train, y_train, epochs=100, verbose=0)
    return model