### Imports

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

from MachineLearning import models

### Read Databases

In [None]:
# Load the preprocessed local-view and global-view tables.
# Forward slashes keep the relative paths portable across operating systems
# and avoid a backslash followed by "p" inside a normal string literal, which
# is an invalid escape sequence that Python warns about.
local_view = pd.read_csv(
    "Preprocessed/preprocessed_local_view.csv", sep=",")
global_view = pd.read_csv(
    "Preprocessed/preprocessed_global_view.csv", sep=",")

# Drop the "Unnamed: 0" column from both tables.
# presumably a leftover index column written by an earlier to_csv — confirm
local_view.drop(["Unnamed: 0"], axis=1, inplace=True)
global_view.drop(["Unnamed: 0"], axis=1, inplace=True)

dropna_list = [local_view, global_view]

# Remove, in place, every row that contains at least one missing value.
for var in dropna_list:
    var.dropna(inplace=True)

print("\n============================================================================================================")
print("Checking base balance: ")

# Stack both label columns under distinct names so one table can show the
# class counts of each view side by side. Rows coming from the other view are
# NaN in a given column; value_counts skips them, and fillna(0) covers labels
# that occur in only one of the two views.
targets = pd.concat(
    [
        local_view[['label']].rename(columns={'label': 'target_local'}),
        global_view[['label']].rename(columns={'label': 'target_global'}),
    ],
    axis=0,
    ignore_index=True,
)
counts = targets.apply(pd.Series.value_counts).fillna(0).astype(int)

print(counts)

### Transform target column values into 0 and 1

In [None]:
# Encode the string labels as integers: CONFIRMED -> 1, FALSE POSITIVE -> 0.
target_map = {'CONFIRMED': 1, 'FALSE POSITIVE': 0}

for _view in (local_view, global_view):
    # Series.map turns any label that is not a key of target_map into NaN.
    _view['label'] = _view['label'].map(target_map)

### Separating into training and testing

In [None]:
# ============= Separating into X and y =============

# Target: the 'label' column. Features: every column except the last one.
# NOTE(review): this assumes 'label' is the last column of both tables — confirm.
y_local = local_view['label']
X_local = local_view.iloc[:, :-1]

y_global = global_view['label']
X_global = global_view.iloc[:, :-1]

# ============= Separating into training and testing =============

# Settings shared by both splits: 30% of the rows go to the test set and the
# seed is fixed. Each split is stratified on its own target so the class
# proportions are kept in both parts.
split_options = {"test_size": 0.3, "random_state": 42}

(X_train_local, X_test_local,
 y_train_local, y_test_local) = train_test_split(
    X_local, y_local, stratify=y_local, **split_options)

(X_train_global, X_test_global,
 y_train_global, y_test_global) = train_test_split(
    X_global, y_global, stratify=y_global, **split_options)

### SMOTE balancing

In [None]:
# SMOTE balancing: oversample the training splits only; the test splits are
# left untouched.
# The seed is fixed so the synthetic samples are the same on every run,
# consistent with the random_state=42 used for the train/test split and for
# the classifiers in this file.
smote = SMOTE(random_state=42)  # Create a seeded SMOTE instance
X_train_local, y_train_local = smote.fit_resample(
    X_train_local, y_train_local)  # Apply SMOTE to the local-view training data
X_train_global, y_train_global = smote.fit_resample(
    X_train_global, y_train_global)  # Apply SMOTE to the global-view training data

### Classification models and their hyperparameter grids

In [None]:
# Registry of the classifiers to evaluate. Each entry maps a display name to:
#   'clf'        - the estimator instance (seeded with random_state=42)
#   'parameters' - hyperparameter name -> collection of candidate values
# The dict is passed to models.defining_classifiers and
# models.dynamic_selection_of_classifiers.
# NOTE(review): presumably 'parameters' is used as a grid-search parameter
# grid inside those helpers — confirm against the models module.
models_and_parameters_C = {
    'AdaBoostClassifier': {
        'clf': AdaBoostClassifier(random_state=42),
        'parameters': {
            'n_estimators': range(50, 450, 100)
        },
    },
    'XGBClassifier': {
        'clf': XGBClassifier(n_estimators=50, random_state=42),
        'parameters': {
            'n_estimators': range(50, 450, 100),
            'gamma': [0.5, 1, 1.5, 2, 5],
            'max_depth': [3, 4, 5]
        }
    },
    # 'SVM': {
    #     'clf': SVC(probability=True, random_state=42),
    #     'parameters': {
    #         'C': [1, 3, 5, 10, 15],
    #         'kernel': ['linear', 'rbf'],
    #         'tol': [1e-3, 1e-4]
    #     },
    # },
    'MLPClassifier': {
        'clf': MLPClassifier(random_state=42),
        'parameters': {
            'solver': ['sgd', 'adam'],
            'max_iter': [500, 1000, 1500, 2000],
            # Every value here is a collection of candidates, so the two-layer
            # architecture (100, 50) is wrapped in a list. A bare tuple would
            # itself be iterated as two separate candidates, 100 and 50.
            'hidden_layer_sizes': [(100, 50)],
            'tol': [1e-3, 1e-4]
        },
    },
    'DecisionTreeClassifier': {
        'clf': DecisionTreeClassifier(random_state=42),
        'parameters': {
            'criterion': ['gini', 'entropy'],
            'splitter': ['best', 'random'],
            'max_depth': [None, 10, 20, 30],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        },
    }

}

### Classifier models

In [None]:
# Run every configured classifier on the local view, then on the global view.
# The last argument is the name of the view being processed.
models.defining_classifiers(
    models_and_parameters_C,
    X_train_local, y_train_local,
    X_test_local, y_test_local,
    "local",
)
models.defining_classifiers(
    models_and_parameters_C,
    X_train_global, y_train_global,
    X_test_global, y_test_global,
    "global",
)

### Dynamic selection of classifiers

In [None]:
# Dynamic selection of classifiers on the local view.
models.dynamic_selection_of_classifiers(
    models_and_parameters_C,
    X_train_local, y_train_local,
    X_test_local, y_test_local,
    "local",
)

In [None]:
# Dynamic selection of classifiers on the global view.
models.dynamic_selection_of_classifiers(
    models_and_parameters_C,
    X_train_global, y_train_global,
    X_test_global, y_test_global,
    "global",
)

### LSTM

In [None]:
# Local view: reshape the feature tables into 3-D arrays of shape
# (samples, univariate_past_history, future).
future = univariate_future_target = 1
univariate_past_history = 201
# NOTE(review): 201 presumably equals the number of feature columns of the
# local view, so that each row becomes exactly one sample — confirm.

# -1 lets NumPy infer the number of samples from the total element count.
lstm_input_shape = (-1, univariate_past_history, future)
x_train_uni = X_train_local.values.reshape(lstm_input_shape)
x_val_uni = X_test_local.values.reshape(lstm_input_shape)

# Targets are passed on as plain NumPy arrays.
y_train_uni = y_train_local.values
y_val_uni = y_test_local.values

print("Training and test data dimensions:")
for _tag, _array in (
    ("x_train_uni:", x_train_uni),
    ("y_train_uni:", y_train_uni),
    ("x_val_uni:", x_val_uni),
    ("y_val_uni:", y_val_uni),
):
    print(_tag, _array.shape)

In [None]:
# Run the LSTM on the reshaped local-view data.
models.method_LSTM(
    x_train_uni, y_train_uni,
    x_val_uni, y_val_uni,
    univariate_past_history, future,
)

In [None]:
# Global view: reshape the feature tables into 3-D arrays of shape
# (samples, univariate_past_history, future).
future = univariate_future_target = 1
univariate_past_history = 2001
# NOTE(review): 2001 presumably equals the number of feature columns of the
# global view, so that each row becomes exactly one sample — confirm.

# -1 lets NumPy infer the number of samples from the total element count.
lstm_input_shape = (-1, univariate_past_history, future)
x_train_uni = X_train_global.values.reshape(lstm_input_shape)
x_val_uni = X_test_global.values.reshape(lstm_input_shape)

# Targets are passed on as plain NumPy arrays.
y_train_uni = y_train_global.values
y_val_uni = y_test_global.values

print("Training and test data dimensions:")
for _tag, _array in (
    ("x_train_uni:", x_train_uni),
    ("y_train_uni:", y_train_uni),
    ("x_val_uni:", x_val_uni),
    ("y_val_uni:", y_val_uni),
):
    print(_tag, _array.shape)

In [None]:
# Run the LSTM on the reshaped global-view data.
models.method_LSTM(
    x_train_uni, y_train_uni,
    x_val_uni, y_val_uni,
    univariate_past_history, future,
)