### Imports

In [3]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier

import xgboost as xgb
from imblearn.over_sampling import SMOTE

from MachineLearning import models

### Read Databases

In [4]:
# Load the preprocessed local-view and global-view tables.
# Forward slashes are used as the separator: the previous "Preprocessed\p..." form
# contained the invalid escape sequence "\p" (a warning in current Python versions)
# and only resolved to the intended file on Windows.
local_view = pd.read_csv("Preprocessed/preprocessed_local_view.csv", sep=",")
global_view = pd.read_csv("Preprocessed/preprocessed_global_view.csv", sep=",")

# Remove the "Unnamed: 0" column (presumably a saved row index — TODO confirm) and
# every row that has a missing value. Chained calls return new frames instead of
# mutating in place, so this cell can be re-run without raising a KeyError on the
# already-dropped column.
local_view = local_view.drop(columns=["Unnamed: 0"]).dropna()
global_view = global_view.drop(columns=["Unnamed: 0"]).dropna()

print("\n============================================================================================================")
print("Checking base balance: ")

# Stack the two label columns under distinct names so that value_counts, applied
# column by column, yields one class-count column per view.
targets = pd.concat(
    [
        local_view[['label']].rename(columns={'label': 'target_local'}),
        global_view[['label']].rename(columns={'label': 'target_global'}),
    ],
    axis=0,
    ignore_index=True,
)
counts = targets.apply(pd.Series.value_counts).fillna(0).astype(int)

print(counts)


Checking base balance: 
                target_local  target_global
FALSE POSITIVE          4744           4215
CONFIRMED               2639           2429


### Transform target column values into 0 and 1

In [5]:
# Encode the string labels as integers: CONFIRMED -> 0, FALSE POSITIVE -> 1.
# Series.map turns any value missing from the mapping into NaN.
target_map = {'CONFIRMED': 0, 'FALSE POSITIVE': 1}
for view in (local_view, global_view):
    view['label'] = view['label'].map(target_map)

### Separating into training and testing

In [6]:
# ============= Separating into X and y =============

# Features are every column except the target. The target is removed by name
# (instead of the positional `iloc[:, :-1]`) so that 'label' can never leak into X,
# and no real feature is lost, if 'label' is not the last column of the frame.
X_local = local_view.drop(columns=['label'])
X_global = global_view.drop(columns=['label'])

y_local = local_view['label']
y_global = global_view['label']

# ============= Separating into training and testing =============

# 70/30 split with a fixed seed; stratify keeps the class proportions of each
# view identical in the training and test parts.
X_train_local, X_test_local, y_train_local, y_test_local = train_test_split(
    X_local, y_local, test_size=0.3, random_state=42, stratify=y_local)

X_train_global, X_test_global, y_train_global, y_test_global = train_test_split(
    X_global, y_global, test_size=0.3, random_state=42, stratify=y_global)

### SMOTE balancing

In [7]:
# SMOTE balancing, applied to the training data only; the test sets are left untouched.
# random_state is fixed so the synthetic samples are the same on every run,
# consistent with the seed (42) used for train_test_split and the classifiers.
smote = SMOTE(random_state=42)
X_train_local, y_train_local = smote.fit_resample(X_train_local, y_train_local)  # local view
X_train_global, y_train_global = smote.fit_resample(X_train_global, y_train_global)  # global view

### Classification models and their hyperparameter grids

In [8]:
# Registry of the classical classifiers: each entry holds an unfitted estimator
# under 'clf' and the candidate hyperparameter values under 'parameters'.
# Entry order and parameter order are kept as originally declared.
models_and_parameters_C = {
    'AdaBoostClassifier': dict(
        clf=AdaBoostClassifier(random_state=42),
        parameters=dict(
            n_estimators=range(60, 220, 40),
        ),
    ),
    'XGBClassifier': dict(
        clf=xgb.XGBClassifier(objective="binary:logistic", random_state=42),
        parameters=dict(
            min_child_weight=[1, 5, 10],
            gamma=[0.5, 1, 1.5, 2, 5],
            max_depth=[3, 4, 5],
        ),
    ),
    'SVM': dict(
        clf=SVC(probability=True, random_state=42),
        parameters=dict(
            C=[1, 3, 5, 10, 15],
            kernel=['linear', 'rbf'],
            tol=[1e-3, 1e-4],
        ),
    ),
    'MLPClassifier': dict(
        clf=MLPClassifier(random_state=42),
        parameters=dict(
            solver=['sgd', 'adam'],
            max_iter=[1000, 1300, 1500, 2000],
            alpha=10.0 ** -np.arange(1, 10),  # 1e-1 down to 1e-9
            hidden_layer_sizes=np.arange(10, 15),  # one hidden layer of 10..14 units
            tol=[1e-3, 1e-4],
        ),
    ),
}

### Running classifier models

In [9]:
# models.defining_classifiers(models_and_parameters_C, X_train_local, y_train_local, X_test_local, y_test_local, "local")
# models.defining_classifiers(models_and_parameters_C, X_train_global, y_train_global, X_test_global, y_test_global, "global")

### Running LSTM

In [10]:
# Local-view features re-indexed by their labels, plus the raw value array used
# as input for the LSTM windowing below.
# set_index returns a new frame; the previous `data = X_local` followed by
# `data.index = y_local` only aliased X_local and overwrote its index in place.
data = X_local.set_index(y_local)
uni_data = data.values

In [12]:
# The values below keep the train/validation cut standardized and reproducible.
# The previous hard-coded 300000 is far larger than the row counts printed by the
# balance check (a few thousand rows per view), so the cut is derived from the
# actual length of uni_data instead. 0.7 mirrors the 70/30 split used for the
# classical models.
TRAIN_FRACTION = 0.7
TRAIN_SPLIT = int(len(uni_data) * TRAIN_FRACTION)

In [13]:
# History-window settings (the string literal below is Portuguese for
# "size of the history window").
'''Tamanho da Janela do Historico'''
univariate_past_history = 30  # 30 previous observations
future = univariate_future_target = 5  # original note: "the next observation"; NOTE(review): the value is 5 — confirm how the helper interprets it

# Build training and validation windows with the project helper
# models.univariate_data. Presumably the 2nd/3rd arguments are start/end row
# indices, with None meaning "to the end" — TODO confirm against the helper.
#
# NOTE(review): the recorded run of this cell ended with
#   ValueError: cannot reshape array of size 6030 into shape (30,1)
# 6030 = 30 x 201, which suggests each 30-row window carries 201 values per row,
# i.e. uni_data looks 2-D (one column per feature) while the helper presumably
# expects a 1-D series — TODO confirm and adapt the input or the helper.
x_train_uni, y_train_uni = models.univariate_data(uni_data, 0, TRAIN_SPLIT,
                                        univariate_past_history,
                                        univariate_future_target)
x_val_uni, y_val_uni = models.univariate_data(uni_data, TRAIN_SPLIT, None,
                                    univariate_past_history,
                                    univariate_future_target)


# Test LSTM data local
models.method_LSTM(x_train_uni, y_train_uni, x_val_uni, y_val_uni)

# Test LSTM data global
# NOTE(review): the commented call below passes the same local-view arrays;
# global-view windows would have to be built first.
# models.method_LSTM(x_train_uni, y_train_uni, x_val_uni, y_val_uni)

ValueError: cannot reshape array of size 6030 into shape (30,1)