<a href="https://colab.research.google.com/github/PaulaBergamasco/BCI/blob/main/tp4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##CARGA DE DATOS

In [None]:
pip install pyforest



In [None]:
from pathlib import Path
import numpy as np
import pyforest
from typing import Callable
import scipy
import scipy.signal as sgn
import matplotlib.pyplot as plt

In [None]:
#utils
import itertools
import numpy as np
import pandas as pd
from csv import reader
from pathlib import Path
from scipy import signal


def has_metadata(csv_path):
    f = open(csv_path, newline='')
    csv_reader = reader(f)
    return True if next(csv_reader)[0] == '%OpenBCI Raw EEG Data' else False


def raw_read_bci_csv(csv_path):
    csv_reader = reader(open(csv_path, newline=''))
    n_lines = 10 if next(csv_reader)[0] == '%OpenBCI Raw EEG Data' else 0
    column_names = [
        'sample_index', 'ch0', 'ch1', 'ch2', 'ch3', 'acc0',
        'acc1', 'acc2', 'label', 'provided_time', 'timestamp'
    ]
    dataset = pd.read_csv(csv_path, skiprows=n_lines, index_col=False, names=column_names)
    sample_rate = 200.
    dataset['time'] = dataset.index.values/sample_rate
    return dataset


def clean_read_bci_csv(csv_path):
    dataset = raw_read_bci_csv(csv_path)
    dataset.drop(columns=['sample_index', 'acc0', 'acc1', 'acc2', 'provided_time', 'timestamp'], inplace=True)
    return dataset


def add_sub_label(dataset):
    labels = dataset.label.unique().tolist()
    for label in labels:
        sample_idx = dataset.loc[dataset.label == label].index.values
        gap_btn_idx = sample_idx[1:] - sample_idx[:-1]
        selected_idx = np.insert((gap_btn_idx != 1), 0, True)
        desired_idx_bottom = sample_idx[selected_idx]
        selected_idx = np.append(selected_idx[1:], True)
        desired_idx_top = sample_idx[selected_idx]
        for k, (start, stop) in enumerate(zip(desired_idx_bottom, desired_idx_top)):
            selection = (dataset.index.values >= start) & (dataset.index.values <= stop)
            dataset.loc[selection, 'sub_label'] = f'{label}-{k}'
    return dataset


def detrend_channels(dataset, mode='both', window=200):
    if mode in ['global', 'both']:
        detrend_global = dataset.copy()
    if mode in ['both', 'local']:
        detrend_local = dataset.copy()
    for subject in dataset.subject.unique():
        selection = (dataset.subject == subject)
        for session in dataset.loc[selection, 'session'].unique():
            selection2 = selection & (dataset.session == session)
            if mode in ['global', 'both']:
                for ch in ['ch0', 'ch1', 'ch2', 'ch3']:
                    y = dataset.loc[selection2, ch].values
                    detrend_global.loc[selection2, ch] = signal.detrend(y)
            if mode in ['both', 'local']:
                for ch in ['ch0', 'ch1', 'ch2', 'ch3']:
                    index = dataset.loc[selection2].index.tolist()
                    iters = [iter(index)] * window
                    for k, index_set in enumerate(itertools.zip_longest(*iters)):
                        if None in index_set:
                            index_set = list(filter(None, index_set))
                        selection3 = selection2 & dataset.index.isin(index_set)
                        y = dataset.loc[selection3, ch].values
                        detrend_local.loc[selection3, ch] = signal.detrend(y)
    if mode == 'global':
        return detrend_global
    if mode == 'local':
        return detrend_local
    else:
        return detrend_global, detrend_local


def read_all_datasets(csvs_path: Path):
    complete_dataset = pd.DataFrame()
    for csv_path in csvs_path.iterdir():
        dataset = clean_read_bci_csv(csv_path)
        dataset = add_sub_label(dataset)
        dataset['subject'] = csv_path.name[:2]
        dataset['session'] = csv_path.name[3]
        dataset['sub_sess'] = dataset.subject + '-' + dataset.session
        complete_dataset = pd.concat([complete_dataset, dataset])
    return complete_dataset


def naif_fft_features(signal_matrix):
    N = signal_matrix.shape[1]
    dt = 1/200
    T = N*dt
    fft = np.fft.rfft(signal_matrix)
    Sxx = np.real(((2*dt**2)/T)*fft*fft.conj())
    return Sxx


In [None]:
def filtered_fft_features(signal_matrix):
    N = signal_matrix.shape[1]
    dt = 1/200
    T = N*dt
    sf = 200
    Q = 30
    f_notch = 50
    b_notch, a_notch = sgn.iirnotch(w0=f_notch, Q=Q, fs=sf)
    sig_notch = sgn.filtfilt(b_notch, a_notch, signal_matrix, axis=1)

    #Ahora creamos el filtro pasabanda Butterworth
    f_nq = sf/2
    f_low = 11.5
    f_high = 35
    order = 4
    b_band, a_band = sgn.iirfilter(
        N=order, Wn=[f_low/f_nq, f_high/f_nq], btype="bandpass", ftype="butter"
    )
    sig_filt = sgn.filtfilt(b_band, a_band, sig_notch, axis=1)

    fft = np.fft.rfft(sig_filt)
    Sxx = np.real(((2*dt**2)/T)*fft*fft.conj())
   
    return Sxx

In [None]:
class BCIDataset():
    def __init__(
        self, 
        csvs_path, 
        subject: str = 'all', 
        session: str = 'all',
        channel: str = 'all', 
        overlapping_fraction: float = 1/3, 
        window_size: int = 900,
        feature_extractor: Callable = naif_fft_features
    ):
        '''
        Object containing all examples from a time series from the dataset.
        Args:
            csvs_path (str): path al directorio donde estan los csv de los datos
            subject (str): sujeto a estudiar.
                Si se indica 'all' el dataset final generado tendrá ejemplos de todos los sujetos
            session (str): sesión a estudiar del sujeto seleccionado.
                si se indica 'all'  el dataset final generado tendrá ejemplos de todas las sesiones
            channel (str): 'ch0', 'ch1', 'ch2', 'ch3'. Si se indica 'all', los ejemplos serán la
                concatenación de los 4 canales.
            overlapping_fraction (float): porcentaje de desplazamiento de la "ventana" que hace el ejemplo
            window_size (int): tamaño de la ventana de tiempo que hace a un ejemplo (en muestras).
            feature_extractor (func): Función de extracción de features, le ingresa un arreglo
                (ejemplos en las filas, muestras en las columnas) y devuelve un arreglo (ejemplos en
                las filas y features en las columnas)
        '''
        self.csvs_path = Path(csvs_path)
        self.channel = channel
        self.parts = int(1 / overlapping_fraction)
        self.fraction = 1 / self.parts
        self.ws = window_size
        self.subject = subject
        self.session = session
        self.channels = ['ch0','ch1','ch2','ch3']
        self.feature_extractor = feature_extractor
        self.complete_dataset = read_all_datasets(self.csvs_path)
        
        self.complete_examples_signal, self.complete_examples_features, \
        self.complete_labels, self.complete_metadata = \
            self.generate_examples()
        
    def generate_examples(self):
        # For each session generate the signal examples,
        # the feature extraction examples and labels arrays
        complete_examples_signal, complete_examples_features = [], []
        complete_labels, complete_metadata = [], []
        
        for subject in self.complete_dataset.subject.unique():
            # Just load the selected subject
            if self.subject != 'all' and subject != self.subject:
                continue
            sessions = \
                self.complete_dataset.loc[self.complete_dataset.subject==subject].session.unique()
            for session in sessions:
                # Just load the selected session
                if self.session != 'all' and session != self.session:
                    continue
                
                print(f'Processing subject: {subject} - session: {session}...')
                
                # Generate a subset of the dataset only with the desired rows
                selection = self.complete_dataset.loc[
                    (self.complete_dataset.subject==subject) &
                    (self.complete_dataset.session==session)
                ]
                
                # Standarize length of the array to a multiple to window size
                labels = selection.label.values
                n_rows = labels.shape[0] // self.ws
                labels = labels[: n_rows * self.ws]

                # Generate examples from the signal
                n_examples = n_rows * self.parts - (self.parts - 1)
                examples = np.empty((n_examples, self.ws))
                most_frec_labels = np.empty((n_examples, self.ws))
                times = np.empty((n_examples, self.ws))
                
                concat_ch_examples_signal, concat_ch_examples_features = [], []

                for k, ch in enumerate(self.channels):
                    # Use the four channels or just one
                    if self.channel != 'all' and self.channel != ch:
                        continue
                    
                    # Standarize length of the signal to a multiple to window size
                    signal = selection[ch].values
                    signal = signal[: n_rows * self.ws]
                    time = selection.time.values
                    time = time[: n_rows * self.ws]

                    # Increase the number of examples by overlapping the windows
                    for part in range(self.parts):
                        
                        # Find the place in the output array for each example
                        position = np.arange(part, n_examples, self.parts)
                        #position = position if part == 0 else position[:-part]

                        # Crop the signal according to the window size and overlap
                        start = int(self.ws / self.parts) * part
                        end = -int(self.ws - (self.ws / self.parts) * part)
                        end = end if part!=0 else signal.shape[0]
                        subset_signal = signal[start:end]
                        subset_labels = labels[start:end]
                        subset_times = time[start:end]

                        # Generate the examples
                        n_rows_ = int(subset_signal.shape[0]/self.ws)
                        examples[position, :] = subset_signal.reshape((n_rows_, self.ws))
                        most_frec_labels[position, :] = subset_labels.reshape((n_rows_, self.ws))
                        times[position, :] = subset_times.reshape((n_rows_, self.ws))
                    
                    # Obtain most frequent label
                    labels_ = scipy.stats.mode(most_frec_labels, axis=1).mode
                    labels_temp = scipy.stats.mode(most_frec_labels, axis=1).count
                    pureness = labels_temp == self.ws
                    # Get first and last time of the window
                    times_ = np.asarray([np.min(times, axis=1), np.max(times, axis=1)]).T
                    # Extract features
                    features = self.feature_extractor(examples)

                    concat_ch_examples_signal.append(examples.copy())
                    concat_ch_examples_features.append(features)

                concat_ch_examples_signal = np.concatenate(concat_ch_examples_signal, axis=1)
                concat_ch_examples_features = np.concatenate(concat_ch_examples_features, axis=1)

                complete_examples_signal.append(concat_ch_examples_signal)
                complete_examples_features.append(concat_ch_examples_features)
                complete_labels.append(labels_)
                lt = len(times_)
                metadata_ = np.concatenate(
                    [pureness, times_, np.repeat(subject, lt)[:,None], np.repeat(session, lt)[:,None]],
                    axis=1
                )
                complete_metadata.append(metadata_)

        complete_examples_signal = np.concatenate(complete_examples_signal)
        complete_examples_features = np.concatenate(complete_examples_features)
        complete_labels = np.concatenate(complete_labels)
        complete_metadata = np.concatenate(complete_metadata)
        
        return complete_examples_signal, complete_examples_features, complete_labels, complete_metadata
    
    def __len__(self):
        return self.complete_examples_signal.shape[0]
    
    def __getitem__(self, idx):
        return {
            'signal': self.complete_examples_signal[idx,:],
            'features': self.complete_examples_features[idx,:],
            'label': self.complete_labels[idx,:],
            'metadata': self.complete_metadata[idx,:]
        }

    def get_X_signal(self):
        return self.complete_examples_signal

    def get_X_features(self):
        return self.complete_examples_features

    def get_Y(self):
        return self.complete_labels

    def get_metadata(self):
        return self.complete_metadata

In [None]:
! git clone https://github.com/joaco18/MetoriaBCI

fatal: destination path 'MetoriaBCI' already exists and is not an empty directory.


In [None]:
import zipfile

with zipfile.ZipFile('/content/MetoriaBCI/Database.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/tmp')

In [None]:
csvs_path = '/content/tmp'
dataset = BCIDataset(csvs_path, overlapping_fraction=1/6, feature_extractor= filtered_fft_features)

Processing subject: AA - session: 1...
Processing subject: AA - session: 2...
Processing subject: AA - session: 0...
Processing subject: MA - session: 1...
Processing subject: HA - session: 1...
Processing subject: JA - session: 1...
Processing subject: JA - session: 2...


#BENCHMARKING

In [None]:
X= dataset.get_X_features()
y= dataset.get_Y().astype(int)
X.shape, y.shape

((4339, 1804), (4339, 1))

In [None]:
random_target1 = np.random.choice(pd.Series(y.flatten()).unique(),y.shape[0])
random_target2 = np.random.choice(pd.Series(y.flatten()).unique(),y.shape[0])
random_target3 = np.random.choice(pd.Series(y.flatten()).unique(),y.shape[0])
random_target4 = np.random.choice(pd.Series(y.flatten()).unique(),y.shape[0])
random_target5 = np.random.choice(pd.Series(y.flatten()).unique(),y.shape[0])

metrics= [random_target1, random_target2, random_target3, random_target4, random_target5]

In [None]:
accuracy= list()
f1score= list()

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [None]:
for metric in metrics:
  accuracy.append(accuracy_score(y, metric))

for metric in metrics:
  f1score.append(f1_score(y, metric, average='weighted'))

print('Accuracy=', accuracy)
print('F1Score=', f1score)

Accuracy= [0.34800645309979256, 0.3309518322194054, 0.3360221249135746, 0.33740493201198435, 0.33625259276330954]
F1Score= [0.37496086024282443, 0.35598259686200434, 0.3614530402987148, 0.36352356981061346, 0.36372684083293666]


In [None]:
print(max(accuracy))

0.34800645309979256


In [None]:
print(max(f1score))

0.37496086024282443


#MODELOS

In [None]:
! git clone https://github.com/PaulaBergamasco/BCI

fatal: destination path 'BCI' already exists and is not an empty directory.


##PARTICION DE DATOS

###DATASET FEATURES TIME

In [None]:
dataset_time=pd.read_csv('/content/BCI/df_time.csv')

In [None]:
X_time= dataset_time.iloc[:,:-1]
y_time= dataset_time.iloc[:,-1].astype(int)
X_time.shape, y_time.shape

((4339, 16), (4339,))

In [None]:
from sklearn.model_selection import train_test_split

X_train_time, X_val_time, y_train_time, y_val_time = train_test_split(X_time, y_time, test_size=0.1, random_state=0)
X_train_time, X_test_time, y_train_time, y_test_time = train_test_split(X_train_time, y_train_time, test_size=0.2, random_state=0)

###DATASET FEATURES FRECUENCIA

In [None]:
dataset_frec_max=pd.read_csv('/content/BCI/df_frec_max.csv')

In [None]:
X_freq= dataset_frec_max.iloc[:,:-1]
y_freq= dataset_frec_max.iloc[:,-1].astype(int)
X_freq.shape, y_freq.shape

((4339, 4), (4339,))

In [None]:
X_train_freq, X_val_freq, y_train_freq, y_val_freq = train_test_split(X_freq, y_freq, test_size=0.10, random_state=0)
X_train_freq, X_test_freq, y_train_freq, y_test_freq = train_test_split(X_train_freq, y_train_freq, test_size=0.20, random_state=0)

###DATASET FEATURES AMPLITUD

In [None]:
def filtered_fft_features_varias(signal_matrix):
    N = signal_matrix.shape[1]
    dt = 1/200
    T = N*dt
    sf = 200
    Q = 30
    f_notch = 50
    b_notch, a_notch = sgn.iirnotch(w0=f_notch, Q=Q, fs=sf)
    sig_notch = sgn.filtfilt(b_notch, a_notch, signal_matrix, axis=1)

    #Ahora creamos el filtro pasabanda Butterworth
    f_nq = sf/2
    f_low = 11.5
    f_high = 35
    order = 4
    b_band, a_band = sgn.iirfilter(
        N=order, Wn=[f_low/f_nq, f_high/f_nq], btype="bandpass", ftype="butter"
    )
    sig_filt = sgn.filtfilt(b_band, a_band, sig_notch, axis=1)

    fft = np.fft.rfft(sig_filt)
    Sxx = np.real(((2*dt**2)/T)*fft*fft.conj())
    mean= np.mean(Sxx, axis= 1)
    ln= np.log (mean)
    sxx_mean_value = ln.reshape((signal_matrix.shape[0], 1))
    
    max_sxx= np.argmax(Sxx, axis= 1)
    lnm= np.log (max_sxx)
    sxx_max_value = lnm.reshape((signal_matrix.shape[0], 1))

    q95_ln= np.quantile (fft, 0.90, axis= 1)
    q95_ln= np.log (q95_ln)
    freq_q95_ln_value = q95_ln.reshape((signal_matrix.shape[0], 1))


    return np.concatenate([sxx_mean_value, sxx_max_value, freq_q95_ln_value], axis = 1)


In [None]:
dataset_sxx= BCIDataset(csvs_path, subject='AA', overlapping_fraction= 1/6, feature_extractor= filtered_fft_features_varias) 

Processing subject: AA - session: 1...
Processing subject: AA - session: 2...
Processing subject: AA - session: 0...


In [None]:
X_sxx= dataset_sxx.get_X_features().astype(float)
y_sxx= dataset_sxx.get_Y().astype(int)
X_sxx.shape, y_sxx.shape

  """Entry point for launching an IPython kernel.


((1941, 12), (1941, 1))

In [None]:
X_train_sxx, X_val_sxx, y_train_sxx, y_val_sxx = train_test_split(X_sxx, y_sxx, test_size=0.10, random_state=0)
X_train_sxx, X_test_sxx, y_train_sxx, y_test_sxx = train_test_split(X_train_sxx, y_train_sxx, test_size=0.20, random_state=0)

###DATASET FEATURES FRECUENCIAS SELECTIVAS

In [None]:
dataset_freq_select=pd.read_csv('/content/BCI/df_n2_mean.csv')

In [None]:
X_freq_s= dataset_freq_select.iloc[:,:-1]
y_freq_s= dataset_freq_select.iloc[:,-1].astype(int)
X_freq_s.shape, y_freq_s.shape

((4339, 16), (4339,))

In [None]:
X_train_freq_s, X_val_freq_s, y_train_freq_s, y_val_freq_s = train_test_split(X_freq_s, y_freq_s, test_size=0.10, random_state=0)
X_train_freq_s, X_test_freq_s, y_train_freq_s, y_test_freq_s = train_test_split(X_train_freq_s, y_train_freq_s, test_size=0.20, random_state=0)

###DATASET FEATURES FRECUENCIAS SELECTIVAS PROMEDIOS

In [None]:
dataset_freq_unif=pd.read_csv('/content/BCI/df_n2_mean_unificado.csv')

In [None]:
dataset_freq_unif

Unnamed: 0,12.5,16.5,25.0,33.0,label
0,337.4000,287.5875,301.7750,183.2750,99.0
1,380.1000,238.1250,329.2625,263.5375,99.0
2,457.2000,181.5750,399.4125,252.4625,99.0
3,234.5250,189.5750,350.8125,245.7625,99.0
4,205.3125,229.6875,286.2000,393.8375,99.0
...,...,...,...,...,...
4334,131.1625,100.4500,73.0000,117.7500,99.0
4335,152.4375,161.5375,84.8625,84.0375,99.0
4336,132.4000,220.1750,126.2625,49.0875,99.0
4337,137.7375,159.1875,93.6250,59.2250,99.0


In [None]:
X_freq_u= dataset_freq_unif.iloc[:,:-1]
y_freq_u= dataset_freq_unif.iloc[:,-1].astype(int)
X_freq_u.shape, y_freq_u.shape

((4339, 4), (4339,))

In [None]:
X_train_freq_u, X_val_freq_u, y_train_freq_u, y_val_freq_u = train_test_split(X_freq_u, y_freq_u, test_size=0.10, random_state=0)
X_train_freq_u, X_test_freq_u, y_train_freq_u, y_test_freq_u = train_test_split(X_train_freq_u, y_train_freq_u,test_size=0.20, random_state=0)

###DATASET MENTORES

In [None]:
dataset_mentores=pd.read_csv('/content/BCI/df_mentores.csv')

In [None]:
dataset_mentores

Unnamed: 0,main1,main2,harm1,harm2,labels
0,326.972899,424.740004,114.590462,201.167890,2.0
1,357.930372,499.655167,168.664040,133.872709,2.0
2,345.581598,618.271127,132.024337,122.745340,2.0
3,430.210302,751.190187,122.416609,170.167454,2.0
4,476.193835,886.782840,179.118974,186.464999,2.0
...,...,...,...,...,...
3932,242.085407,369.680405,461.675725,157.212589,1.0
3933,343.913162,379.645024,429.955552,164.178137,1.0
3934,311.704362,347.813195,517.043082,164.727626,1.0
3935,320.424090,344.494940,465.871418,168.922916,1.0


In [None]:
X_m= dataset_mentores.iloc[:,:-1]
y_m= dataset_mentores.iloc[:,-1].astype(int)
X_m.shape, y_m.shape

((3937, 4), (3937,))

In [None]:
X_train_m, X_val_m, y_train_m, y_val_m = train_test_split(X_m, y_m, test_size=0.10, random_state=0)
X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(X_train_m, y_train_m,test_size=0.20, random_state=0)

##MODELO 1: NAIVE BAYES

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import  MultinomialNB

In [None]:
#FEATURES FRECUENCIA
nb0 = MultinomialNB()
nb0.fit(X_train_freq, y_train_freq)
print('F1Score=',f1_score(y_val_freq, nb0.predict(X_val_freq), average='weighted'))
print('Accuracy=',accuracy_score(y_val_freq, nb0.predict(X_val_freq)))
print('')
print(classification_report(y_val_freq, nb0.predict(X_val_freq)))

F1Score= 0.46580645161290324
Accuracy= 0.6129032258064516

              precision    recall  f1-score   support

           1       0.00      0.00      0.00        68
           2       0.00      0.00      0.00       100
          99       0.61      1.00      0.76       266

    accuracy                           0.61       434
   macro avg       0.20      0.33      0.25       434
weighted avg       0.38      0.61      0.47       434



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#FEATURES FRECUENCIA SELECTIVA
nb1 = MultinomialNB()
nb1.fit(X_train_freq_s, y_train_freq_s)
print('F1Score=', f1_score(y_val_freq_s, nb1.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, nb1.predict(X_val_freq_s)))
print('')
print(classification_report(y_val_freq_s, nb1.predict(X_val_freq_s)))

F1Score= 0.627869926543784
Accuracy= 0.6221198156682027

              precision    recall  f1-score   support

           1       0.54      0.79      0.64        68
           2       0.46      0.66      0.55       100
          99       0.78      0.56      0.66       266

    accuracy                           0.62       434
   macro avg       0.60      0.67      0.61       434
weighted avg       0.67      0.62      0.63       434



In [None]:
#FEATURES FRECUENCIA SELECTIVA PROMEDIOS
nb2 = MultinomialNB()
nb2.fit(X_train_freq_u, y_train_freq_u)
print('F1Score', f1_score(y_val_freq_u, nb2.predict(X_val_freq_u), average='weighted'))
print('Accuracy', accuracy_score(y_val_freq_u, nb2.predict(X_val_freq_u)))
print('')
print(classification_report(y_val_freq_u, nb2.predict(X_val_freq_u)))

F1Score 0.6362312487935693
Accuracy 0.6290322580645161

              precision    recall  f1-score   support

           1       0.46      0.74      0.57        68
           2       0.53      0.68      0.59       100
          99       0.79      0.58      0.67       266

    accuracy                           0.63       434
   macro avg       0.59      0.67      0.61       434
weighted avg       0.68      0.63      0.64       434



In [None]:
#FEATURES FRECUENCIA SELECTIVA SIN CLASE 99
nb3 = MultinomialNB()
nb3.fit(X_train_m, y_train_m)
print('F1Score', f1_score(y_val_m, nb3.predict(X_val_m), average='weighted'))
print('Accuracy', accuracy_score(y_val_m, nb3.predict(X_val_m)))
print('')
print(classification_report(y_val_m, nb3.predict(X_val_m)))

F1Score 0.7790952586108267
Accuracy 0.7791878172588832

              precision    recall  f1-score   support

           1       0.78      0.76      0.77       193
           2       0.78      0.80      0.79       201

    accuracy                           0.78       394
   macro avg       0.78      0.78      0.78       394
weighted avg       0.78      0.78      0.78       394



##MODELO 2: SVM

In [None]:
from sklearn.svm import SVC

In [None]:
#FEATURES FRECUENCIA
svm0 = SVC(class_weight='balanced')
svm0.fit(X_train_freq, y_train_freq)
print('F1Score=',f1_score(y_val_freq, svm0.predict(X_val_freq), average='weighted'))
print('Accuracy=',accuracy_score(y_val_freq, svm0.predict(X_val_freq)))
print('')
print(classification_report(y_val_freq, svm0.predict(X_val_freq)))

F1Score= 0.5673636555165077
Accuracy= 0.5552995391705069

              precision    recall  f1-score   support

           1       0.41      0.68      0.51        68
           2       0.41      0.66      0.50       100
          99       0.81      0.48      0.61       266

    accuracy                           0.56       434
   macro avg       0.54      0.61      0.54       434
weighted avg       0.65      0.56      0.57       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS
svm1 = SVC(class_weight='balanced')
svm1.fit(X_train_freq_s, y_train_freq_s)
print('F1Score=', f1_score(y_val_freq_s, svm1.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, svm1.predict(X_val_freq_s)))
print('')
print(classification_report(y_val_freq_s, svm1.predict(X_val_freq_s)))

F1Score= 0.23507235343261004
Accuracy= 0.35714285714285715

              precision    recall  f1-score   support

           1       0.39      0.75      0.52        68
           2       0.32      0.94      0.48       100
          99       1.00      0.04      0.07       266

    accuracy                           0.36       434
   macro avg       0.57      0.58      0.35       434
weighted avg       0.75      0.36      0.24       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS UNIFICADAS
svm2 = SVC(class_weight='balanced')
svm2.fit(X_train_freq_u, y_train_freq_u)
print('F1Score=', f1_score(y_val_freq_u, svm2.predict(X_val_freq_u), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_u, svm2.predict(X_val_freq_u)))
print('')
print(classification_report(y_val_freq_u, svm2.predict(X_val_freq_u)))

F1Score= 0.2030502909454364
Accuracy= 0.3387096774193548

              precision    recall  f1-score   support

           1       0.31      0.76      0.44        68
           2       0.35      0.91      0.50       100
          99       1.00      0.02      0.03       266

    accuracy                           0.34       434
   macro avg       0.55      0.56      0.32       434
weighted avg       0.74      0.34      0.20       434



In [None]:
#FEATURES TIME
svm3 = SVC(class_weight='balanced')
svm3.fit(X_train_time, y_train_time)
print('F1Score=',f1_score(y_val_time, svm3.predict(X_val_time), average='weighted'))
print('Accuracy=', accuracy_score(y_val_time, svm3.predict(X_val_time)))
print('')
print(classification_report(y_val_time, svm3.predict(X_val_time)))

F1Score= 0.5359246256900981
Accuracy= 0.5875576036866359

              precision    recall  f1-score   support

           1       0.25      0.26      0.26        68
           2       0.53      0.10      0.17       100
          99       0.66      0.85      0.75       266

    accuracy                           0.59       434
   macro avg       0.48      0.41      0.39       434
weighted avg       0.57      0.59      0.54       434



In [None]:
#FEATURES AMPLITUD
svm4 = SVC(class_weight='balanced')
svm4.fit(X_train_sxx, y_train_sxx.ravel())
print('F1Score=', f1_score(y_val_sxx, svm4.predict(X_val_sxx), average='weighted'))
print('Accuracy=', accuracy_score(y_val_sxx, svm4.predict(X_val_sxx)))
print('')
print(classification_report(y_val_sxx, svm4.predict(X_val_sxx)))

F1Score= 0.48172415491220155
Accuracy= 0.4564102564102564

              precision    recall  f1-score   support

           1       0.31      0.41      0.36        41
           2       0.28      0.71      0.41        35
          99       0.89      0.39      0.55       119

    accuracy                           0.46       195
   macro avg       0.50      0.51      0.44       195
weighted avg       0.66      0.46      0.48       195



In [None]:
#FEATURES FRECUENCIA SELECTIVA SIN CLASE 99
svm5 = SVC(class_weight='balanced')
svm5.fit(X_train_m, y_train_m.ravel())
print('F1Score=', f1_score(y_val_m, svm5.predict(X_val_m), average='weighted'))
print('Accuracy=', accuracy_score(y_val_m, svm5.predict(X_val_m)))
print('')
print(classification_report(y_val_m, svm5.predict(X_val_m)))

F1Score= 0.7994497287177305
Accuracy= 0.799492385786802

              precision    recall  f1-score   support

           1       0.80      0.79      0.79       193
           2       0.80      0.81      0.80       201

    accuracy                           0.80       394
   macro avg       0.80      0.80      0.80       394
weighted avg       0.80      0.80      0.80       394



##MODELO 3: KERNELS

In [None]:
#FEATURES FRECUENCIA
svmk0 = SVC(kernel="rbf", class_weight='balanced')
svmk0.fit(X_train_freq, y_train_freq)
print('F1Score=', f1_score(y_val_freq, svmk0.predict(X_val_freq), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq, svmk0.predict(X_val_freq)))
print('')
print(classification_report(y_val_freq, svmk0.predict(X_val_freq)))

F1Score= 0.5673636555165077
Accuracy= 0.5552995391705069

              precision    recall  f1-score   support

           1       0.41      0.68      0.51        68
           2       0.41      0.66      0.50       100
          99       0.81      0.48      0.61       266

    accuracy                           0.56       434
   macro avg       0.54      0.61      0.54       434
weighted avg       0.65      0.56      0.57       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS
svmk1 = SVC(kernel="sigmoid", class_weight='balanced')
svmk1.fit(X_train_freq_s, y_train_freq_s)
print('F1Score=', f1_score(y_val_freq_s, svmk1.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, svmk1.predict(X_val_freq_s)))
print('')
print(classification_report(y_val_freq_s, svmk1.predict(X_val_freq_s)))

F1Score= 0.4843528059209243
Accuracy= 0.4769585253456221

              precision    recall  f1-score   support

           1       0.32      0.34      0.33        68
           2       0.36      0.66      0.46       100
          99       0.66      0.44      0.53       266

    accuracy                           0.48       434
   macro avg       0.45      0.48      0.44       434
weighted avg       0.54      0.48      0.48       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS UNIFICADAS
svmk2 = SVC(kernel="sigmoid", class_weight='balanced')
svmk2.fit(X_train_freq_u, y_train_freq_u)
print('F1Score=', f1_score(y_val_freq_u, svmk2.predict(X_val_freq_u), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_u, svmk2.predict(X_val_freq_u)))
print('')
print(classification_report(y_val_freq_u, svmk2.predict(X_val_freq_u)))

F1Score= 0.4557201845106959
Accuracy= 0.45161290322580644

              precision    recall  f1-score   support

           1       0.25      0.19      0.22        68
           2       0.33      0.67      0.44       100
          99       0.65      0.44      0.52       266

    accuracy                           0.45       434
   macro avg       0.41      0.43      0.39       434
weighted avg       0.51      0.45      0.46       434



In [None]:
#FEATURES TIME
svmk3 = SVC(kernel="rbf", class_weight='balanced')
svmk3.fit(X_train_time, y_train_time)
print('F1Score=', f1_score(y_val_time, svmk3.predict(X_val_time), average='weighted'))
print('Accuracy=', accuracy_score(y_val_time, svmk3.predict(X_val_time)))
print('')
print(classification_report(y_val_time, svmk3.predict(X_val_time)))

F1Score= 0.5359246256900981
Accuracy= 0.5875576036866359

              precision    recall  f1-score   support

           1       0.25      0.26      0.26        68
           2       0.53      0.10      0.17       100
          99       0.66      0.85      0.75       266

    accuracy                           0.59       434
   macro avg       0.48      0.41      0.39       434
weighted avg       0.57      0.59      0.54       434



In [None]:
#FEATURES AMPLITUD
svmk4 = SVC(kernel="poly", class_weight='balanced')
svmk4.fit(X_train_sxx, y_train_sxx.ravel())
print('F1Score=', f1_score(y_val_sxx, svmk4.predict(X_val_sxx), average='weighted'))
print('Accuracy=', accuracy_score(y_val_sxx, svmk4.predict(X_val_sxx)))
print('')
print(classification_report(y_val_sxx, svmk4.predict(X_val_sxx)))

F1Score= 0.4982868718876996
Accuracy= 0.47692307692307695

              precision    recall  f1-score   support

           1       0.31      0.51      0.39        41
           2       0.33      0.66      0.44        35
          99       0.84      0.41      0.55       119

    accuracy                           0.48       195
   macro avg       0.50      0.53      0.46       195
weighted avg       0.64      0.48      0.50       195



In [None]:
#FEATURES FRECUENCIA SELECTIVA SIN CLASE 99
svmk5 = SVC(kernel="poly", class_weight='balanced')
svmk5.fit(X_train_m, y_train_m.ravel())
print('F1Score=', f1_score(y_val_m, svmk5.predict(X_val_m), average='weighted'))
print('Accuracy=', accuracy_score(y_val_m, svmk5.predict(X_val_m)))
print('')
print(classification_report(y_val_m, svmk5.predict(X_val_m)))

F1Score= 0.7726117468610602
Accuracy= 0.7791878172588832

              precision    recall  f1-score   support

           1       0.91      0.61      0.73       193
           2       0.72      0.94      0.81       201

    accuracy                           0.78       394
   macro avg       0.81      0.78      0.77       394
weighted avg       0.81      0.78      0.77       394



## MODELO 4: RANDOM FOREST



In [None]:
from sklearn import ensemble
from sklearn.metrics import classification_report

In [None]:
#FEATURES FRECUENCIA
rf0 = ensemble.RandomForestClassifier(class_weight='balanced')
rf0.fit(X_train_freq, y_train_freq)
print('F1Score=', f1_score(y_val_freq, rf0.predict(X_val_freq), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq, rf0.predict(X_val_freq)))
print('')
print('Train=',classification_report(y_train_freq, rf0.predict(X_train_freq)))
print('Val=',classification_report(y_val_freq, rf0.predict(X_val_freq)))

F1Score= 0.7114464728266604
Accuracy= 0.7258064516129032

Train=               precision    recall  f1-score   support

           1       0.93      0.96      0.94       598
           2       0.90      0.98      0.94       594
          99       0.99      0.95      0.97      1932

    accuracy                           0.96      3124
   macro avg       0.94      0.96      0.95      3124
weighted avg       0.96      0.96      0.96      3124

Val=               precision    recall  f1-score   support

           1       0.68      0.53      0.60        68
           2       0.65      0.44      0.52       100
          99       0.75      0.88      0.81       266

    accuracy                           0.73       434
   macro avg       0.69      0.62      0.64       434
weighted avg       0.72      0.73      0.71       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS
rf1 = ensemble.RandomForestClassifier(class_weight='balanced')
rf1.fit(X_train_freq_s, y_train_freq_s)
print('F1Score=', f1_score(y_val_freq_s, rf1.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, rf1.predict(X_val_freq_s)))
print('')
print('Train=', classification_report(y_train_freq_s, rf1.predict(X_train_freq_s)))
print('Val=', classification_report(y_val_freq_s, rf1.predict(X_val_freq_s)))

F1Score= 0.7259676836283084
Accuracy= 0.7465437788018433

Train=               precision    recall  f1-score   support

           1       1.00      1.00      1.00       598
           2       1.00      1.00      1.00       594
          99       1.00      1.00      1.00      1932

    accuracy                           1.00      3124
   macro avg       1.00      1.00      1.00      3124
weighted avg       1.00      1.00      1.00      3124

Val=               precision    recall  f1-score   support

           1       0.83      0.66      0.74        68
           2       0.72      0.36      0.48       100
          99       0.74      0.91      0.82       266

    accuracy                           0.75       434
   macro avg       0.76      0.65      0.68       434
weighted avg       0.75      0.75      0.73       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS UNIFICADAS
rf2 = ensemble.RandomForestClassifier(class_weight='balanced')
rf2.fit(X_train_freq_u, y_train_freq_u)
print('F1Score=', f1_score(y_val_freq_u, rf2.predict(X_val_freq_u), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_u, rf2.predict(X_val_freq_u)))
print('')
print('Train=', classification_report(y_train_freq_u, rf2.predict(X_train_freq_u)))
print('Val=', classification_report(y_val_freq_u, rf2.predict(X_val_freq_u)))

F1Score= 0.644839100210888
Accuracy= 0.6658986175115207

Train=               precision    recall  f1-score   support

           1       1.00      1.00      1.00       598
           2       1.00      1.00      1.00       594
          99       1.00      1.00      1.00      1932

    accuracy                           1.00      3124
   macro avg       1.00      1.00      1.00      3124
weighted avg       1.00      1.00      1.00      3124

Val=               precision    recall  f1-score   support

           1       0.62      0.46      0.53        68
           2       0.55      0.33      0.41       100
          99       0.69      0.85      0.76       266

    accuracy                           0.67       434
   macro avg       0.62      0.54      0.57       434
weighted avg       0.65      0.67      0.64       434



In [None]:
#FEATURES TIME
rf3 = ensemble.RandomForestClassifier(class_weight='balanced')
rf3.fit(X_train_time, y_train_time)
print('F1Score=', f1_score(y_val_time, rf3.predict(X_val_time), average='weighted'))
print('Accuracy=', accuracy_score(y_val_time, rf3.predict(X_val_time)))
print('')
print('Train=', classification_report(y_train_time, rf3.predict(X_train_time)))
print('Val=',classification_report(y_val_time, rf3.predict(X_val_time)))

F1Score= 0.9259856758376322
Accuracy= 0.9262672811059908

Train=               precision    recall  f1-score   support

           1       1.00      1.00      1.00       598
           2       1.00      1.00      1.00       594
          99       1.00      1.00      1.00      1932

    accuracy                           1.00      3124
   macro avg       1.00      1.00      1.00      3124
weighted avg       1.00      1.00      1.00      3124

Val=               precision    recall  f1-score   support

           1       0.87      0.87      0.87        68
           2       0.94      0.88      0.91       100
          99       0.94      0.96      0.95       266

    accuracy                           0.93       434
   macro avg       0.91      0.90      0.91       434
weighted avg       0.93      0.93      0.93       434



In [None]:
#FEATURES AMPLITUD
rf4 = ensemble.RandomForestClassifier(class_weight='balanced')
rf4.fit(X_train_sxx, y_train_sxx.ravel())
print('F1Score=', f1_score(y_val_sxx, rf4.predict(X_val_sxx), average='weighted'))
print('Accuracy=', accuracy_score(y_val_sxx, rf4.predict(X_val_sxx)))
print('')
print('Train=', classification_report(y_train_sxx, rf4.predict(X_train_sxx)))
print('Val=', classification_report(y_val_sxx, rf4.predict(X_val_sxx)))

F1Score= 0.7687369626135112
Accuracy= 0.7743589743589744

Train=               precision    recall  f1-score   support

           1       1.00      1.00      1.00       278
           2       1.00      1.00      1.00       303
          99       1.00      1.00      1.00       815

    accuracy                           1.00      1396
   macro avg       1.00      1.00      1.00      1396
weighted avg       1.00      1.00      1.00      1396

Val=               precision    recall  f1-score   support

           1       0.72      0.56      0.63        41
           2       0.77      0.69      0.73        35
          99       0.79      0.87      0.83       119

    accuracy                           0.77       195
   macro avg       0.76      0.71      0.73       195
weighted avg       0.77      0.77      0.77       195



In [None]:
#FEATURES FRECUENCIA SELECTIVA SIN CLASE 99
rf5 = ensemble.RandomForestClassifier(class_weight='balanced')
rf5.fit(X_train_m, y_train_m.ravel())
print('F1Score=', f1_score(y_val_m, rf5.predict(X_val_m), average='weighted'))
print('Accuracy=', accuracy_score(y_val_m, rf5.predict(X_val_m)))
print('')
print(classification_report(y_val_m, rf5.predict(X_val_m)))

F1Score= 0.7817258883248731
Accuracy= 0.7817258883248731

              precision    recall  f1-score   support

           1       0.78      0.78      0.78       193
           2       0.79      0.79      0.79       201

    accuracy                           0.78       394
   macro avg       0.78      0.78      0.78       394
weighted avg       0.78      0.78      0.78       394



##MODELO 5: NEURAL NETWORK

In [None]:
from sklearn import neural_network

In [None]:
#FEATURES FRECUENCIA
rn0 = neural_network.MLPClassifier()
rn0.fit(X_train_freq, y_train_freq)
print('F1Score=',f1_score(y_val_freq, rn0.predict(X_val_freq), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq, rn0.predict(X_val_freq)))
print('')
print(classification_report(y_val_freq, rn0.predict(X_val_freq)))

F1Score= 0.4853429687913418
Accuracy= 0.6105990783410138

              precision    recall  f1-score   support

           1       0.46      0.09      0.15        68
           2       0.00      0.00      0.00       100
          99       0.62      0.97      0.75       266

    accuracy                           0.61       434
   macro avg       0.36      0.35      0.30       434
weighted avg       0.45      0.61      0.49       434



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#FEATURES FRECUENCIA SELECTIVAS
rn1 = neural_network.MLPClassifier()
rn1.fit(X_train_freq_s, y_train_freq_s)
print('F1Score=',f1_score(y_val_freq_s, rn1.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, rn1.predict(X_val_freq_s)))
print('')
print(classification_report(y_val_freq_s, rn1.predict(X_val_freq_s)))

F1Score= 0.7156586801372139
Accuracy= 0.7235023041474654

              precision    recall  f1-score   support

           1       0.64      0.66      0.65        68
           2       0.66      0.47      0.55       100
          99       0.76      0.83      0.79       266

    accuracy                           0.72       434
   macro avg       0.69      0.66      0.67       434
weighted avg       0.72      0.72      0.72       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS UNIFICADAS
rn2 = neural_network.MLPClassifier()
rn2.fit(X_train_freq_u, y_train_freq_u)
print('F1Score=',f1_score(y_val_freq_u, rn2.predict(X_val_freq_u), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_u, rn2.predict(X_val_freq_u)))
print('')
print(classification_report(y_val_freq_u, rn2.predict(X_val_freq_u)))

F1Score= 0.6503621649922668
Accuracy= 0.684331797235023

              precision    recall  f1-score   support

           1       0.65      0.50      0.57        68
           2       0.68      0.25      0.36       100
          99       0.69      0.89      0.78       266

    accuracy                           0.68       434
   macro avg       0.67      0.55      0.57       434
weighted avg       0.68      0.68      0.65       434



In [None]:
#FEATURES TIME
rn3 = neural_network.MLPClassifier()
rn3.fit(X_train_time, y_train_time)
print('F1Score=',f1_score(y_val_time, rn3.predict(X_val_time), average='weighted'))
print('Accuracy=', accuracy_score(y_val_time, rn3.predict(X_val_time)))
print('')
print(classification_report(y_val_time, rn3.predict(X_val_time)))

F1Score= 0.5100995955017065
Accuracy= 0.6267281105990783

              precision    recall  f1-score   support

           1       0.50      0.04      0.08        68
           2       0.50      0.06      0.11       100
          99       0.63      0.99      0.77       266

    accuracy                           0.63       434
   macro avg       0.54      0.36      0.32       434
weighted avg       0.58      0.63      0.51       434



In [None]:
#FEATURES AMPLITUD
rn4 = neural_network.MLPClassifier()
rn4.fit(X_train_sxx, y_train_sxx.ravel())
print('F1Score=',f1_score(y_val_sxx, rn4.predict(X_val_sxx), average='weighted'))
print('Accuracy=', accuracy_score(y_val_sxx, rn4.predict(X_val_sxx)))
print('')
print(classification_report(y_val_sxx, rn4.predict(X_val_sxx)))

F1Score= 0.5287020863135244
Accuracy= 0.6051282051282051

              precision    recall  f1-score   support

           1       0.24      0.10      0.14        41
           2       0.40      0.11      0.18        35
          99       0.65      0.92      0.77       119

    accuracy                           0.61       195
   macro avg       0.43      0.38      0.36       195
weighted avg       0.52      0.61      0.53       195



In [None]:
#FEATURES FRECUENCIA SELECTIVA SIN CLASE 99
rn5 = neural_network.MLPClassifier()
rn5.fit(X_train_m, y_train_m.ravel())
print('F1Score=',f1_score(y_val_m, rn5.predict(X_val_m), average='weighted'))
print('Accuracy=', accuracy_score(y_val_m, rn5.predict(X_val_m)))
print('')
print(classification_report(y_val_m, rn5.predict(X_val_m)))

F1Score= 0.8185402984494585
Accuracy= 0.8197969543147208

              precision    recall  f1-score   support

           1       0.87      0.74      0.80       193
           2       0.78      0.90      0.84       201

    accuracy                           0.82       394
   macro avg       0.83      0.82      0.82       394
weighted avg       0.83      0.82      0.82       394



## MODELO 6: KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
#FEATURES FRECUENCIA
KNN0 = KNeighborsClassifier()
KNN0.fit(X_train_freq, y_train_freq)
print('F1Score=', f1_score(y_val_freq, KNN0.predict(X_val_freq), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq, KNN0.predict(X_val_freq)))
print('')
print(classification_report(y_val_freq, KNN0.predict(X_val_freq)))

F1Score= 0.6329550259881939
Accuracy= 0.6405529953917051

              precision    recall  f1-score   support

           1       0.51      0.56      0.53        68
           2       0.45      0.35      0.40       100
          99       0.73      0.77      0.75       266

    accuracy                           0.64       434
   macro avg       0.56      0.56      0.56       434
weighted avg       0.63      0.64      0.63       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS
KNN1 = KNeighborsClassifier()
KNN1.fit(X_train_freq_s, y_train_freq_s)
print('F1Score=', f1_score(y_val_freq_s, KNN1.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, KNN1.predict(X_val_freq_s)))
print('')
print(classification_report(y_val_freq_s, KNN1.predict(X_val_freq_s)))

F1Score= 0.7737649992208198
Accuracy= 0.7788018433179723

              precision    recall  f1-score   support

           1       0.79      0.79      0.79        68
           2       0.69      0.55      0.61       100
          99       0.80      0.86      0.83       266

    accuracy                           0.78       434
   macro avg       0.76      0.74      0.74       434
weighted avg       0.77      0.78      0.77       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS UNIFICADAS
KNN2 = KNeighborsClassifier()
KNN2.fit(X_train_freq_u, y_train_freq_u)
print('F1Score=', f1_score(y_val_freq_u, KNN2.predict(X_val_freq_u), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_u, KNN2.predict(X_val_freq_u)))
print('')
print(classification_report(y_val_freq_u, KNN2.predict(X_val_freq_u)))

F1Score= 0.6271738484410296
Accuracy= 0.6359447004608295

              precision    recall  f1-score   support

           1       0.55      0.51      0.53        68
           2       0.49      0.38      0.43       100
          99       0.69      0.76      0.73       266

    accuracy                           0.64       434
   macro avg       0.58      0.55      0.56       434
weighted avg       0.62      0.64      0.63       434



In [None]:
#FEATURES TIME
KNN3 = KNeighborsClassifier()
KNN3.fit(X_train_time, y_train_time)
print('F1Score=', f1_score(y_val_time, KNN3.predict(X_val_time), average='weighted'))
print('Accuracy=', accuracy_score(y_val_time, KNN3.predict(X_val_time)))
print('')
print(classification_report(y_val_time, KNN3.predict(X_val_time)))

F1Score= 0.7680284764643391
Accuracy= 0.7672811059907834

              precision    recall  f1-score   support

           1       0.51      0.63      0.57        68
           2       0.77      0.60      0.67       100
          99       0.85      0.86      0.86       266

    accuracy                           0.77       434
   macro avg       0.71      0.70      0.70       434
weighted avg       0.78      0.77      0.77       434



In [None]:
#FEATURES AMPLITUD
KNN4 = KNeighborsClassifier()
KNN4.fit(X_train_sxx, y_train_sxx)
print('F1Score=', f1_score(y_val_sxx, KNN4.predict(X_val_sxx), average='weighted'))
print('Accuracy=', accuracy_score(y_val_sxx, KNN4.predict(X_val_sxx)))
print('')
print(classification_report(y_val_sxx, KNN4.predict(X_val_sxx)))


F1Score= 0.5671257747753156
Accuracy= 0.5692307692307692

              precision    recall  f1-score   support

           1       0.38      0.29      0.33        41
           2       0.35      0.43      0.38        35
          99       0.70      0.71      0.70       119

    accuracy                           0.57       195
   macro avg       0.47      0.48      0.47       195
weighted avg       0.57      0.57      0.57       195



  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
#FEATURES FRECUENCIA SELECTIVA SIN CLASE 99
KNN5 = KNeighborsClassifier()
KNN5.fit(X_train_m, y_train_m)
print('F1Score=', f1_score(y_val_m, KNN5.predict(X_val_m), average='weighted'))
print('Accuracy=', accuracy_score(y_val_m, KNN5.predict(X_val_m)))
print('')
print(classification_report(y_val_m, KNN5.predict(X_val_m)))


F1Score= 0.7638605406552017
Accuracy= 0.7639593908629442

              precision    recall  f1-score   support

           1       0.74      0.79      0.77       193
           2       0.79      0.74      0.76       201

    accuracy                           0.76       394
   macro avg       0.76      0.76      0.76       394
weighted avg       0.77      0.76      0.76       394



##MODELO 7: BOOSTING

In [None]:
import xgboost as xgb

In [None]:
#FEATURES FRECUENCIA
xgb0= xgb.XGBClassifier()
xgb0.fit(X_train_freq, y_train_freq)
print('F1Score=', f1_score(y_val_freq, xgb0.predict(X_val_freq), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq, xgb0.predict(X_val_freq)))
print('')
print(classification_report(y_val_freq, xgb0.predict(X_val_freq)))

F1Score= 0.7013528933161628
Accuracy= 0.7258064516129032

              precision    recall  f1-score   support

           1       0.78      0.51      0.62        68
           2       0.66      0.35      0.46       100
          99       0.73      0.92      0.81       266

    accuracy                           0.73       434
   macro avg       0.72      0.60      0.63       434
weighted avg       0.72      0.73      0.70       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS
xgb1 = xgb.XGBClassifier()
xgb1.fit(X_train_freq_s, y_train_freq_s)
print('F1Score=', f1_score(y_val_freq_s, xgb1.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, xgb1.predict(X_val_freq_s)))
print('')
print('Train=', classification_report(y_train_freq_s, xgb1.predict(X_train_freq_s)))
print('Val=', classification_report(y_val_freq, xgb0.predict(X_val_freq)))

F1Score= 0.7218904416053936
Accuracy= 0.7419354838709677

Train=               precision    recall  f1-score   support

           1       0.85      0.76      0.80       598
           2       0.83      0.52      0.64       594
          99       0.81      0.93      0.86      1932

    accuracy                           0.82      3124
   macro avg       0.83      0.73      0.77      3124
weighted avg       0.82      0.82      0.81      3124

Val=               precision    recall  f1-score   support

           1       0.78      0.51      0.62        68
           2       0.66      0.35      0.46       100
          99       0.73      0.92      0.81       266

    accuracy                           0.73       434
   macro avg       0.72      0.60      0.63       434
weighted avg       0.72      0.73      0.70       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS UNIFICADAS
xgb2 = xgb.XGBClassifier()
xgb2.fit(X_train_freq_u, y_train_freq_u)
print('F1Score=', f1_score(y_val_freq_u, xgb2.predict(X_val_freq_u), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_u, xgb2.predict(X_val_freq_u)))
print('')
print('Train=', classification_report(y_train_freq_u, xgb2.predict(X_train_freq_u)))
print('Val=', classification_report(y_val_freq_u, xgb2.predict(X_val_freq_u)))

F1Score= 0.685470579083813
Accuracy= 0.7096774193548387

Train=               precision    recall  f1-score   support

           1       0.76      0.61      0.68       598
           2       0.77      0.45      0.57       594
          99       0.76      0.90      0.82      1932

    accuracy                           0.76      3124
   macro avg       0.76      0.65      0.69      3124
weighted avg       0.76      0.76      0.75      3124

Val=               precision    recall  f1-score   support

           1       0.70      0.51      0.59        68
           2       0.71      0.34      0.46       100
          99       0.71      0.90      0.79       266

    accuracy                           0.71       434
   macro avg       0.71      0.58      0.62       434
weighted avg       0.71      0.71      0.69       434



In [None]:
#FEATURES TIME
xgb3 = xgb.XGBClassifier()
xgb3.fit(X_train_time, y_train_time)
print('F1Score=', f1_score(y_val_time, xgb3.predict(X_val_time), average='weighted'))
print('Accuracy=', accuracy_score(y_val_time, xgb3.predict(X_val_time)))
print('')
print('Train=', classification_report(y_train_time, xgb3.predict(X_train_time)))
print('Val=', classification_report(y_val_time, xgb3.predict(X_val_time)))

F1Score= 0.6962463799770954
Accuracy= 0.7304147465437788

Train=               precision    recall  f1-score   support

           1       0.91      0.57      0.70       598
           2       0.94      0.45      0.61       594
          99       0.78      0.99      0.87      1932

    accuracy                           0.81      3124
   macro avg       0.87      0.67      0.73      3124
weighted avg       0.83      0.81      0.79      3124

Val=               precision    recall  f1-score   support

           1       0.75      0.40      0.52        68
           2       0.79      0.34      0.48       100
          99       0.72      0.96      0.82       266

    accuracy                           0.73       434
   macro avg       0.75      0.57      0.61       434
weighted avg       0.74      0.73      0.70       434



In [None]:
#FEATURES AMPLITUD
xgb4= xgb.XGBClassifier()
xgb4.fit(X_train_sxx, y_train_sxx.ravel())
print('F1Score=', f1_score(y_val_sxx, xgb4.predict(X_val_sxx), average='weighted'))
print('Accuracy=', accuracy_score(y_val_sxx, xgb4.predict(X_val_sxx)))
print('')
print('Train=', classification_report(y_train_sxx, xgb4.predict(X_train_sxx)))
print('Val=', classification_report(y_val_sxx, xgb4.predict(X_val_sxx)))

F1Score= 0.7674137827983983
Accuracy= 0.7692307692307693

Train=               precision    recall  f1-score   support

           1       0.91      0.74      0.82       278
           2       0.82      0.73      0.77       303
          99       0.85      0.93      0.89       815

    accuracy                           0.85      1396
   macro avg       0.86      0.80      0.83      1396
weighted avg       0.85      0.85      0.85      1396

Val=               precision    recall  f1-score   support

           1       0.68      0.61      0.64        41
           2       0.74      0.74      0.74        35
          99       0.80      0.83      0.82       119

    accuracy                           0.77       195
   macro avg       0.74      0.73      0.73       195
weighted avg       0.77      0.77      0.77       195



In [None]:
#FEATURES FRECUENCIA SELECTIVA SIN CLASE 99
xgb5= xgb.XGBClassifier()
xgb5.fit(X_train_m, y_train_m.ravel())
print('F1Score=', f1_score(y_val_m, xgb5.predict(X_val_m), average='weighted'))
print('Accuracy=', accuracy_score(y_val_m, xgb5.predict(X_val_m)))
print('')
print('Val=', classification_report(y_val_m, xgb5.predict(X_val_m)))

F1Score= 0.8145378611366669
Accuracy= 0.8147208121827412

Val=               precision    recall  f1-score   support

           1       0.83      0.79      0.81       193
           2       0.80      0.84      0.82       201

    accuracy                           0.81       394
   macro avg       0.82      0.81      0.81       394
weighted avg       0.82      0.81      0.81       394



##MODELO 8: DecisionTreeClassifier:

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
#FEATURES FRECUENCIA
t0= DecisionTreeClassifier(class_weight='balanced')
t0.fit(X_train_freq, y_train_freq)
print('F1Score=', f1_score(y_val_freq, t0.predict(X_val_freq), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq, t0.predict(X_val_freq)))
print('')
print('Train=', classification_report(y_train_freq, t0.predict(X_train_freq)))
print('Val=', classification_report(y_val_freq, t0.predict(X_val_freq)))

F1Score= 0.65868885928705
Accuracy= 0.6566820276497696

Train=               precision    recall  f1-score   support

           1       0.92      0.96      0.94       598
           2       0.88      0.99      0.93       594
          99       1.00      0.94      0.97      1932

    accuracy                           0.96      3124
   macro avg       0.93      0.97      0.95      3124
weighted avg       0.96      0.96      0.96      3124

Val=               precision    recall  f1-score   support

           1       0.47      0.54      0.51        68
           2       0.54      0.52      0.53       100
          99       0.76      0.74      0.75       266

    accuracy                           0.66       434
   macro avg       0.59      0.60      0.59       434
weighted avg       0.66      0.66      0.66       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS
t1= DecisionTreeClassifier(class_weight='balanced')
t1.fit(X_train_freq_s, y_train_freq_s)
print('F1Score=', f1_score(y_val_freq_s, t1.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, t1.predict(X_val_freq_s)))
print('')
print('Train=', classification_report(y_train_freq_s, t1.predict(X_train_freq_s)))
print('Val=', classification_report(y_val_freq_s, t1.predict(X_val_freq_s)))

F1Score= 0.6634623325570256
Accuracy= 0.6705069124423964

Train=               precision    recall  f1-score   support

           1       1.00      1.00      1.00       598
           2       1.00      1.00      1.00       594
          99       1.00      1.00      1.00      1932

    accuracy                           1.00      3124
   macro avg       1.00      1.00      1.00      3124
weighted avg       1.00      1.00      1.00      3124

Val=               precision    recall  f1-score   support

           1       0.62      0.51      0.56        68
           2       0.52      0.45      0.48       100
          99       0.72      0.79      0.76       266

    accuracy                           0.67       434
   macro avg       0.62      0.59      0.60       434
weighted avg       0.66      0.67      0.66       434



In [None]:
#FEATURES FRECUENCIA SELECTIVAS UNIFICADAS
t2 = DecisionTreeClassifier(class_weight='balanced')
t2.fit(X_train_freq_u, y_train_freq_u)
print('F1Score=', f1_score(y_val_freq_u, t2.predict(X_val_freq_u), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_u, t2.predict(X_val_freq_u)))
print('')
print('Train=', classification_report(y_train_freq_u, t2.predict(X_train_freq_u)))
print('Val=', classification_report(y_val_freq_u, t2.predict(X_val_freq_u)))

F1Score= 0.6133816259956545
Accuracy= 0.6221198156682027

Train=               precision    recall  f1-score   support

           1       1.00      1.00      1.00       598
           2       1.00      1.00      1.00       594
          99       1.00      1.00      1.00      1932

    accuracy                           1.00      3124
   macro avg       1.00      1.00      1.00      3124
weighted avg       1.00      1.00      1.00      3124

Val=               precision    recall  f1-score   support

           1       0.48      0.47      0.48        68
           2       0.46      0.36      0.40       100
          99       0.70      0.76      0.73       266

    accuracy                           0.62       434
   macro avg       0.55      0.53      0.54       434
weighted avg       0.61      0.62      0.61       434



In [None]:
#FEATURES TIME
t3 = DecisionTreeClassifier(class_weight='balanced')
t3.fit(X_train_time, y_train_time)
print('F1Score=', f1_score(y_val_time, t3.predict(X_val_time), average='weighted'))
print('Accuracy=', accuracy_score(y_val_time, t3.predict(X_val_time)))
print('')
print('Train=', classification_report(y_train_time, t3.predict(X_train_time)))
print('Val=', classification_report(y_val_time, t3.predict(X_val_time)))

F1Score= 0.8587053255561693
Accuracy= 0.8594470046082949

Train=               precision    recall  f1-score   support

           1       1.00      1.00      1.00       598
           2       1.00      1.00      1.00       594
          99       1.00      1.00      1.00      1932

    accuracy                           1.00      3124
   macro avg       1.00      1.00      1.00      3124
weighted avg       1.00      1.00      1.00      3124

Val=               precision    recall  f1-score   support

           1       0.76      0.82      0.79        68
           2       0.83      0.74      0.78       100
          99       0.90      0.91      0.91       266

    accuracy                           0.86       434
   macro avg       0.83      0.83      0.83       434
weighted avg       0.86      0.86      0.86       434



In [None]:
#FEATURES AMPLITUD
t4= DecisionTreeClassifier(class_weight='balanced')
t4.fit(X_train_sxx, y_train_sxx.ravel())
print('F1Score=', f1_score(y_val_sxx, t4.predict(X_val_sxx), average='weighted'))
print('Accuracy=', accuracy_score(y_val_sxx, t4.predict(X_val_sxx)))
print('')
print('Train=', classification_report(y_train_sxx, t4.predict(X_train_sxx)))
print('Val=', classification_report(y_val_sxx, t4.predict(X_val_sxx)))

F1Score= 0.6751219024180398
Accuracy= 0.6717948717948717

Train=               precision    recall  f1-score   support

           1       1.00      1.00      1.00       278
           2       1.00      1.00      1.00       303
          99       1.00      1.00      1.00       815

    accuracy                           1.00      1396
   macro avg       1.00      1.00      1.00      1396
weighted avg       1.00      1.00      1.00      1396

Val=               precision    recall  f1-score   support

           1       0.62      0.59      0.60        41
           2       0.48      0.57      0.52        35
          99       0.76      0.73      0.75       119

    accuracy                           0.67       195
   macro avg       0.62      0.63      0.62       195
weighted avg       0.68      0.67      0.68       195



In [None]:
#FEATURES FRECUENCIA SELECTIVA SIN CLASE 99
t5= DecisionTreeClassifier(class_weight='balanced')
t5.fit(X_train_m, y_train_m.ravel())
print('F1Score=', f1_score(y_val_m, t5.predict(X_val_m), average='weighted'))
print('Accuracy=', accuracy_score(y_val_m, t5.predict(X_val_m)))
print('')
print(classification_report(y_val_m, t5.predict(X_val_m)))

F1Score= 0.7335282930879715
Accuracy= 0.733502538071066

              precision    recall  f1-score   support

           1       0.72      0.74      0.73       193
           2       0.74      0.73      0.74       201

    accuracy                           0.73       394
   macro avg       0.73      0.73      0.73       394
weighted avg       0.73      0.73      0.73       394



#MODELOS SELECCIONADOS

##RANDOM FOREST

In [None]:
rf = ensemble.RandomForestClassifier()

In [None]:
#se sobremuestrean las clases minoritarias para balancear los datos
from imblearn.over_sampling import RandomOverSampler

over_sampler = RandomOverSampler(random_state=0)
X_res_t, y_res_t = over_sampler.fit_resample(X_train_time, y_train_time)



In [None]:
param_grid_RF = {
    'criterion': ['entropy', 'gini'],
    'n_estimators': [100],
    'max_depth': [None, 10, 15, 20, 30, 50],
    'class_weight': ["balanced", "balanced_subsample", None]
}
RF_models = GridSearchCV(rf, param_grid_RF, cv=5, scoring='accuracy', n_jobs=-1)
RF_models.fit(X_res_t, y_res_t)

<IPython.core.display.Javascript object>

GridSearchCV(cv=5, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rando

In [None]:
best_RF_models = RF_models.best_estimator_
best_RF_models

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
print('F1Score=', f1_score(y_val_time, best_RF_models.predict(X_val_time), average='weighted'))
print('Accuracy=', accuracy_score(y_val_time, best_RF_models.predict(X_val_time)))
print('')
print(classification_report(y_val_time, best_RF_models.predict(X_val_time)))

F1Score= 0.9355580615106299
Accuracy= 0.9354838709677419

              precision    recall  f1-score   support

           1       0.86      0.91      0.89        68
           2       0.94      0.89      0.91       100
          99       0.96      0.96      0.96       266

    accuracy                           0.94       434
   macro avg       0.92      0.92      0.92       434
weighted avg       0.94      0.94      0.94       434



In [None]:
print('MODELO: RANDOM FOREST')
print('')
print('F1Score=', f1_score(y_test_time, best_RF_models.predict(X_test_time), average='weighted'))
print('Accuracy=', accuracy_score(y_test_time, best_RF_models.predict(X_test_time)))
print('')
print(classification_report(y_test_time, best_RF_models.predict(X_test_time)))

MODELO: RANDOM FOREST

F1Score= 0.8932389511405618
Accuracy= 0.8924455825864277

              precision    recall  f1-score   support

           1       0.78      0.86      0.82       140
           2       0.88      0.87      0.88       173
          99       0.93      0.91      0.92       468

    accuracy                           0.89       781
   macro avg       0.87      0.88      0.87       781
weighted avg       0.89      0.89      0.89       781



##KNN

In [None]:
KNN = KNeighborsClassifier()

In [None]:
#se sobremuestrean las clases minoritarias para balancear los datos
from imblearn.over_sampling import RandomOverSampler

over_sampler = RandomOverSampler(random_state=0)
X_res, y_res = over_sampler.fit_resample(X_train_freq_s, y_train_freq_s)



In [None]:
param_grid_neigh = {
    'weights': ['uniform', 'distance'],
    'n_neighbors': [1, 2, 3, 4, 5, 8, 12, 16, 17, 18, 19, 20, 30, 50],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'p': [1, 2], 
    'leaf_size': [1, 2, 5, 10, 20, 30, 40, 50]

}
neigh_models = GridSearchCV(KNN, param_grid_neigh, cv=5, scoring='accuracy', n_jobs=-1)
neigh_models.fit(X_res, y_res)

<IPython.core.display.Javascript object>

GridSearchCV(cv=5, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=-1,
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'leaf_size': [1, 2, 5, 10, 20, 30, 40, 50],
                         'n_neighbors': [1, 2, 3, 4, 5, 8, 12, 16, 17, 18, 19,
                                         20, 30, 50],
                         'p': [1, 2], 'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [None]:
best_neigh_models = neigh_models.best_estimator_ 
best_neigh_models

KNeighborsClassifier(algorithm='auto', leaf_size=1, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=1,
                     weights='uniform')

In [None]:
print('F1Score=', f1_score(y_val_freq_s, best_neigh_models.predict(X_val_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_val_freq_s, best_neigh_models.predict(X_val_freq_s)))
print('')
print(classification_report(y_val_freq_s, best_neigh_models.predict(X_val_freq_s)))

F1Score= 0.7915380668518089
Accuracy= 0.7926267281105991

              precision    recall  f1-score   support

           1       0.81      0.82      0.82        68
           2       0.66      0.63      0.65       100
          99       0.83      0.85      0.84       266

    accuracy                           0.79       434
   macro avg       0.77      0.77      0.77       434
weighted avg       0.79      0.79      0.79       434



In [None]:
print('MODELO: KNeighborsClassifier')
print('')
print('F1Score=', f1_score(y_test_freq_s, best_neigh_models.predict(X_test_freq_s), average='weighted'))
print('Accuracy=', accuracy_score(y_test_freq_s, best_neigh_models.predict(X_test_freq_s)))
print('')
print(classification_report(y_test_freq_s, best_neigh_models.predict(X_test_freq_s)))

MODELO: KNeighborsClassifier

F1Score= 0.80636926576299
Accuracy= 0.8066581306017926

              precision    recall  f1-score   support

           1       0.79      0.80      0.79       140
           2       0.72      0.70      0.71       173
          99       0.84      0.85      0.85       468

    accuracy                           0.81       781
   macro avg       0.78      0.78      0.78       781
weighted avg       0.81      0.81      0.81       781



##XGBOOST

In [None]:
xgb_clf= xgb.XGBClassifier()

In [None]:
xgb_param_grid = {
       'nthread':[1],
       'n_estimators': [30, 50, 60, 70],
       'max_depth': [5, 7, 10],
       'verbosity': [0, 1],
       'silent':[True, False, None],
       'learning_rate': [0.05, 0.1, 0.5]
       }
       

In [None]:
xgb_models = GridSearchCV(xgb_clf, xgb_param_grid, cv=5, scoring='f1', n_jobs= -1)
xgb_models.fit(X_train_m, y_train_m)

<IPython.core.display.Javascript object>

GridSearchCV(cv=5, error_score=nan,
             estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                     colsample_bylevel=1, colsample_bynode=1,
                                     colsample_bytree=1, gamma=0,
                                     learning_rate=0.1, max_delta_step=0,
                                     max_depth=3, min_child_weight=1,
                                     missing=None, n_estimators=100, n_jobs=1,
                                     nthread=None, objective='binary:logistic',
                                     random_state=0, reg_alpha=0, reg_lambda=1,
                                     scale_pos_weight=1, seed=None, silent=None,
                                     subsample=1, verbosity=1),
             iid='deprecated', n_jobs=-1,
             param_grid={'learning_rate': [0.05, 0.1, 0.5],
                         'max_depth': [5, 7, 10],
                         'n_estimators': [30, 50, 60, 70], 'nthread':

In [None]:
best_xgb= xgb_models.best_estimator_
best_xgb

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=7,
              min_child_weight=1, missing=None, n_estimators=50, n_jobs=1,
              nthread=1, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=True, subsample=1, verbosity=0)

In [None]:
print('F1Score=', f1_score(y_val_m, best_xgb.predict(X_val_m), average='weighted'))
print('Accuracy=', accuracy_score(y_val_m, best_xgb.predict(X_val_m)))
print('')
print(classification_report(y_val_m, best_xgb.predict(X_val_m))) 

F1Score= 0.7969281382452965
Accuracy= 0.7969543147208121

              precision    recall  f1-score   support

           1       0.80      0.79      0.79       193
           2       0.80      0.81      0.80       201

    accuracy                           0.80       394
   macro avg       0.80      0.80      0.80       394
weighted avg       0.80      0.80      0.80       394

