In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

from sklearn.utils import class_weight
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, KFold


import tensorflow as tf
import tensorflow.keras.layers as layers
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical




### Holdout approach

In [None]:
# Load the holdout training and test splits. The "Unnamed: 0" column is discarded
# (presumably the index that was written out when the CSVs were saved).
dataset = pd.read_csv("training_dataset.csv").drop(columns="Unnamed: 0")
test_dataset = pd.read_csv("test_dataset.csv").drop(columns="Unnamed: 0")

# dataset

In [None]:
# Target encoding: every distinct weather label gets an integer code, numbered in order of
# first appearance in the column.
key_mapping = {label: code for code, label in enumerate(pd.unique(dataset['weather']))}
# Inverse lookup: integer code -> weather label.
reverse_key_mapping = {code: label for label, code in key_mapping.items()}

# key_mapping

#### Preprocessing (holdout)

In [None]:
# Build the label -> code table for the target column (codes follow order of first appearance)
# together with its inverse, then display the table.
key_mapping = dict((label, code) for code, label in enumerate(pd.unique(dataset['weather'])))
reverse_key_mapping = dict((code, label) for label, code in key_mapping.items())

key_mapping




In [None]:
# Replace each weather label with its integer code from key_mapping.
# NOTE(review): the column is overwritten in place, so running this cell a second time would
# look the integer codes up in key_mapping (which is keyed by label) — reload `dataset` first.
dataset['weather'] = dataset['weather'].map(key_mapping)
#dataset

In [None]:
# Standardize each climate feature independently: a fresh scaler is fitted on the column and
# the column is replaced by its transformed values.
for variavel_climatica in ('humidity', 'pressure', 'temperature', 'wind'):
    scale = StandardScaler()
    # The scaler works on 2-D input, hence the reshape to a single column.
    dataset[variavel_climatica] = scale.fit_transform(
        dataset[variavel_climatica].to_numpy().reshape(-1, 1)
    )

# dataset


In [None]:
#dataset.describe()

In [None]:
# Convert the DataFrame columns into the numpy arrays passed to fit().
# Targets are one-hot encoded with one slot per distinct weather code: categorical
# cross-entropy takes one-hot vectors (sparse categorical cross-entropy takes integers).
y = to_categorical(
    dataset['weather'].to_numpy(),
    num_classes=len(pd.unique(dataset['weather'])),
)

# Feature matrix: one float32 column per climate variable, in this fixed order.
x = np.column_stack([
    dataset[feature].to_numpy().astype(np.float32)
    for feature in ('humidity', 'pressure', 'temperature', 'wind')
])


In [None]:
#as classes são desbalanceadas, é necessário balancear elas
#dataset['weather'].value_counts()



In [None]:
# The weather classes are sampled very unevenly, so each one gets a 'balanced' weight that is
# handed to fit() through class_weight. Keys are the positions within `classes`.
weights = dict(enumerate(
    class_weight.compute_class_weight(
        'balanced',
        classes=pd.unique(dataset['weather']),
        y=dataset['weather'],
    )
))
# weights

#### Architecture definition (holdout)

In [None]:
def architecture():
    """Build the (uncompiled) fully connected classifier used in the holdout experiment.

    The network stacks four ReLU hidden layers (2048, 1024, 512 and 256 units) followed by a
    softmax layer with one unit per distinct value of the global ``dataset['weather']``.

    Returns:
        tf.keras.models.Sequential: the model; no input shape is declared here.
    """
    hidden_layers = [layers.Dense(units, activation='relu') for units in (2048, 1024, 512, 256)]
    output_layer = layers.Dense(len(pd.unique(dataset['weather'])), activation='softmax')

    return tf.keras.models.Sequential(hidden_layers + [output_layer])

In [None]:
class callback(tf.keras.callbacks.Callback):
  """Keras callback that saves the weights and stops training once the loss is low enough.

  NOTE(review): a class with the same name is defined again in the training section that
  follows and replaces this one when the notebook runs top to bottom. `fold` is not assigned
  anywhere before this cell, so this version appears to belong to the k-fold workflow.
  """
  def on_epoch_end(self, epochs, logs=None):  # overrides the hook inherited from Callback
    # Both values are module-level globals assigned by other cells.
    global VALUE_CHECKPOINT
    global fold
    
    #metrics = list(logs.values())

    # The checkpoint name embeds the current fold and the loss threshold.
    checkpoint_path = f'checkpoint({fold},{VALUE_CHECKPOINT}_loss)'

    if logs.get('loss') < VALUE_CHECKPOINT:       # has the loss dropped below the configured threshold?
      self.model.save_weights(checkpoint_path, save_format='tf')
      self.model.stop_training = True
        

#### Training (holdout)

In [None]:
# Instantiate the holdout model (not compiled yet).
neural_network = architecture()

In [None]:
# Adam optimizer with gradient value clipping at 0.5.
optimizer = Adam(
    clipvalue=0.5,
    learning_rate=0.0001,
)

# The targets are one-hot encoded, hence categorical cross-entropy.
neural_network.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [None]:
class callback(tf.keras.callbacks.Callback):
    """Save the weights and stop training once the epoch loss drops below VALUE_CHECKPOINT.

    The threshold is read from the module-level global ``VALUE_CHECKPOINT``, which must be
    assigned before ``fit`` is called.
    """

    def on_epoch_end(self, epochs, logs=None):
        """Check the epoch's loss against the threshold.

        Args:
            epochs: index of the epoch that just finished (unused).
            logs: metrics dict for the epoch; only ``'loss'`` is read.
        """
        # `logs` defaults to None and may lack 'loss'; in either case there is nothing to
        # compare, so do nothing instead of raising.
        loss = (logs or {}).get('loss')
        if loss is None:
            return

        checkpoint_path = f'checkpoint({VALUE_CHECKPOINT}_loss)'

        if loss < VALUE_CHECKPOINT:
            self.model.save_weights(checkpoint_path, save_format='tf')
            self.model.stop_training = True
        

In [None]:
#neural_network.build(input_shape = (4,1))
# Restore previously saved weights from the checkpoint named 'checkpoint'.
# NOTE(review): no cell in this notebook writes a checkpoint with exactly this name (the
# callback saves to 'checkpoint(<threshold>_loss)'), so the file must already exist — confirm.
neural_network.load_weights('checkpoint')


In [None]:
VALUE_CHECKPOINT = 0.26   # loss threshold at which the callback saves the weights and stops
epochs = 1000             # upper bound; training usually ends earlier through the callback

# Train the model (callbacks are passed as a list, as documented for fit()).
report_metrics = neural_network.fit(x, y, epochs=epochs, callbacks=[callback()], class_weight=weights)

# Write the per-epoch loss to the report file as a two-line CSV: epoch numbers, then losses.
# The header is sized by the epochs that actually ran (training may stop early), and the data
# row is comma-joined and newline-terminated so both rows have the same number of fields.
loss_history = report_metrics.history['loss']
with open(f'report({VALUE_CHECKPOINT}_loss).txt', 'w') as report:
    report.write(','.join(map(str, range(1, len(loss_history) + 1))) + '\n')
    report.write(','.join(map(str, loss_history)) + '\n')




In [None]:
# Train the same model for up to 3 more epochs with the same callback and class weights.
neural_network.fit(x,y, epochs = 3, callbacks = callback(), class_weight= weights)

### K-fold approach

In [2]:
# Load the full dataset for the k-fold experiment; the CSV's 'month' column is exposed as
# 'season' and the "Unnamed: 0" column is discarded.
dataset = (
    pd.read_csv("dados_originais.csv")
    .drop(columns="Unnamed: 0")
    .rename(columns={'month': 'season'})
)
dataset

Unnamed: 0,season,humidity,pressure,temperature,wind,weather
0,spring,58,1012,15,7,few_clouds
1,spring,57,1012,15,7,few_clouds
2,spring,57,1012,15,7,few_clouds
3,spring,57,1012,15,7,few_clouds
4,spring,57,1012,15,6,few_clouds
...,...,...,...,...,...,...
43575,spring,36,1019,16,3,sky_is_clear
43576,spring,38,1019,16,1,sky_is_clear
43577,spring,54,1019,14,2,sky_is_clear
43578,spring,62,1020,12,3,sky_is_clear


#### Preprocessing (k fold)

In [3]:
# Target encoding for the k-fold data: weather label -> integer code (order of first
# appearance), plus the inverse table for decoding.
key_mapping = {
    weather: index
    for index, weather in enumerate(pd.unique(dataset['weather']))
}
reverse_key_mapping = {index: weather for weather, index in key_mapping.items()}

key_mapping

{'few_clouds': 0,
 'scattered_clouds': 1,
 'broken_clouds': 2,
 'sky_is_clear': 3,
 'overcast_clouds': 4,
 'mist': 5,
 'drizzle': 6,
 'moderate_rain': 7,
 'light_intensity_drizzle': 8,
 'light_rain': 9,
 'fog': 10,
 'haze': 11,
 'heavy_snow': 12,
 'heavy_intensity_drizzle': 13,
 'heavy_intensity_rain': 14,
 'light_rain_and_snow': 15,
 'snow': 16,
 'light_snow': 17,
 'proximity_thunderstorm': 18,
 'thunderstorm': 19,
 'thunderstorm_with_rain': 20,
 'thunderstorm_with_heavy_rain': 21,
 'thunderstorm_with_light_rain': 22,
 'very_heavy_rain': 23,
 'dust': 24}

In [4]:
# Encode the season feature numerically: the i-th distinct season (in order of first
# appearance) maps to i/3 rounded to three decimals.
key_mapping_season = {
    season: round(position / 3, 3)
    for position, season in enumerate(pd.unique(dataset['season']))
}
key_mapping_season

{'spring': 0.0, 'summer': 0.333, 'autumn': 0.667, 'winter': 1.0}

In [5]:
# Replace the season names with their float encoding from key_mapping_season.
# NOTE(review): overwrites the column in place; re-running the cell would look the floats up
# in a mapping keyed by season name — reload `dataset` before re-running.
dataset['season'] = dataset['season'].map(key_mapping_season)
dataset

Unnamed: 0,season,humidity,pressure,temperature,wind,weather
0,0.0,58,1012,15,7,few_clouds
1,0.0,57,1012,15,7,few_clouds
2,0.0,57,1012,15,7,few_clouds
3,0.0,57,1012,15,7,few_clouds
4,0.0,57,1012,15,6,few_clouds
...,...,...,...,...,...,...
43575,0.0,36,1019,16,3,sky_is_clear
43576,0.0,38,1019,16,1,sky_is_clear
43577,0.0,54,1019,14,2,sky_is_clear
43578,0.0,62,1020,12,3,sky_is_clear


In [6]:
# Build {fold_name: {"x_train", "x_val", "y_train", "y_val"}} from the per-fold CSV files.
# x_* entries are feature DataFrames; y_* entries are the 'weather' column as a Series
# (no label mapping is applied here).
#
# os.listdir returns entries in arbitrary order and may include stray files or hidden
# directories (e.g. notebook checkpoints), so keep only visible sub-directories and sort them
# to make the fold order reproducible.
fold_names = sorted(
    entry for entry in os.listdir("./K Folds Cross Validation/")
    if not entry.startswith('.')
    and os.path.isdir(os.path.join("./K Folds Cross Validation/", entry))
)

k_folds = { fold : {"x_train": pd.read_csv(f'./K Folds Cross Validation/{fold}/x_train_{fold}').drop("Unnamed: 0", axis = 1),
                    "x_val": pd.read_csv(f'./K Folds Cross Validation/{fold}/x_val_{fold}').drop("Unnamed: 0", axis = 1),
                    "y_train": pd.read_csv(f'./K Folds Cross Validation/{fold}/y_train_{fold}').drop("Unnamed: 0", axis = 1)['weather'],
                    "y_val": pd.read_csv(f'./K Folds Cross Validation/{fold}/y_val_{fold}').drop("Unnamed: 0", axis = 1)['weather'],}

                     for fold in fold_names}

k_folds


{'fold0': {'x_train':        season  humidity  pressure  temperature  wind
  0      spring        69      1015            4     3
  1      spring        51      1022            0     5
  2      spring        51      1022           -1     4
  3      spring        55      1022           -1     4
  4      spring        51      1022           -1     5
  ...       ...       ...       ...          ...   ...
  34859  spring        36      1019           16     3
  34860  spring        38      1019           16     1
  34861  spring        54      1019           14     2
  34862  spring        62      1020           12     3
  34863  spring        58      1020           11     2
  
  [34864 rows x 5 columns],
  'x_val':       season  humidity  pressure  temperature  wind
  0     spring        58      1012           15     7
  1     spring        57      1012           15     7
  2     spring        57      1012           15     7
  3     spring        57      1012           15     7
  4     sp

In [7]:
# Map the season names to their float encoding in the feature frames of every fold.
for fold, data in k_folds.items():
    data['x_train']['season'] = data['x_train']['season'].map(key_mapping_season)
    data['x_val']['season'] = data['x_val']['season'].map(key_mapping_season)

# Sanity check: distinct encoded seasons in the training split of the last fold processed.
# (The split must be selected by its string key; indexing with the `data` dict itself raises
# TypeError because a dict is not hashable.)
np.unique(k_folds[fold]['x_train']['season'])

In [None]:
#np.unique(k_folds['fold1']['x_train']['season'])


In [10]:
# Standardize the features of every fold. The scaler is re-fitted on each fold's training
# split, and the validation split is transformed with those same training statistics.
scaler = StandardScaler()

for fold, data in k_folds.items():
    x_train = scaler.fit_transform(data["x_train"])
    x_val = scaler.transform(data["x_val"])

    # Store the scaled results back into the fold dictionary, replacing the DataFrames.
    k_folds[fold].update({"x_train": x_train, "x_val": x_val})



In [11]:
# Per-fold 'balanced' class weights, computed from each fold's training labels.
weights = {}

for fold, data in k_folds.items():
    y_train = data["y_train"]
    balanced = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    # Keys are positions within the unique labels of this fold's training split.
    weights[fold] = dict(enumerate(balanced))

weights




{'fold0': {0: 0.533700727133563,
  1: 0.38867335562987737,
  2: 0.2837932437932438,
  3: 0.15370439766339689,
  4: 0.40398609501738125,
  5: 0.32281481481481483,
  6: 7.878870056497175,
  7: 1.8668808567603747,
  8: 4.358,
  9: 0.4415959468017733,
  10: 1.6178190255220417,
  11: 2.526376811594203,
  12: 14.526666666666667,
  13: 87.16,
  14: 4.7273220338983055,
  15: 154.95111111111112,
  16: 17.878974358974357,
  17: 4.557385620915032,
  18: 15.158260869565217,
  19: 23.636610169491526,
  20: 87.16,
  21: 154.95111111111112,
  22: 48.08827586206897,
  23: 21.12969696969697,
  24: 82.03294117647059},
 'fold1': {0: 0.533700727133563,
  1: 0.38867335562987737,
  2: 0.2837932437932438,
  3: 0.15370439766339689,
  4: 0.4038690993339125,
  5: 0.3228895577680019,
  6: 7.878870056497175,
  7: 1.8668808567603747,
  8: 4.344423676012461,
  9: 0.4417358251504593,
  10: 1.6178190255220417,
  11: 2.526376811594203,
  12: 14.526666666666667,
  13: 87.16,
  14: 4.7273220338983055,
  15: 154.95111111

#### Architecture (k fold)

In [12]:
def architecture():
    """Build the (uncompiled) k-fold classifier: ReLU dense layers with dropout in between.

    Layer stack: Dense(2048) -> Dropout(0.3) -> Dense(1024) -> Dropout(0.2) -> Dense(512)
    -> Dropout(0.1) -> Dense(256) -> softmax with one unit per entry of the global
    ``key_mapping``.

    Returns:
        tf.keras.models.Sequential: the model; no input shape is declared here.
    """
    neural_network = tf.keras.models.Sequential()

    # (units, dropout rate applied after that layer); None means no dropout follows.
    for units, dropout_rate in ((2048, 0.3), (1024, 0.2), (512, 0.1), (256, None)):
        neural_network.add(layers.Dense(units, activation='relu'))
        if dropout_rate is not None:
            neural_network.add(layers.Dropout(rate=dropout_rate))

    neural_network.add(layers.Dense(len(key_mapping), activation='softmax'))
    return neural_network



In [17]:
class callback(tf.keras.callbacks.Callback):
    """Save the weights and stop training once the epoch loss drops below VALUE_CHECKPOINT.

    Reads two module-level globals that must be assigned before ``fit`` is called:
    ``VALUE_CHECKPOINT`` (loss threshold) and ``fold`` (name of the fold being trained, used
    in the checkpoint file name).
    """

    def on_epoch_end(self, epochs, logs=None):
        """Check the epoch's loss against the threshold.

        Args:
            epochs: index of the epoch that just finished (unused).
            logs: metrics dict for the epoch; only ``'loss'`` is read.
        """
        # `logs` defaults to None and may lack 'loss'; in either case there is nothing to
        # compare, so do nothing instead of raising.
        loss = (logs or {}).get('loss')
        if loss is None:
            return

        checkpoint_path = f'checkpoint({fold},{VALUE_CHECKPOINT}_loss)'

        if loss < VALUE_CHECKPOINT:
            self.model.save_weights(checkpoint_path, save_format='tf')
            self.model.stop_training = True

#### Training (k fold)

In [18]:
# Instantiate the k-fold model (not compiled yet).
neural_network = architecture()




In [19]:
# Adam optimizer with gradient value clipping at 0.5; the learning rate here is ten times
# smaller than the one used in the holdout section.
optimizer = Adam(
    clipvalue=0.5,
    learning_rate=0.00001,
)

# The targets are one-hot encoded, hence categorical cross-entropy.
neural_network.compile(loss='categorical_crossentropy', optimizer=optimizer)





In [None]:
#neural_network.load_weights('checkpoint')

In [None]:
# Train and validate with k-fold cross-validation.
epochs = 300
VALUE_CHECKPOINT = 1.5

# One-hot width is fixed to the full label set. Deriving it from each split's unique labels
# breaks as soon as a rare class is missing from a split: the target width would then differ
# from the model's output layer, which has len(key_mapping) units.
NUM_CLASSES = len(key_mapping)

for fold in k_folds.keys():   # `fold` stays a module-level name: the callback reads it
    x = k_folds[fold]["x_train"]
    x_val = k_folds[fold]["x_val"]

    # categorical cross-entropy takes one-hot targets (the sparse variant takes integers)
    y = to_categorical(np.array(k_folds[fold]["y_train"]), num_classes=NUM_CLASSES)
    y_val = to_categorical(np.array(k_folds[fold]["y_val"]), num_classes=NUM_CLASSES)

    # Start every fold from a freshly initialised model and optimizer. Reusing one model for
    # all folds would let weights learned on earlier folds (whose training data contains this
    # fold's validation samples) leak into this fold's validation loss.
    # Hyperparameters mirror the compile cell of this section — keep them in sync.
    neural_network = architecture()
    neural_network.compile(optimizer=Adam(learning_rate=0.00001, clipvalue=0.5),
                           loss='categorical_crossentropy')

    # Train the model for this fold.
    report_metrics = neural_network.fit(x, y, validation_data=(x_val, y_val), epochs=epochs,
                                        callbacks=[callback()], class_weight=weights[fold])

    # Write the fold report as a three-line CSV: epoch numbers, training loss, validation loss.
    # Each row is newline-terminated and sized by the epochs that actually ran (training may
    # stop early), so every row has the same number of fields.
    history = report_metrics.history
    with open(f'report({fold}).txt', 'w') as report:
        report.write(','.join(map(str, range(1, len(history['loss']) + 1))) + '\n')
        report.write(','.join(map(str, history['loss'])) + '\n')
        report.write(','.join(map(str, history['val_loss'])) + '\n')


#### Visualization (k fold)

In [22]:
def _load_fold_report(path):
    """Read a report CSV, transpose it, drop rows with missing values and name columns 0/1."""
    return pd.read_csv(path).transpose().dropna().rename({0: 'loss', 1: 'val_loss'}, axis=1)


# NOTE(review): the training loop writes 'report(<fold>).txt' using the fold directory names;
# confirm those names match the 'fold_<n>' pattern read here.
vis_fold0, vis_fold1, vis_fold2, vis_fold3, vis_fold4 = (
    _load_fold_report(f"report(fold_{fold_number}).txt") for fold_number in range(5)
)

In [23]:
# Display the fold-4 report transposed, so the 'loss' and 'val_loss' series appear as rows.
vis_fold4.transpose()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54,Unnamed: 55,Unnamed: 56,Unnamed: 57,Unnamed: 58,Unnamed: 59,Unnamed: 60,Unnamed: 61,Unnamed: 62,Unnamed: 63,Unnamed: 64,Unnamed: 65,Unnamed: 66,Unnamed: 67,Unnamed: 68,Unnamed: 69,Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75,Unnamed: 76,Unnamed: 77,Unnamed: 78,Unnamed: 79,Unnamed: 80,Unnamed: 81,Unnamed: 82,Unnamed: 83,Unnamed: 84,Unnamed: 85,Unnamed: 86,Unnamed: 87,Unnamed: 88,Unnamed: 89,Unnamed: 90,Unnamed: 91,Unnamed: 92,Unnamed: 93,Unnamed: 94,Unnamed: 95,Unnamed: 96,Unnamed: 97,Unnamed: 98,Unnamed: 99,Unnamed: 100,Unnamed: 101,Unnamed: 102,Unnamed: 103,Unnamed: 104,Unnamed: 105,Unnamed: 106,Unnamed: 107,Unnamed: 108,Unnamed: 109,Unnamed: 110,Unnamed: 111,Unnamed: 112,Unnamed: 113,Unnamed: 114,Unnamed: 115,Unnamed: 116,Unnamed: 117,Unnamed: 118,Unnamed: 119,Unnamed: 120,Unnamed: 121,Unnamed: 122,Unnamed: 123,Unnamed: 124,Unnamed: 125,Unnamed: 126,Unnamed: 127,Unnamed: 128,Unnamed: 129,Unnamed: 130,Unnamed: 131,Unnamed: 132,Unnamed: 133,Unnamed: 134,Unnamed: 135,Unnamed: 136,Unnamed: 137,Unnamed: 138,Unnamed: 139,Unnamed: 140,Unnamed: 141,Unnamed: 142,Unnamed: 143,Unnamed: 144,Unnamed: 145,Unnamed: 146,Unnamed: 147,Unnamed: 148,Unnamed: 149,Unnamed: 150,Unnamed: 151,Unnamed: 152,Unnamed: 153,Unnamed: 154,Unnamed: 155,Unnamed: 156,Unnamed: 157,Unnamed: 158,Unnamed: 159,Unnamed: 160,Unnamed: 
161,Unnamed: 162,Unnamed: 163,Unnamed: 164,Unnamed: 165,Unnamed: 166,Unnamed: 167,Unnamed: 168,Unnamed: 169,Unnamed: 170,Unnamed: 171,Unnamed: 172,Unnamed: 173,Unnamed: 174,Unnamed: 175,Unnamed: 176,Unnamed: 177,Unnamed: 178,Unnamed: 179,Unnamed: 180,Unnamed: 181,Unnamed: 182,Unnamed: 183,Unnamed: 184,Unnamed: 185,Unnamed: 186,Unnamed: 187,Unnamed: 188,Unnamed: 189,Unnamed: 190,Unnamed: 191,Unnamed: 192,Unnamed: 193,Unnamed: 194,Unnamed: 195,Unnamed: 196,Unnamed: 197,Unnamed: 198,Unnamed: 199,Unnamed: 200,Unnamed: 201,Unnamed: 202,Unnamed: 203,Unnamed: 204,Unnamed: 205,Unnamed: 206,Unnamed: 207,Unnamed: 208,Unnamed: 209,Unnamed: 210,Unnamed: 211,Unnamed: 212,Unnamed: 213,Unnamed: 214,Unnamed: 215,Unnamed: 216,Unnamed: 217,Unnamed: 218,Unnamed: 219,Unnamed: 220,Unnamed: 221,Unnamed: 222,Unnamed: 223,Unnamed: 224,Unnamed: 225,Unnamed: 226,Unnamed: 227,Unnamed: 228,Unnamed: 229,Unnamed: 230,Unnamed: 231,Unnamed: 232,Unnamed: 233,Unnamed: 234,Unnamed: 235,Unnamed: 236,Unnamed: 237,Unnamed: 238,Unnamed: 239,Unnamed: 240,Unnamed: 241,Unnamed: 242,Unnamed: 243,Unnamed: 244,Unnamed: 245,Unnamed: 246,Unnamed: 247,Unnamed: 248,Unnamed: 249,Unnamed: 250,Unnamed: 251,Unnamed: 252,Unnamed: 253,Unnamed: 254,Unnamed: 255,Unnamed: 256,Unnamed: 257,Unnamed: 258,Unnamed: 259,Unnamed: 260,Unnamed: 261,Unnamed: 262,Unnamed: 263,Unnamed: 264,Unnamed: 265,Unnamed: 266,Unnamed: 267,Unnamed: 268,Unnamed: 269,Unnamed: 270,Unnamed: 271,Unnamed: 272,Unnamed: 273,Unnamed: 274,Unnamed: 275,Unnamed: 276,Unnamed: 277,Unnamed: 278,Unnamed: 279,Unnamed: 280
2.152114,2.028554,1.993559,1.987198,1.956814,1.932819,1.941382,1.880592,1.906679,1.866416,1.871581,1.852335,1.871859,1.850745,1.848397,1.833752,1.830009,1.804168,1.79985,1.809288,1.806654,1.773883,1.790766,1.792052,1.806848,1.76396,1.783127,1.789814,1.766788,1.754636,1.746399,1.759363,1.764121,1.73796,1.740266,1.753544,1.740095,1.722487,1.715724,1.76597,1.722376,1.737431,1.740878,1.716268,1.718662,1.749359,1.732501,1.722687,1.755071,1.740671,1.735276,1.724004,1.720122,1.724823,1.730852,1.730272,1.723604,1.719972,1.700495,1.716179,1.725008,1.7134,1.698461,1.705502,1.697265,1.712878,1.70711,1.693161,1.690045,1.68458,1.681169,1.684515,1.691104,1.705996,1.662241,1.673525,1.680148,1.675148,1.682232,1.666455,1.66746,1.687097,1.66664,1.674685,1.690043,1.674698,1.660046,1.662326,1.69226,1.695503,1.64583,1.683241,1.65902,1.649494,1.66499,1.655039,1.647671,1.683932,1.661482,1.651024,1.650633,1.660287,1.664157,1.661974,1.665484,1.665771,1.634932,1.655426,1.635588,1.68943,1.636374,1.633669,1.64326,1.640031,1.616034,1.643837,1.67009,1.635635,1.654487,1.617578,1.630967,1.660505,1.630614,1.632994,1.631832,1.623522,1.631327,1.611445,1.621396,1.62334,1.641601,1.615785,1.626536,1.602321,1.610074,1.596295,1.626307,1.615276,1.637799,1.597715,1.580216,1.622834,1.618826,1.615512,1.614541,1.608238,1.60089,1.58097,1.612881,1.618214,1.610688,1.609163,1.589262,1.6042,1.603801,1.637407,1.59887,1.613482,1.608747,1.59741,1.603725,1.573398,1.5867,1.595456,1.595238,1.591547,1.607953,1.597285,1.597814,1.587527,1.585727,1.592194,1.593917,1.606783,1.586157,1.587481,1.592045,1.589451,1.584334,1.607458,1.611391,1.601719,1.580401,1.598474,1.578745,1.579534,1.599907,1.582632,1.579904,1.594044,1.606838,1.562717,1.571489,1.585639,1.575713,1.564874,1.562625,1.589055,1.586277,1.578004,1.567907,1.568757,1.569758,1.600543,1.57493,1.553144,1.564999,1.55785,1.588434,1.536908,1.569752,1.572933,1.574649,1.549909,1.560665,1.571808,1.580384,1.56421,1.566056,1.578015,1.546504,1.555601,1.555321,1.551289,1.563147,1.56
4957,1.570598,1.566175,1.53862,1.543864,1.55884,1.560917,1.544997,1.569014,1.542867,1.541215,1.541076,1.533355,1.554456,1.533858,1.594088,1.546436,1.533553,1.540707,1.553854,1.542397,1.579514,1.548467,1.546188,1.55033,1.526506,1.537284,1.549089,1.517966,1.515467,1.528352,1.545305,1.529703,1.530432,1.539025,1.541132,1.542345,1.544319,1.548126,1.521253,1.512552,1.525316,1.521324,1.53497,1.505861,1.555032,1.511757,1.512781,1.503987,1.528536,1.527526,1.509305,1.519937,1.520334,1.523293,1.490358
2.442976,2.473269,2.471941,2.495829,2.495815,2.5082,2.519586,2.527896,2.539431,2.473621,2.516187,2.509669,2.479848,2.491319,2.458403,2.49384,2.501227,2.491037,2.50992,2.545729,2.508191,2.539808,2.527175,2.537963,2.497195,2.52176,2.510753,2.486971,2.503293,2.481631,2.50821,2.551196,2.524237,2.5195,2.547797,2.532359,2.512362,2.510831,2.53446,2.533396,2.553328,2.513744,2.522681,2.538894,2.541373,2.534599,2.539781,2.538617,2.506205,2.533346,2.55433,2.5228,2.529597,2.527678,2.523381,2.578052,2.564188,2.556118,2.525132,2.55006,2.556271,2.515918,2.552215,2.533959,2.562377,2.554351,2.576342,2.554395,2.525108,2.539458,2.499089,2.584424,2.573466,2.564564,2.562497,2.567408,2.509408,2.57464,2.533474,2.567053,2.605172,2.574546,2.540594,2.536372,2.566849,2.554292,2.578693,2.560474,2.561924,2.583923,2.566758,2.556427,2.525645,2.568263,2.592912,2.61325,2.529801,2.557004,2.581252,2.560691,2.57889,2.591536,2.556649,2.565464,2.577199,2.562845,2.582866,2.568813,2.540068,2.582238,2.596077,2.567762,2.57489,2.608025,2.560285,2.582908,2.597183,2.548002,2.567789,2.57518,2.552398,2.566154,2.570942,2.562782,2.577514,2.567482,2.586547,2.577532,2.576647,2.583671,2.590684,2.555356,2.576437,2.574572,2.593638,2.566353,2.608177,2.581939,2.58656,2.587568,2.605949,2.589546,2.576438,2.587531,2.579851,2.587369,2.612383,2.591506,2.57455,2.586586,2.619092,2.610348,2.567886,2.605407,2.564745,2.58686,2.566853,2.5909,2.584365,2.592685,2.585303,2.581932,2.609359,2.566999,2.575443,2.574224,2.645978,2.594931,2.607153,2.58694,2.590597,2.571661,2.569579,2.625858,2.590884,2.594009,2.622285,2.572132,2.589122,2.609689,2.587445,2.618214,2.588033,2.597775,2.602813,2.576362,2.587322,2.601652,2.595082,2.632388,2.593556,2.582623,2.594061,2.597769,2.600776,2.585055,2.610588,2.6152,2.624557,2.572349,2.584731,2.615343,2.619131,2.617325,2.590011,2.582981,2.602216,2.620078,2.57942,2.586047,2.6027,2.598166,2.599621,2.619183,2.589125,2.620167,2.599184,2.610314,2.566966,2.603123,2.630419,2.58365,2.595253,2.601965,2.594572,2.597
207,2.617274,2.594194,2.615123,2.580627,2.586665,2.608252,2.607596,2.611924,2.629581,2.631889,2.582738,2.583902,2.578857,2.608076,2.607757,2.627712,2.584707,2.582966,2.627996,2.589931,2.596489,2.611639,2.623537,2.617599,2.607616,2.631893,2.604098,2.583221,2.595048,2.643648,2.607356,2.590203,2.588784,2.62048,2.603611,2.598217,2.620902,2.578491,2.581339,2.607996,2.608644,2.607883,2.597636,2.597733,2.605806,2.624847,2.593233,2.605062,2.586619,2.593575,2.627722,2.614696,2.622908,2.621753,2.596821


### Evaluation

In [None]:
# Standardize the test features with the statistics of the TRAINING split.
# Fitting the scaler on the test set itself would normalise the test features with a different
# mean/variance than the training features the model saw, so the scalers are fitted on the
# raw training CSV and only applied to the test data.
# NOTE: test_dataset is modified in place — run this cell once per freshly loaded test_dataset.
training_reference = pd.read_csv("training_dataset.csv")

for variavel_climatica in ['humidity', 'pressure', 'temperature', 'wind']:
    scale = StandardScaler().fit(training_reference[variavel_climatica].to_numpy().reshape(-1, 1))
    test_dataset[variavel_climatica] = scale.transform(test_dataset[variavel_climatica].to_numpy().reshape(-1, 1))

test_dataset

In [None]:
# Show the test rows whose weather label is 'sky_is_clear'.
test_dataset[test_dataset['weather'] == 'sky_is_clear']

In [None]:
# Predict a single hand-picked sample and report the probability (in %) of every class.
xi = [0.373659, -2.581229, -0.317683, -0.104134]  # test sample
class_probabilities = neural_network.predict(np.array(xi).reshape(1, -1))[0]
prediction = dict(enumerate(round(probability * 100, 3) for probability in class_probabilities))

# Re-key the result from class index to weather label.
for weather_label, class_index in key_mapping.items():
    prediction[weather_label] = prediction.pop(class_index)

prediction  # probability of occurrence for each class


In [None]:
# Top-3 hit count on the test set: given how hard the dataset is, a sample counts as a hit
# when its true weather is among the three classes the model rates most likely.
# (This does not build a confusion matrix.)
feature_columns = ['humidity', 'pressure', 'temperature', 'wind']

# One batched forward pass instead of one predict() call per test row.
all_probabilities = neural_network.predict(test_dataset[feature_columns].to_numpy(), verbose=0)

count = 0
for position, i in enumerate(test_dataset.index):  # `i`, `xi`, `current_prediction` and
    # `network_choice` are kept as module-level names because later cells inspect them.
    xi = [test_dataset.loc[i, column] for column in feature_columns]
    current_prediction = {key: round(probability*100, 3) for key, probability in enumerate(all_probabilities[position])}

    # Rank class indices from most to least likely. Ranking the classes themselves (rather
    # than going through a probability -> class dict) keeps classes with equal probabilities
    # distinct, and never falls back to class 0 when fewer than three probabilities are > 0.
    # sorted() is stable, so ties keep ascending class-index order.
    ranked_classes = sorted(current_prediction, key=current_prediction.get, reverse=True)
    top_classes = ranked_classes[:3]
    network_choice = [current_prediction[classe] for classe in ranked_classes[:4]]

    if test_dataset.loc[i, 'weather'] in [reverse_key_mapping.get(classe) for classe in top_classes]:
        print('SIM \n\n\n')
        count+=1
        print(count,i)

    

In [None]:
# Display the integer code -> weather label table.
reverse_key_mapping

In [None]:
# Inspection cell: relies on `xi`, `i` and `network_choice` left over from the evaluation loop.
xi

# Recompute the per-class percentages for that sample.
current_prediction = {key: round(probability*100, 3) for key , probability in enumerate(neural_network.predict(np.array(xi).reshape(1,-1))[0])}



print(i, network_choice)
print(current_prediction)



In [None]:
# Percentage stored for class index 5 in the most recent prediction.
current_prediction.get(5)

In [None]:
# Raw model output for `xi`: first (and only) row of the prediction.
neural_network.predict(np.array(xi).reshape(1,-1))[0]

In [None]:
# Same prediction without indexing: the output for the single-row batch built from `xi`.
neural_network.predict(np.array(xi).reshape(1,-1))