In [20]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras 
import glob as gl
import os
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [21]:
path = "src/results"

""" datetime = "/2011-03-24_15"
df = pd.read_csv(path + datetime + '.csv')  """


dfs = []

for file in gl.glob(path + '/20*.csv'):
    dfs.append(pd.read_csv(file))

df = pd.concat(dfs, ignore_index=True)
#df.count() #cada csv tiene 160920 filas

## Label Encoder

In [22]:
modelo = df.iloc[:, 8].values 
modelo = LabelEncoder().fit_transform(modelo) #codificación del modelo

In [23]:
df = df.drop(df.columns[8], axis=1) #se elimina la antigua con los strings del modelo
df['modelo'] = modelo #se añade la nueva codificada al final

X = df.iloc[:, 1:] 
X = X.drop(['datetime', 'timestamp', 'load', 'DC Array Output (W)' , 'Pavg', 'dif'], axis=1)
y = df.iloc[:, 0].values #valores de overflow

X.columns

Index(['cap', 'dist', 'origen_id', 'dest_id', 'len_origen_tag', 'len_dest_tag',
       'criterion', 'degree', 'total_balance', 'abs_flux', 'h',
       'Beam Irradiance (W/m2)', 'Diffuse Irradiance (W/m2)',
       'Ambient Temperature (C)', 'Plane of Array Irradiance (W/m2)',
       'Cell Temperature (C)', 'modelo'],
      dtype='object')

## Split

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

sc = StandardScaler() #escalado de datos
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## ANN

In [25]:
"""model = keras.Sequential([
    
    #Adding the input layer and the first hidden layer
    #se prueba con 6, los valores se obtienen probando o con alguna técnica como k Fold Cross Validation
    keras.layers.Dense(6, input_shape=(X.shape[1],), activation='relu'), #nº de entradas
    
    #Adding the second hidden layer
    #mismo valor que arriba
    keras.layers.Dense(6, activation='relu'),
    
    #Adding the output layer -> 1 salida: 0 o 1
    #*si no es salida binaria, se indica el número de posibles salidas y activation='softmax'
    keras.layers.Dense(1, activation='sigmoid')
]) """

model = keras.Sequential([
    
    #Adding the input layer and the first hidden layer
    #se prueba con 6, los valores se obtienen probando o con alguna técnica como k Fold Cross Validation
    keras.layers.Dense(7, input_shape=(X.shape[1],), activation='relu'), #nº de entradas
    
    #Adding the second hidden layer
    #mismo valor que arriba
    keras.layers.Dense(7, activation='tanh'),
    
    #Adding the output layer -> 1 salida: 0 o 1
    #*si no es salida binaria, se indica el número de posibles salidas y activation='softmax'
    keras.layers.Dense(1, activation='sigmoid')
]) 

model.summary() #resumen de la estructura de la red neuronal
#los parámetros son los pesos

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 7)                 126       
                                                                 
 dense_7 (Dense)             (None, 7)                 56        
                                                                 
 dense_8 (Dense)             (None, 1)                 8         
                                                                 
Total params: 190 (760.00 Byte)
Trainable params: 190 (760.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
#binary_crossentropy -> clasificación binaria (si/no)
#*si no es salida binaria, se aplica categorical_crossentropy o sparse_categorical_crossentropy
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
model.fit(X_train, y_train, batch_size = 50, epochs = 8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.src.callbacks.History at 0x21088e66c70>

In [27]:
#se obtienen las pérdidas y la precisión del modelo
test_loss, test_accuracy = model.evaluate(X_test, y_test)
#si la precisión obtenida aquí es mucho menor que la de las épocas, se está sobreentrenando el modelo

#loss:  -> ANN1
#loss: 0.0689 - accuracy: 0.9786 -> ANN2



## Evaluación

### Matriz de confusión

In [28]:
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

#[[376455    689] [  7515   1549]] 0.9787575606926837 -> ANN1



ValueError: Classification metrics can't handle a mix of binary and continuous targets

### K-Fold

In [None]:
""" ESTO NO ESTA HECHO
accuracies = cross_val_score(estimator = model, X = X_train, y = y_train, cv = 10) #se añade el clasificador o modelo, cv = K
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100)) """

' from sklearn.model_selection import cross_val_score\n\naccuracies = cross_val_score(estimator = model, X = X_train, y = y_train, cv = 10) #se añade el clasificador o modelo, cv = K\nprint("Accuracy: {:.2f} %".format(accuracies.mean()*100))\nprint("Standard Deviation: {:.2f} %".format(accuracies.std()*100)) '

## Grid Search

In [None]:
""" 
parameters = {
    'hidden_layer_sizes': [(4,4,1), (5,5,1), (6,6,1), (7,7,1), (8,8,1)],
    'activation': ['sigmoid', 'tanh','relu'],
    'solver': ['adam'],
}

grid_search = GridSearchCV(MLPClassifier(max_iter=100), parameters, n_jobs=-1, cv=10, scoring = 'accuracy')
grid_search.fit(X_train, y_train)

best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)


########
Best Accuracy: 98.20 %
Best Parameters: {'activation': 'tanh', 'hidden_layer_sizes': (7, 7, 1), 'learning_rate': 'constant', 'solver': 'adam'} """

' from sklearn.model_selection import GridSearchCV\nfrom sklearn.neural_network import MLPClassifier\n\nparameters = {\n    \'hidden_layer_sizes\': [(5,5,1), (6,6,1), (7,7,1), (8,8,1)],\n    \'activation\': [\'logistic\', \'tanh\',\'relu\'],\n    \'solver\': [\'lbfgs\', \'sgd\',\'adam\'],\n    #\'batch_size\': [25, 50, 75, 100],\n    #\'epochs\': [10, 20, 30],\n    \'learning_rate\': [\'constant\',\'adaptive\'],\n}\n\ngrid_search = GridSearchCV(MLPClassifier(max_iter=100), parameters, n_jobs=-1, cv=10, scoring = \'accuracy\')\ngrid_search.fit(X_train, y_train)\n\nbest_accuracy = grid_search.best_score_\nbest_parameters = grid_search.best_params_\nprint("Best Accuracy: {:.2f} %".format(best_accuracy*100))\nprint("Best Parameters:", best_parameters)\n\n\n########\nBest Accuracy: 98.20 %\nBest Parameters: {\'activation\': \'tanh\', \'hidden_layer_sizes\': (7, 7, 1), \'learning_rate\': \'constant\', \'solver\': \'adam\'} '