In [195]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras 
import glob as gl
import os

In [196]:
path = "src/results"

""" datetime = "/2011-03-24_15"
df = pd.read_csv(path + datetime + '.csv')  """


dfs = []

for file in gl.glob(path + '/20*.csv'):
    dfs.append(pd.read_csv(file))

df = pd.concat(dfs, ignore_index=True)
df.count() #cada csv tiene 160920 filas

overflow                            482760
cap                                 482760
load                                482760
dist                                482760
origen_id                           482760
dest_id                             482760
len_origen_tag                      482760
len_dest_tag                        482760
modelo                              482760
criterion                           482760
degree                              482760
total_balance                       482760
abs_flux                            482760
timestamp                           482760
datetime                            482760
h                                   482760
Beam Irradiance (W/m2)              482760
Diffuse Irradiance (W/m2)           482760
Ambient Temperature (C)             482760
Plane of Array Irradiance (W/m2)    482760
Cell Temperature (C)                482760
DC Array Output (W)                 482760
Pavg                                482760
dif        

## Encoder

In [197]:
modelo = df.iloc[:, 8].values 
from sklearn.preprocessing import LabelEncoder
modelo = LabelEncoder().fit_transform(modelo) #codificación del modelo

In [198]:
df = df.drop(df.columns[8], axis=1) #se elimina la antigua con los strings del modelo
df['modelo'] = modelo #se añade la nueva codificada al final

X = df.iloc[:, 1:] 
X = X.drop(['datetime', 'timestamp', 'load', 'DC Array Output (W)' , 'Pavg', 'dif'], axis=1)
y = df.iloc[:, 0].values #valores de overflow

X.columns

Index(['cap', 'dist', 'origen_id', 'dest_id', 'len_origen_tag', 'len_dest_tag',
       'criterion', 'degree', 'total_balance', 'abs_flux', 'h',
       'Beam Irradiance (W/m2)', 'Diffuse Irradiance (W/m2)',
       'Ambient Temperature (C)', 'Plane of Array Irradiance (W/m2)',
       'Cell Temperature (C)', 'modelo'],
      dtype='object')

## ANN Model

In [199]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [200]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler() #escalado de datos
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [201]:
model = keras.Sequential([
    
    #Adding the input layer and the first hidden layer
    #se prueba con 6, los valores se obtienen probando o con alguna técnica como k Fold Cross Validation
    keras.layers.Dense(6, input_shape=(X.shape[1],), activation='relu'), #nº de entradas
    
    #Adding the second hidden layer
    #mismo valor que arriba
    keras.layers.Dense(6, activation='relu'),
    
    #Adding the output layer -> 1 salida: 0 o 1
    #*si no es salida binaria, se indica el número de posibles salidas y activation='softmax'
    keras.layers.Dense(1, activation='sigmoid')
])

model.summary() #resumen de la estructura de la red neuronal
#los parámetros son los pesos

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_21 (Dense)            (None, 6)                 108       
                                                                 
 dense_22 (Dense)            (None, 6)                 42        
                                                                 
 dense_23 (Dense)            (None, 1)                 7         
                                                                 
Total params: 157 (628.00 Byte)
Trainable params: 157 (628.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [202]:
#binary_crossentropy -> clasificación binaria (si/no)
#*si no es salida binaria, se aplica categorical_crossentropy o sparse_categorical_crossentropy
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [203]:
model.fit(X_train, y_train, batch_size = 50, epochs = 8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.src.callbacks.History at 0x20e23c49eb0>

In [204]:
#se obtienen las pérdidas y la precisión del modelo
test_loss, test_accuracy = model.evaluate(X_test, y_test)
#si la precisión obtenida aquí es mucho menor que la de las épocas, se está sobreentrenando el modelo



In [205]:
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5)

#comparación entre las predicciones y el test
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


## Evaluation

In [206]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[94467   218]
 [ 1518   349]]


0.9820200513712818

In [207]:
from sklearn.model_selection import cross_val_score

accuracies = cross_val_score(estimator = model, X = X_train, y = y_train, cv = 10) #se añade el clasificador o modelo, cv = K
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

TypeError: If no scoring is specified, the estimator passed should have a 'score' method. The estimator <keras.src.engine.sequential.Sequential object at 0x0000020E239DBDF0> does not.

## Grid Search

In [209]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

parameters = {
    'hidden_layer_sizes': [(5,5,1), (6,6,1), (7,7,1), (8,8,1)],
    'activation': ['logistic', 'tanh','relu'],
    'solver': ['lbfgs', 'sgd','adam'],
    #'batch_size': [25, 50, 75, 100],
    #'epochs': [10, 20, 30],
    'learning_rate': ['constant','adaptive'],
}

grid_search = GridSearchCV(MLPClassifier(max_iter=100), parameters, n_jobs=-1, cv=10, scoring = 'accuracy')
grid_search.fit(X_train, y_train)

best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Best Accuracy: 98.20 %
Best Parameters: {'activation': 'tanh', 'hidden_layer_sizes': (7, 7, 1), 'learning_rate': 'constant', 'solver': 'adam'}
