# Código de set de datos AP5:
- Período de muestreo: T = 0.07043
- Agregando las variables categóricas: 
    - Pendiente en: SUBIDA, BAJADA, LINEA RECTA, CD0, CD1, CD2.
    - Velocidad: RAPIDA, MEDIA, LENTA.

## Creación del Dataset:

In [None]:
import TabSynth as TSS
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Build the AP5 dataset: for every parameter combination returned by
# TSS.Combinaciones, tune a controller with TSS.AsignacionPolos, run the
# closed loop against the PRBS reference and store one row per combination.

muestras = 142  # value handed to TSS.PRBS (presumably the sequence length — confirm)
T = 0.07043  # sampling period
resultados = []

# Seed NumPy *before* the PRBS reference and the combinations are created.
# Originally the seed was set after both calls, so it could not influence them.
# NOTE(review): this only helps if TabSynth draws from np.random — confirm.
np.random.seed(42)

PRBS = TSS.PRBS(muestras)
# Alternative reference kept from the original notebook:
# ref_sec = np.ones(muestras)
prbs = PRBS.Ref_PRBS()
ref_sec = prbs
combina = TSS.Combinaciones()
c = combina.combinaciones(True)

for idx in range(len(c)):
    combo = c[idx]
    # The first four entries of each combination are used below.
    K, tau_p, Kp_m, Ti_m = combo[0], combo[1], combo[2], combo[3]

    # Controller parameters and response figures reported by AsignacionPolos.
    metodoDirecto = TSS.AsignacionPolos(K, tau_p, T)
    param = metodoDirecto.parametros_controladorAP()
    Kp = param['Kp']
    Ti = param['Ti']
    T_asen = param['Tiempo_asentamiento']
    ess = param['Valor_estado_estacionario']
    sobrepaso = param['Sobrepaso']

    controlador = TSS.PIControlador(Kp, Ti, T)
    planta = TSS.PlantaPrimerOrden(K, tau_p, T)
    señal_control = []
    output_planta = []
    error = []

    # Disabled in the original notebook:
    # metrica = TSS.Metricas(K, tau_p, Kp, Ti, T, 'PeakTime')
    # met = metrica.metrica()

    # Categorical labels stored in the first two columns of each row.
    categoria = TSS.Categoria(K, tau_p, K_nominal=3.0, tau_nominal=1.5)
    cat_1 = categoria.categoria_pendiente()
    cat_2 = categoria.categoria_(T_asen)

    # Closed-loop simulation: the controller is fed the previous plant output
    # (0 on the very first step, when no output exists yet).
    for ref in ref_sec:
        v_med = output_planta[-1] if output_planta else 0
        u, e = controlador.actualizar_PI(ref, v_med)
        v_nuevo = planta.actualizar_Planta(u)
        señal_control.append(u)
        output_planta.append(v_nuevo)
        error.append(e)

    resultados.append((cat_1, cat_2, sobrepaso, T_asen, ess, Kp, Ti, Kp_m, Ti_m, tau_p, K,
                       *señal_control, *output_planta, *error))

columnas_fijas = ["Pendiente en", "Velocidad", "Overshoot", "Tiempo_asentamiento",
                  "Valor_estacionario", "Kp", "Ti", "Kp_m", "Ti_m", "tau_p", "K"]
# Each row carries three series (control signal, plant output, error), each as
# long as the reference, so the column count follows the reference length
# instead of the previously hard-coded 426.
n_columnas_señal = 3 * len(ref_sec)
columnas_señal = [f'Col_{i+1}' for i in range(n_columnas_señal)]
dataset2 = pd.DataFrame(resultados, columns=columnas_fijas + columnas_señal)


In [None]:
# Preview the first rows of the freshly generated dataset.
dataset2.head()

Unnamed: 0,Pendiente en,Velocidad,Overshoot,Tiempo_asentamiento,Valor_estacionario,Kp,Ti,Kp_m,Ti_m,tau_p,...,Col_417,Col_418,Col_419,Col_420,Col_421,Col_422,Col_423,Col_424,Col_425,Col_426
0,BAJADA,RAPIDA,1.735971,3.02849,1.0,0.15,0.639765,1.610204,1.332653,1.0,...,-0.425864,-0.405597,-0.385684,-0.366122,0.652854,0.62826,-0.396476,-0.378283,-0.360246,-0.34243
1,BAJADA,RAPIDA,1.735971,3.02849,1.0,0.15,0.639765,2.516327,0.881633,1.0,...,-0.425864,-0.405597,-0.385684,-0.366122,0.652854,0.62826,-0.396476,-0.378283,-0.360246,-0.34243
2,LINEA RECTA,MEDIA,3.753585,4.9301,1.0,0.466667,0.995191,2.440816,0.708163,1.5,...,-0.39691,-0.368582,-0.341579,-0.316012,0.708217,0.664708,-0.377082,-0.350628,-0.325366,-0.301332
3,CD2,LENTA,9.236373,5.07096,1.0,0.633333,1.350616,0.855102,0.864286,3.0,...,-0.365554,-0.329317,-0.295607,-0.264222,0.764897,0.701452,-0.358303,-0.324062,-0.292072,-0.262191
4,CD2,LENTA,9.236373,5.07096,1.0,0.95,1.350616,3.422449,0.916327,3.0,...,-0.366997,-0.330722,-0.296891,-0.265393,0.763866,0.700591,-0.359012,-0.324551,-0.292313,-0.262305


### División del conjunto de datos:

In [None]:
from ai4water.utils.utils import TrainTestSplit

# One splitter (30 % test fraction, fixed seed) is applied twice: first to the
# full dataset, then to the resulting training portion to carve out validation.
splitter = TrainTestSplit(test_fraction=0.3, seed=42)
TrainX, TestX, _, _ = splitter.split_by_random(dataset2)
train_set, val_set, _, _ = splitter.split_by_random(TrainX)

print(f'Tamaño del dataset2 original: {dataset2.shape}\n')

print('Valores de los set antes de la partición del set de entrenamiento:')
for etiqueta, conjunto, cierre in (
    ('Set de entrenamiento', TrainX, ''),
    ('Set de prueba', TestX, '\n'),
):
    print(f'\t{etiqueta}: {conjunto.shape}')
    print(f'\tTipo: {type(conjunto)}{cierre}')

print('Valores de los set después de dividir el set de entrenamiento (validation)')
# All of these sets still include the label columns.
for etiqueta, conjunto in (
    ('Set de entrenamiento', train_set),
    ('Set de validación', val_set),
):
    print(f'\t{etiqueta}: {conjunto.shape}')
    print(f'\tTipo: {type(conjunto)}')

  "Since version 1.0, "
  from .autonotebook import tqdm as notebook_tqdm


Tamaño del dataset2 original: (75000, 437)

Valores de los set antes de la partición del set de entrenamiento:
	Set de entrenamiento: (52500, 437)
	Tipo: <class 'pandas.core.frame.DataFrame'>
	Set de prueba: (22500, 437)
	Tipo: <class 'pandas.core.frame.DataFrame'>

Valores de los set después de dividir el set de entrenamiento (validation)
	Set de entrenamiento: (36750, 437)
	Tipo: <class 'pandas.core.frame.DataFrame'>
	Set de validación: (15750, 437)
	Tipo: <class 'pandas.core.frame.DataFrame'>


### Guardar el conjunto de datos:

In [None]:
# Export every dataset variant to CSV, without the row index.
directorio_salida = 'H:/Mi unidad/00_Magister/01_Tesis/Control_PID_Code_2/data/dataAP5'
archivos = [
    ('datasetAP5.csv', dataset2),        # complete dataset
    ('train_set_all_AP5.csv', TrainX),   # training portion of the first split
    ('test_set_all_AP5.csv', TestX),     # test set
    ('train_set_AP5.csv', train_set),    # training set after the validation split
    ('val_set_AP5.csv', val_set),        # validation set
]
for nombre_archivo, tabla in archivos:
    tabla.to_csv(f'{directorio_salida}/{nombre_archivo}', index=False)

# Datos para MLP. OneHotEncoding

## Cargar el conjunto de datos:

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Load the complete dataset from its CSV file.
dataset = pd.read_csv('H:/Mi unidad/00_Magister/01_Tesis/Control_PID_Code_2/data/dataAP5/datasetAP5.csv')

In [None]:
# Sanity check: list the rows whose 'Pendiente en' category is 'CD3'
# (the recorded output of this cell contains no such rows).
dataset[dataset['Pendiente en'].isin(['CD3'])]

Unnamed: 0,Pendiente en,Velocidad,Overshoot,Tiempo_asentamiento,Valor_estacionario,Kp,Ti,Kp_m,Ti_m,tau_p,...,Col_417,Col_418,Col_419,Col_420,Col_421,Col_422,Col_423,Col_424,Col_425,Col_426


In [None]:
# Show the first 10 rows of the dataset loaded from CSV.
dataset.head(10)

Unnamed: 0,Pendiente en,Velocidad,Overshoot,Tiempo_asentamiento,Valor_estacionario,Kp,Ti,Kp_m,Ti_m,tau_p,...,Col_417,Col_418,Col_419,Col_420,Col_421,Col_422,Col_423,Col_424,Col_425,Col_426
0,SUBIDA,LENTA,6.005233,5.00053,1.0,2.2,1.172903,3.271429,1.297959,2.0,...,-0.612403,-0.564433,0.480947,0.445399,0.411687,-0.620285,-0.572082,0.473538,0.438202,0.404717
1,SUBIDA,LENTA,7.831368,5.07096,1.0,3.0,1.279531,1.685714,0.760204,2.5,...,-0.611063,-0.558706,0.490529,0.451098,0.413925,-0.621107,-0.568424,0.481131,0.442064,0.405216
2,CD2,LENTA,9.236373,5.07096,1.0,0.633333,1.350616,0.930612,1.176531,3.0,...,-0.612104,-0.556708,0.49523,0.453288,0.413872,-0.623139,-0.567428,0.484784,0.44317,0.404046
3,CD0,RAPIDA,1.735971,3.02849,1.0,0.6,0.639765,1.232653,1.12449,1.0,...,-0.613163,-0.586746,0.439479,0.422375,0.40518,-0.611986,-0.586006,0.439835,0.422391,0.404892
4,CD2,LENTA,6.005233,5.00053,1.0,0.55,1.172903,3.120408,1.12449,2.0,...,-0.612048,-0.564152,0.481127,0.445455,0.411667,-0.620406,-0.572328,0.473147,0.437647,0.404013
5,BAJADA,MEDIA,3.753585,4.9301,1.0,0.233333,0.995191,1.006122,1.037755,1.5,...,-0.611873,-0.570977,0.468201,0.43903,0.410894,-0.616152,-0.575406,0.463471,0.434033,0.405874
6,BAJADA,RAPIDA,1.735971,3.02849,1.0,0.12,0.639765,2.667347,1.107143,1.0,...,-0.613203,-0.586781,0.439448,0.422416,0.405352,-0.611833,-0.585734,0.440077,0.422673,0.405279
7,SUBIDA,MEDIA,3.753585,4.9301,1.0,1.4,0.995191,2.591837,1.089796,1.5,...,-0.612152,-0.571424,0.467529,0.43825,0.410156,-0.616778,-0.576069,0.462867,0.433572,0.405465
8,CD2,LENTA,9.236373,5.07096,1.0,0.76,1.350616,3.64898,1.193878,3.0,...,-0.610748,-0.555351,0.496495,0.454582,0.415104,-0.621898,-0.56615,0.486095,0.444535,0.405457
9,CD2,LENTA,9.236373,5.07096,1.0,0.76,1.350616,3.120408,0.656122,3.0,...,-0.610748,-0.555351,0.496495,0.454582,0.415104,-0.621898,-0.56615,0.486095,0.444535,0.405457


In [None]:
# Replace the two categorical columns with one-hot indicator columns.
catego_onehot = ['Pendiente en', 'Velocidad']

# Build an encoder that returns a dense array. scikit-learn 1.2 renamed the
# ``sparse`` keyword to ``sparse_output`` and 1.4 removed the old spelling, so
# try the current keyword first and fall back for older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

# Encode the categorical columns.
encoded_arrays = encoder.fit_transform(dataset[catego_onehot])

# Use the bare category values as column names (no source-column prefix).
# NOTE: this relies on the two columns not sharing any category value.
column_name = encoder.categories_
flat_names = [name for sublist in column_name for name in sublist]

# Wrap the encoded array in a DataFrame with those names.
encoded_dataset = pd.DataFrame(encoded_arrays, columns=flat_names)

# Append the encoded columns; the index is reset so both frames align by row position.
df = pd.concat([dataset.reset_index(drop=True), encoded_dataset], axis=1)

# Drop the original categorical columns now that they are encoded.
df.drop(columns=catego_onehot, inplace=True)

In [None]:
# Preview the frame after one-hot encoding.
df.head()

Unnamed: 0,Overshoot,Tiempo_asentamiento,Valor_estacionario,Kp,Ti,Kp_m,Ti_m,tau_p,K,Col_1,...,Col_425,Col_426,BAJADA,CD0,CD2,LINEA RECTA,SUBIDA,LENTA,MEDIA,RAPIDA
0,6.005233,5.00053,1.0,2.2,1.172903,3.271429,1.297959,2.0,1,2.266052,...,0.438202,0.404717,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
1,7.831368,5.07096,1.0,3.0,1.279531,1.685714,0.760204,2.5,1,3.082565,...,0.442064,0.405216,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
2,9.236373,5.07096,1.0,0.633333,1.350616,0.930612,1.176531,3.0,6,0.649846,...,0.44317,0.404046,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
3,1.735971,3.02849,1.0,0.6,0.639765,1.232653,1.12449,1.0,1,0.633026,...,0.422391,0.404892,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,6.005233,5.00053,1.0,0.55,1.172903,3.120408,1.12449,2.0,4,0.566513,...,0.437647,0.404013,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0


In [None]:
def reorder_columns(df, columns_order):
    """Return ``df`` with the columns in ``columns_order`` placed first.

    Columns of ``df`` that are not named in ``columns_order`` follow
    afterwards, keeping the order they have in ``df``.

    Args:
        df: DataFrame whose columns are rearranged.
        columns_order: list of column labels that must come first.

    Returns:
        The result of selecting ``df`` with the combined label list.
    """
    leftovers = []
    for label in df.columns:
        if label in columns_order:
            continue
        leftovers.append(label)
    full_order = columns_order + leftovers
    return df[full_order]

# Columns that must lead the frame, in this order; reorder_columns appends
# every remaining column after them.
new_order = [
    'Kp', 'Ti',
    'BAJADA', 'LINEA RECTA', 'SUBIDA',
    'LENTA', 'MEDIA', 'RAPIDA',
    'CD0', 'CD2',
    'Overshoot', 'Tiempo_asentamiento', 'Valor_estacionario',
    'Kp_m', 'Ti_m', 'tau_p', 'K',
]
dataset = reorder_columns(df, new_order)

In [None]:
# Preview the frame after the columns were reordered.
dataset.head()

Unnamed: 0,Kp,Ti,BAJADA,LINEA RECTA,SUBIDA,LENTA,MEDIA,RAPIDA,CD0,CD2,...,Col_417,Col_418,Col_419,Col_420,Col_421,Col_422,Col_423,Col_424,Col_425,Col_426
0,2.2,1.172903,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,-0.612403,-0.564433,0.480947,0.445399,0.411687,-0.620285,-0.572082,0.473538,0.438202,0.404717
1,3.0,1.279531,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,-0.611063,-0.558706,0.490529,0.451098,0.413925,-0.621107,-0.568424,0.481131,0.442064,0.405216
2,0.633333,1.350616,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,-0.612104,-0.556708,0.49523,0.453288,0.413872,-0.623139,-0.567428,0.484784,0.44317,0.404046
3,0.6,0.639765,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,-0.613163,-0.586746,0.439479,0.422375,0.40518,-0.611986,-0.586006,0.439835,0.422391,0.404892
4,0.55,1.172903,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,-0.612048,-0.564152,0.481127,0.445455,0.411667,-0.620406,-0.572328,0.473147,0.437647,0.404013


### Eliminamos las columnas de valor estacionario, Overshoot y tiempo de asentamiento:

In [None]:
# Remove the three response-metric columns from the frame.
columnas_metricas = ['Overshoot', 'Tiempo_asentamiento', 'Valor_estacionario']
dataset = dataset.drop(columns=columnas_metricas)

### Guardamos dataset con el One Hot Encoding:

In [None]:
# Show the first two rows of the current frame.
dataset.head(2)

Unnamed: 0,Kp,Ti,BAJADA,LINEA RECTA,SUBIDA,LENTA,MEDIA,RAPIDA,CD0,CD2,...,Col_417,Col_418,Col_419,Col_420,Col_421,Col_422,Col_423,Col_424,Col_425,Col_426
0,2.2,1.172903,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,-0.612403,-0.564433,0.480947,0.445399,0.411687,-0.620285,-0.572082,0.473538,0.438202,0.404717
1,3.0,1.279531,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,-0.611063,-0.558706,0.490529,0.451098,0.413925,-0.621107,-0.568424,0.481131,0.442064,0.405216


In [None]:
# Write the current frame to CSV, omitting the row index.
dataset.to_csv('H:/Mi unidad/00_Magister/01_Tesis/Control_PID_Code_2/data/OneHot/datasetAP5_OneHot.csv', index=False)