In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable

In [None]:
# Mount Google Drive under /content/drive (Colab only); the CSV paths used by
# the loading cells point inside this mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location, on the mounted Drive, of the CSV with the monthly NINO3.4 series.
archivon = '/content/drive/My Drive/Social./Trabajo./InDataAnalitycs/sstn34.csv'
# Load the file; the displayed table has "Unnamed: 0", YEAR, Month and NINO3.4 columns.
dfn = pd.read_csv(archivon)

In [None]:
# Location, on the mounted Drive, of the CSV with the monthly ANOM3.4 series.
archivoa = '/content/drive/My Drive/Social./Trabajo./InDataAnalitycs/ssta34.csv'
# Load the file; later cells read its YEAR, Month and ANOM3.4 columns.
dfa = pd.read_csv(archivoa)

# **Standard temperature**

In [None]:
# Show the NINO3.4 table as loaded (the notebook elides the middle rows).
dfn

Unnamed: 0,YEAR,Month,NINO3.4
0,1982,2,26.54
1,1982,3,27.09
2,1982,4,27.83
3,1982,5,28.37
4,1982,6,28.35
...,...,...,...
495,2023,5,28.35
496,2023,6,28.59
497,2023,7,28.37
498,2023,8,28.20


In [None]:
# Preprocess the data
# Build a "Date" column from YEAR and Month (day fixed to 1) so each row has a timestamp
dfn['Date'] = pd.to_datetime(dfn[['YEAR', 'Month']].assign(day=1))
# Use the "Date" column as the index
dfn.set_index('Date', inplace=True)
# Keep an untouched copy of the raw temperatures. Without it, re-running this
# cell would z-score values that are already z-scored and overwrite
# NINO34_mean / NINO34_std with ~0 / ~1, breaking the later de-normalisation.
if 'NINO3.4_raw' not in dfn.columns:
    dfn['NINO3.4_raw'] = dfn['NINO3.4']
# Normalise (z-score) the NINO3.4 temperatures, always starting from the raw copy.
# NOTE(review): the statistics are computed over the whole series, including the
# rows that later end up in the test split.
NINO34_mean = dfn['NINO3.4_raw'].mean()
NINO34_std = dfn['NINO3.4_raw'].std()
dfn['NINO3.4'] = (dfn['NINO3.4_raw'] - NINO34_mean) / NINO34_std


In [None]:
# Build the (input window, target) training pairs
sequence_length = 12  # one year of monthly values is used to predict the following month
# Pull the column out once; every window below is a slice of this array.
nino34_values = dfn['NINO3.4'].values
# Pair each run of `sequence_length` consecutive values with the value right after it.
sequences = [
    (nino34_values[start:start + sequence_length], nino34_values[start + sequence_length])
    for start in range(len(dfn) - sequence_length)
]

In [None]:
# Chronological split: the first 80% of the pairs train the model, the rest test it
train_size = int(0.8 * len(sequences))
train_data, test_data = sequences[:train_size], sequences[train_size:]

In [None]:
# LSTM-based regression model for the NINO3.4 series
class NINO34Model(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sequence.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return a (batch, 1) prediction for a (batch, seq_len, input_size) input."""
        # Zero initial hidden/cell states, allocated with x's dtype and device so
        # the model keeps working after .double() or after being moved off the CPU.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Only the output of the last time step feeds the regression head.
        out = self.fc(out[:, -1, :])
        return out


In [None]:
# Create the model
# Fix the RNG seed first so the random weight initialisation (and therefore the
# training run that follows) is repeatable across kernel restarts.
torch.manual_seed(42)
input_size = 1  # one feature per time step: the NINO3.4 temperature
hidden_size = 64
num_layers = 2
model = NINO34Model(input_size, hidden_size, num_layers)


In [None]:
# Loss function and optimizer
# Mean squared error between the model output and the target value.
criterion = nn.MSELoss()
# Adam over all parameters of `model`, learning rate 0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train the model
num_epochs = 100

# Convert the training pairs to tensors once instead of on every epoch.
# Inputs are stored as (N, 1, sequence_length, 1) and targets as (N, 1, 1), so
# indexing one sample yields a (1, sequence_length, 1) batch and a (1, 1) target.
# The target then has the same shape as the model output, which removes the
# MSELoss broadcasting warning.
train_inputs = torch.tensor(
    np.array([seq for seq, _ in train_data]), dtype=torch.float32
).reshape(-1, 1, sequence_length, 1)
train_targets = torch.tensor(
    np.array([tgt for _, tgt in train_data]), dtype=torch.float32
).reshape(-1, 1, 1)

model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    # One optimisation step per sample (batch size 1), in chronological order.
    for sequence, target in zip(train_inputs, train_targets):
        # Forward pass
        outputs = model(sequence)
        loss = criterion(outputs, target)

        # Backward pass and optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    if (epoch + 1) % 10 == 0:
        # Report the mean loss over the epoch rather than the last sample's loss,
        # which is too noisy to show whether training is converging.
        mean_epoch_loss = epoch_loss / max(len(train_inputs), 1)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_epoch_loss:.4f}')


Using a target size (torch.Size([1])) that is different to the input size (torch.Size([1, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.



Epoch [10/100], Loss: 0.2030
Epoch [20/100], Loss: 0.0822
Epoch [30/100], Loss: 0.0748
Epoch [40/100], Loss: 0.0011
Epoch [50/100], Loss: 0.0016
Epoch [60/100], Loss: 0.0070
Epoch [70/100], Loss: 0.0028
Epoch [80/100], Loss: 0.0014
Epoch [90/100], Loss: 0.0049
Epoch [100/100], Loss: 0.0079


In [None]:
# Evaluate the model on the test set
model.eval()
test_losses = []
with torch.no_grad():
    for sequence, target in test_data:
        # (1, sequence_length, 1): a batch of one univariate sequence
        sequence = torch.tensor(sequence, dtype=torch.float32).reshape(1, sequence_length, 1)
        # (1, 1): same shape as the model output, so MSELoss does not broadcast
        target = torch.tensor([[float(target)]], dtype=torch.float32)

        outputs = model(sequence)
        loss = criterion(outputs, target)
        test_losses.append(loss.item())

# Mean of the per-sample squared errors (in normalised units)
average_test_loss = np.mean(test_losses)
print(f'Loss mean in the test set: {average_test_loss:.4f}')


Loss mean in the test set: 0.1707


In [None]:
# Forecast the NINO3.4 temperature for the next 8 years (96 monthly steps)
future_predictions = []
# Seed the rolling window with the most recent `sequence_length` observations.
# test_data[-1][0] must not be used here: its target is the last observed month,
# so that window ends one month early and the first "forecast" would be a
# re-prediction of a value that is already known.
last_sequence = dfn['NINO3.4'].iloc[-sequence_length:].values.copy()
model.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    for i in range(8 * 12):
        sequence = torch.tensor(last_sequence, dtype=torch.float32).reshape(1, sequence_length, 1)
        prediction = model(sequence)
        future_predictions.append(prediction.item())
        # Slide the window: drop the oldest value and append the new prediction.
        last_sequence = np.append(last_sequence[1:], prediction.item())

In [None]:
# De-normalise the predictions (inverse of the z-score applied to the NINO3.4 column)
# NOTE: this rebinds `future_predictions`, so running the cell a second time
# applies the inverse transform twice.
future_predictions = np.array(future_predictions) * NINO34_std + NINO34_mean


In [None]:
# Print the forecasts for the next 8 years, labelled by relative year and month
for step, forecast_value in enumerate(future_predictions):
    # step 0..11 -> year 1, months 1..12; step 12..23 -> year 2; and so on
    year_offset, month_offset = divmod(step, 12)
    print(f'YEAR {year_offset + 1}, Mes {month_offset + 1}: Prediction NINO34 = {forecast_value:.4f}')

YEAR 1, Mes 1: Prediction NINO34 = 28.3038
YEAR 1, Mes 2: Prediction NINO34 = 28.6286
YEAR 1, Mes 3: Prediction NINO34 = 28.6965
YEAR 1, Mes 4: Prediction NINO34 = 28.4884
YEAR 1, Mes 5: Prediction NINO34 = 28.2614
YEAR 1, Mes 6: Prediction NINO34 = 28.3656
YEAR 1, Mes 7: Prediction NINO34 = 28.4929
YEAR 1, Mes 8: Prediction NINO34 = 28.3820
YEAR 1, Mes 9: Prediction NINO34 = 27.1393
YEAR 1, Mes 10: Prediction NINO34 = 26.3750
YEAR 1, Mes 11: Prediction NINO34 = 25.8043
YEAR 1, Mes 12: Prediction NINO34 = 25.6035
YEAR 2, Mes 1: Prediction NINO34 = 25.2400
YEAR 2, Mes 2: Prediction NINO34 = 25.2087
YEAR 2, Mes 3: Prediction NINO34 = 24.9250
YEAR 2, Mes 4: Prediction NINO34 = 24.8867
YEAR 2, Mes 5: Prediction NINO34 = 25.5539
YEAR 2, Mes 6: Prediction NINO34 = 26.3660
YEAR 2, Mes 7: Prediction NINO34 = 27.0738
YEAR 2, Mes 8: Prediction NINO34 = 27.3239
YEAR 2, Mes 9: Prediction NINO34 = 27.2903
YEAR 2, Mes 10: Prediction NINO34 = 27.0909
YEAR 2, Mes 11: Prediction NINO34 = 26.5571
YEAR 2

In [None]:
from sklearn.metrics import mean_squared_error

# Evaluate the model on the test set, keeping targets and predictions
model.eval()
test_losses = []
true_values = []  # observed (normalised) targets
predicted_values = []  # model outputs for the same samples

with torch.no_grad():
    for sequence, target in test_data:
        # (1, sequence_length, 1): a batch of one univariate sequence
        sequence = torch.tensor(sequence, dtype=torch.float32).reshape(1, sequence_length, 1)
        # (1, 1): same shape as the model output, so MSELoss does not broadcast
        target = torch.tensor([[float(target)]], dtype=torch.float32)

        outputs = model(sequence)
        loss = criterion(outputs, target)
        test_losses.append(loss.item())

        # Store the observed value and the prediction as plain floats
        true_values.append(target.item())
        predicted_values.append(outputs.item())

average_test_loss = np.mean(test_losses)
print(f'Loss mean in the test set (MSE): {average_test_loss:.4f}')

# Compute the MSE over the collected values (cross-check of the figure above)
mse = mean_squared_error(true_values, predicted_values)
print(f'Mean cuadratic error (MSE) in the test set: {mse:.4f}')


Loss mean in the test set (MSE): 0.1707
Mean cuadratic error (MSE) in the test set: 0.1707


In [None]:
# Share of test predictions that fall within `threshold` of the observed value.
# NOTE(review): both sides are in normalised units here, so the tolerance is
# expressed in standard deviations - confirm that this is intended.
threshold = 0.5  # tolerance
correct_predictions = sum(
    1
    for idx, predicted in enumerate(predicted_values)
    if abs(predicted - test_data[idx][1]) <= threshold
)

accuracy = correct_predictions / len(predicted_values)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 83.67%


In [None]:
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
import plotly.graph_objects as go  # import the go module

# Build the monthly date axis for the next 8 years (96 months), starting the
# month after the last NINO3.4 observation. The axis must come from `dfn`:
# `dfa` is a different table whose index is not a date index until its own
# preprocessing cell has run, and its last month need not match dfn's.
start_date = dfn.index[-1] + pd.DateOffset(months=1)
end_date = start_date + pd.DateOffset(months=95)
date_range = pd.date_range(start_date, end_date, freq='MS')

# Only plot when date_range and future_predictions have the same length
if len(date_range) == len(future_predictions):
    # Assemble the frame Plotly will draw
    df = pd.DataFrame({'Date': date_range, 'Predictions': future_predictions})

    # Interactive line chart of the forecast
    fig = px.line(df, x='Date', y='Predictions', line_shape='linear', title='NINO3.4 temperature predictions')

    # Horizontal reference line at 28
    fig.add_hline(y=28, line_dash='dot', line_color='red', annotation_text='+28')

    # Show the figure and save a standalone HTML copy
    fig.show()
    fig.write_html("sstnp.html")
else:
    print("Las dimensiones de date_range y future_predictions no coinciden")




# **Anomalies**

In [None]:
# Show the ANOM3.4 table.
# NOTE(review): the saved output already has a Date index, which is only set by
# the preprocessing cell that follows - presumably the cells were run out of
# order; re-run from a fresh kernel to refresh this output.
dfa

Unnamed: 0_level_0,YEAR,Month,ANOM3.4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1982-02-01,1982,2,-0.153391
1982-03-01,1982,3,-0.081545
1982-04-01,1982,4,0.110044
1982-05-01,1982,5,0.672838
1982-06-01,1982,6,0.864428
...,...,...,...
2023-05-01,2023,5,0.648890
2023-06-01,2023,6,1.139838
2023-07-01,2023,7,1.367350
2023-08-01,2023,8,1.642760


In [None]:
# Preprocess the data
# Build a "Date" column from YEAR and Month (day fixed to 1) so each row has a timestamp
dfa['Date'] = pd.to_datetime(dfa[['YEAR', 'Month']].assign(day=1))
# Use the "Date" column as the index
dfa.set_index('Date', inplace=True)
# Keep an untouched copy of the raw anomalies. Without it, re-running this cell
# would z-score values that are already z-scored and overwrite
# ANOM34_mean / ANOM34_std with ~0 / ~1, breaking the later de-normalisation.
if 'ANOM3.4_raw' not in dfa.columns:
    dfa['ANOM3.4_raw'] = dfa['ANOM3.4']
# Normalise (z-score) the ANOM3.4 anomalies, always starting from the raw copy.
# NOTE(review): the statistics are computed over the whole series, including the
# rows that later end up in the test split.
ANOM34_mean = dfa['ANOM3.4_raw'].mean()
ANOM34_std = dfa['ANOM3.4_raw'].std()
dfa['ANOM3.4'] = (dfa['ANOM3.4_raw'] - ANOM34_mean) / ANOM34_std


In [None]:
# Show the ANOM3.4 column that the sequence-building cell reads.
dfa['ANOM3.4']

Date
1982-02-01   -0.153391
1982-03-01   -0.081545
1982-04-01    0.110044
1982-05-01    0.672838
1982-06-01    0.864428
                ...   
2023-05-01    0.648890
2023-06-01    1.139838
2023-07-01    1.367350
2023-08-01    1.642760
2023-09-01    1.918170
Name: ANOM3.4, Length: 500, dtype: float64

In [None]:
# Build the (input window, target) training pairs
sequence_length = 12  # one year of monthly values is used to predict the following month
# Pull the column out once; every window below is a slice of this array.
anom34_values = dfa['ANOM3.4'].values
# Pair each run of `sequence_length` consecutive values with the value right after it.
sequences = [
    (anom34_values[start:start + sequence_length], anom34_values[start + sequence_length])
    for start in range(len(dfa) - sequence_length)
]

In [None]:
# Chronological split: the first 80% of the pairs train the model, the rest test it
train_size = int(0.8 * len(sequences))
train_data, test_data = sequences[:train_size], sequences[train_size:]

In [None]:
# LSTM-based regression model for the ANOM3.4 series
class ANOM34Model(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sequence.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return a (batch, 1) prediction for a (batch, seq_len, input_size) input."""
        # Zero initial hidden/cell states, allocated with x's dtype and device so
        # the model keeps working after .double() or after being moved off the CPU.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Only the output of the last time step feeds the regression head.
        out = self.fc(out[:, -1, :])
        return out


In [None]:
# Create the model
# Fix the RNG seed first so the random weight initialisation (and therefore the
# training run that follows) is repeatable across kernel restarts.
torch.manual_seed(42)
input_size = 1  # one feature per time step: the ANOM3.4 anomaly value
hidden_size = 64
num_layers = 2
model = ANOM34Model(input_size, hidden_size, num_layers)


In [None]:
# Loss function and optimizer
# Mean squared error between the model output and the target value.
criterion = nn.MSELoss()
# Adam over all parameters of `model`, learning rate 0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train the model
num_epochs = 100

# Convert the training pairs to tensors once instead of on every epoch.
# Inputs are stored as (N, 1, sequence_length, 1) and targets as (N, 1, 1), so
# indexing one sample yields a (1, sequence_length, 1) batch and a (1, 1) target.
# The target then has the same shape as the model output, so MSELoss does not
# have to broadcast.
train_inputs = torch.tensor(
    np.array([seq for seq, _ in train_data]), dtype=torch.float32
).reshape(-1, 1, sequence_length, 1)
train_targets = torch.tensor(
    np.array([tgt for _, tgt in train_data]), dtype=torch.float32
).reshape(-1, 1, 1)

model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    # One optimisation step per sample (batch size 1), in chronological order.
    for sequence, target in zip(train_inputs, train_targets):
        # Forward pass
        outputs = model(sequence)
        loss = criterion(outputs, target)

        # Backward pass and optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    if (epoch + 1) % 10 == 0:
        # Report the mean loss over the epoch rather than the last sample's loss,
        # which is too noisy to show whether training is converging.
        mean_epoch_loss = epoch_loss / max(len(train_inputs), 1)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_epoch_loss:.4f}')

Epoch [10/100], Loss: 0.0008
Epoch [20/100], Loss: 0.0023
Epoch [30/100], Loss: 0.0070
Epoch [40/100], Loss: 0.0022
Epoch [50/100], Loss: 0.0093
Epoch [60/100], Loss: 0.0003
Epoch [70/100], Loss: 0.0001
Epoch [80/100], Loss: 0.0052
Epoch [90/100], Loss: 0.0024
Epoch [100/100], Loss: 0.0114


In [None]:
# Evaluate the model on the test set
model.eval()
test_losses = []
with torch.no_grad():
    for sequence, target in test_data:
        # (1, sequence_length, 1): a batch of one univariate sequence
        sequence = torch.tensor(sequence, dtype=torch.float32).reshape(1, sequence_length, 1)
        # (1, 1): same shape as the model output, so MSELoss does not broadcast
        target = torch.tensor([[float(target)]], dtype=torch.float32)

        outputs = model(sequence)
        loss = criterion(outputs, target)
        test_losses.append(loss.item())

# Mean of the per-sample squared errors (in normalised units)
average_test_loss = np.mean(test_losses)
print(f'Loss mean in the test set: {average_test_loss:.4f}')


Loss mean in the test set: 0.1632


In [None]:
# Forecast the NINO3.4 anomalies for the next 8 years (96 monthly steps)
future_predictions = []
# Seed the rolling window with the most recent `sequence_length` observations.
# test_data[-1][0] must not be used here: its target is the last observed month,
# so that window ends one month early and the first "forecast" would be a
# re-prediction of a value that is already known.
last_sequence = dfa['ANOM3.4'].iloc[-sequence_length:].values.copy()
model.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    for i in range(8 * 12):
        sequence = torch.tensor(last_sequence, dtype=torch.float32).reshape(1, sequence_length, 1)
        prediction = model(sequence)
        future_predictions.append(prediction.item())
        # Slide the window: drop the oldest value and append the new prediction.
        last_sequence = np.append(last_sequence[1:], prediction.item())

In [None]:
# De-normalise the predictions (inverse of the z-score applied to the ANOM3.4 column)
# NOTE: this rebinds `future_predictions`, so running the cell a second time
# applies the inverse transform twice.
future_predictions = np.array(future_predictions) * ANOM34_std + ANOM34_mean


In [None]:
# Print the forecasts for the next 8 years, labelled by relative year and month
for step, forecast_value in enumerate(future_predictions):
    # step 0..11 -> year 1, months 1..12; step 12..23 -> year 2; and so on
    year_offset, month_offset = divmod(step, 12)
    print(f'YEAR {year_offset + 1}, Month {month_offset + 1}: Prediction Anomalys = {forecast_value:.4f}')

YEAR 1, Month 1: Prediction Anomalys = 1.3369
YEAR 1, Month 2: Prediction Anomalys = 1.1242
YEAR 1, Month 3: Prediction Anomalys = 1.3818
YEAR 1, Month 4: Prediction Anomalys = 1.3012
YEAR 1, Month 5: Prediction Anomalys = 1.2949
YEAR 1, Month 6: Prediction Anomalys = 0.9774
YEAR 1, Month 7: Prediction Anomalys = 0.5820
YEAR 1, Month 8: Prediction Anomalys = 0.3196
YEAR 1, Month 9: Prediction Anomalys = -0.0097
YEAR 1, Month 10: Prediction Anomalys = -0.2748
YEAR 1, Month 11: Prediction Anomalys = -0.2338
YEAR 1, Month 12: Prediction Anomalys = -0.4075
YEAR 2, Month 1: Prediction Anomalys = -0.3421
YEAR 2, Month 2: Prediction Anomalys = -0.5442
YEAR 2, Month 3: Prediction Anomalys = -0.6332
YEAR 2, Month 4: Prediction Anomalys = -0.9403
YEAR 2, Month 5: Prediction Anomalys = -1.2919
YEAR 2, Month 6: Prediction Anomalys = -1.4866
YEAR 2, Month 7: Prediction Anomalys = -1.5520
YEAR 2, Month 8: Prediction Anomalys = -1.4483
YEAR 2, Month 9: Prediction Anomalys = -1.2710
YEAR 2, Month 10: 

In [None]:
from sklearn.metrics import mean_squared_error

# Evaluate the model on the test set, keeping targets and predictions
model.eval()
test_losses = []
true_values = []  # observed (normalised) targets
predicted_values = []  # model outputs for the same samples

with torch.no_grad():
    for sequence, target in test_data:
        # (1, sequence_length, 1): a batch of one univariate sequence
        sequence = torch.tensor(sequence, dtype=torch.float32).reshape(1, sequence_length, 1)
        # (1, 1): same shape as the model output, so MSELoss does not broadcast
        target = torch.tensor([[float(target)]], dtype=torch.float32)

        outputs = model(sequence)
        loss = criterion(outputs, target)
        test_losses.append(loss.item())

        # Store the observed value and the prediction as plain floats
        true_values.append(target.item())
        predicted_values.append(outputs.item())

average_test_loss = np.mean(test_losses)
print(f'Loss mean in the test set (MSE): {average_test_loss:.4f}')

# Compute the MSE over the collected values (cross-check of the figure above)
mse = mean_squared_error(true_values, predicted_values)
print(f'Mean cuadratic error (MSE) in the test set: {mse:.4f}')


Loss mean in the test set (MSE): 0.1632
Mean cuadratic error (MSE) in the test set: 0.1632


In [None]:
# Share of test predictions that fall within `threshold` of the observed value.
# NOTE(review): both sides are in normalised units here, so the tolerance is
# expressed in standard deviations - confirm that this is intended.
threshold = 0.5  # tolerance
correct_predictions = sum(
    1
    for idx, predicted in enumerate(predicted_values)
    if abs(predicted - test_data[idx][1]) <= threshold
)

accuracy = correct_predictions / len(predicted_values)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 79.59%


In [None]:
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
import plotly.graph_objects as go  # import the go module

# Build the monthly date axis for the next 8 years (96 months), starting the
# month after the last ANOM3.4 observation.
start_date = dfa.index[-1] + pd.DateOffset(months=1)
end_date = start_date + pd.DateOffset(months=95)
date_range = pd.date_range(start_date, end_date, freq='MS')

# Only plot when date_range and future_predictions have the same length
if len(date_range) == len(future_predictions):
    # Assemble the frame Plotly will draw
    df = pd.DataFrame({'Date': date_range, 'Predictions': future_predictions})

    # Interactive line chart of the forecast
    fig = px.line(df, x='Date', y='Predictions', line_shape='linear', title='ANOM3.4 value predictions')

    # Horizontal reference lines at +0.5 and -0.5
    fig.add_hline(y=0.5, line_dash='dot', line_color='red', annotation_text='+0.5')
    fig.add_hline(y=-0.5, line_dash='dot', line_color='red', annotation_text='-0.5')

    # Show the figure and save a standalone HTML copy
    fig.show()
    fig.write_html("sstap.html")
else:
    print("Las dimensiones de date_range y future_predictions no coinciden")



