In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])

        return out

In [4]:
# Define hyperparameters
input_size = 1
hidden_size = 64
num_layers = 2
output_size = 1
num_epochs = 500
learning_rate = 0.01
step_ahead = 15

# Create the LSTM model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [5]:
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Dare_Not_to_avoid_the_Most_Probable_Ones/datasets/airline_passengers.csv')
print(data)

     values
0       112
1       118
2       132
3       129
4       121
..      ...
139     606
140     508
141     461
142     390
143     432

[144 rows x 1 columns]


In [6]:
data = np.array(data)
pred_real = data[-step_ahead:].squeeze()
data = data[:-step_ahead]
prob_thres = data.size - 25
d_lag = data[:-25]
d_rem = data[-25:]

In [7]:
maxim = max(d_lag).item()
minim = min(d_lag).item()
interval_width = (maxim - minim)/(prob_thres)

In [8]:
v = np.zeros(prob_thres)
p = np.zeros(prob_thres)

In [9]:
# counter = int(0)
for iter_data in range(d_lag.size - 1):
    for i in range (v.size):
        # print(i, "\n")
        if(d_lag[iter_data] >= (minim + (i * interval_width)) and d_lag[iter_data] < (minim + (i + 1) * (interval_width))):
            # print(d_lag[iter_data], "in interval", i, "between", (minim + (i * interval_width)), "and", (minim + (i + 1) * interval_width), "\n")
            # print(v[i], ".. before \n")
            v[i] = v[i] + 1
            # print(v[i], ".. after \n")
            # counter = counter + 1
            i = 0
            break
# print(counter)

In [10]:
p = v/np.sum(v)

In [11]:
p.sum() # must be 1

0.9999999999999998

In [12]:
p, v

(array([0.00970874, 0.        , 0.01941748, 0.00970874, 0.03883495,
        0.        , 0.01941748, 0.00970874, 0.03883495, 0.00970874,
        0.01941748, 0.00970874, 0.03883495, 0.00970874, 0.        ,
        0.00970874, 0.01941748, 0.00970874, 0.01941748, 0.02912621,
        0.        , 0.03883495, 0.02912621, 0.        , 0.01941748,
        0.01941748, 0.01941748, 0.02912621, 0.01941748, 0.        ,
        0.01941748, 0.        , 0.00970874, 0.        , 0.        ,
        0.03883495, 0.01941748, 0.03883495, 0.01941748, 0.02912621,
        0.        , 0.        , 0.        , 0.        , 0.00970874,
        0.01941748, 0.00970874, 0.02912621, 0.01941748, 0.01941748,
        0.        , 0.00970874, 0.        , 0.        , 0.00970874,
        0.        , 0.01941748, 0.01941748, 0.        , 0.01941748,
        0.01941748, 0.01941748, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.01941748,
        0.        , 0.01941748, 0.00970874, 0.  

In [13]:
for iter_data in range(d_rem.size - 1):
    for i in range (v.size):
        if(d_rem[iter_data] >= (minim + (i * interval_width)) and d_rem[iter_data] < (minim + (i + 1) * (interval_width))):
            z_score = abs(p[i] - v.mean())/v.std()
            p[i] = p[i] + z_score
            for j in range (p.size):
                if i != j:
                    p[j] = p[j] - (z_score/(v.size))
            i = 0
            break

p[p < 0] = 0 # Getting rid of negatives

In [14]:
p

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.73285255, 0.        , 0.78627189,
       0.        , 0.753621  , 0.        , 0.        , 0.        ,
       0.        , 0.9204198 , 0.74367042, 0.79231357, 0.89304654,
       0.        , 0.        , 0.        , 0.94880033, 0.76758

In [15]:
p.shape

(104,)

In [16]:
def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        seq = data[i:i+seq_length]
        sequences.append(seq)
    return sequences

In [17]:
seq_length = 10
sequences = create_sequences(data, seq_length)
sequences = np.array(sequences)
X = torch.tensor(sequences[:, :-1]).float().to(device)
y = torch.tensor(sequences[:, -1]).float().to(device)

# Train the model
for epoch in range(num_epochs):
    model.train()
    outputs = model(X)
    loss = criterion(outputs, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # if (epoch+1) % 10 == 0:
    #     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

preds = np.array(-1)
model.eval()
for _ in range(step_ahead): # Step ahead
    pred = model(X[-1, -seq_length+1:].unsqueeze(0))
    forecast = pred.round().detach().cpu().numpy()
    data = np.concatenate([data, forecast])
    preds = np.append(preds, forecast)
    seq_length = 10
    sequences = create_sequences(data, seq_length)
    sequences = np.array(sequences)
    X = torch.tensor(sequences[:, :-1]).float().to(device)
    y = torch.tensor(sequences[:, -1]).float().to(device)

In [18]:
preds = preds[1:]

In [19]:
smoothening_param = 0.875
for preds_iter in range(preds.size):
    for i in range (p.size):
        if(preds[preds_iter] >= (minim + (i * interval_width)) and preds[preds_iter] < (minim + (i + 1) * (interval_width))):
            preds[preds_iter] = smoothening_param * preds[preds_iter] + (1 - smoothening_param) * p[i]
            i = 0
            break