In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler


In [2]:
# Load the dataset
data = pd.read_csv('./csv/dummyData-1.csv', header=None, names=['date', 'avail'])

In [3]:
# Convert the 'date' column to datetime
data['date'] = pd.to_datetime(data['date'], errors='coerce')

# Drop rows with invalid dates
data = data.dropna(subset=['date'])

# Sort the data by date
data = data.sort_values('date')

# Ensure 'avail' is numeric
data['avail'] = pd.to_numeric(data['avail'], errors='coerce')

# Drop rows with NaN in 'avail'
data = data.dropna(subset=['avail'])

  data['date'] = pd.to_datetime(data['date'], errors='coerce')


In [4]:
# Extract additional time features
data['month'] = data['date'].dt.month
data['day_of_year'] = data['date'].dt.dayofyear / 365.0
data['year'] = data['date'].dt.year

In [5]:
# Normalize the 'avail' column
scaler = MinMaxScaler(feature_range=(0, 1))
data['avail'] = scaler.fit_transform(data[['avail']])

In [6]:
# Function to create sequences
def create_sequences(data, seq_length):
    xs, ys = [], []
    for i in range(len(data) - seq_length):
        x = data.iloc[i:i + seq_length][['avail', 'day_of_year']].values
        y = data.iloc[i + seq_length]['avail']
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)
# Create sequences
sequence_length = 7
X, y = create_sequences(data, sequence_length)

In [7]:
X.size

30576

In [8]:
class AvailabilityDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return self.X[index], self.y[index]

dataset = AvailabilityDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


In [10]:
class RNNModel(nn.Module):
    def __init__(self, input_size=2, hidden_size=50, output_size=1, num_layers=2):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return torch.sigmoid(out)

model = RNNModel()


In [14]:
# Loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 1000

for epoch in range(num_epochs):
    for X_batch, y_batch in dataloader:
        # Forward pass
        outputs = model(X_batch)
        # print(outputs.squeeze())
        loss = criterion(outputs.squeeze(), y_batch)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/1000], Loss: 0.7449
Epoch [20/1000], Loss: 0.8008
Epoch [30/1000], Loss: 0.5528
Epoch [40/1000], Loss: 0.5775
Epoch [50/1000], Loss: 0.6151
Epoch [60/1000], Loss: 0.6795
Epoch [70/1000], Loss: 0.9092
Epoch [80/1000], Loss: 0.2739
Epoch [90/1000], Loss: 0.3890
Epoch [100/1000], Loss: 0.4067
Epoch [110/1000], Loss: 0.3237
Epoch [120/1000], Loss: 0.9138
Epoch [130/1000], Loss: 0.5318
Epoch [140/1000], Loss: 0.6633
Epoch [150/1000], Loss: 0.2747
Epoch [160/1000], Loss: 0.5029
Epoch [170/1000], Loss: 0.9228
Epoch [180/1000], Loss: 0.4354
Epoch [190/1000], Loss: 0.2662
Epoch [200/1000], Loss: 0.4791
Epoch [210/1000], Loss: 0.2734
Epoch [220/1000], Loss: 0.2343
Epoch [230/1000], Loss: 0.4636
Epoch [240/1000], Loss: 0.1368
Epoch [250/1000], Loss: 0.4953
Epoch [260/1000], Loss: 0.2313
Epoch [270/1000], Loss: 0.6863
Epoch [280/1000], Loss: 0.3105
Epoch [290/1000], Loss: 0.1796
Epoch [300/1000], Loss: 0.2613
Epoch [310/1000], Loss: 0.4442
Epoch [320/1000], Loss: 0.2242
Epoch [330/1000],

In [15]:
def predict(model, X):
    model.eval()
    with torch.no_grad():
        X = torch.tensor(X, dtype=torch.float32)
        prediction = model(X)
        return prediction.item()

# Example usage
last_sequence = data.iloc[-sequence_length:][['avail', 'day_of_year']].values
next_day_avail = predict(model, last_sequence.reshape(1, sequence_length, 2))
print(last_sequence)
print(f'Predicted availability for the next day: {round(next_day_avail)}')


[[0.         0.98356164]
 [1.         0.98630137]
 [0.         0.9890411 ]
 [1.         0.99178082]
 [0.         0.99452055]
 [0.         0.99726027]
 [0.         1.        ]]
Predicted availability for the next day: 0


In [28]:
last_day = np.array([334,335,336,337,338,339,340]) / 365.0
avail = np.array([0,1,0,1,0,1,0])
last_sequence= np.vstack(( avail,last_day)).T

In [29]:
print(last_sequence)

[[0.         0.91506849]
 [1.         0.91780822]
 [0.         0.92054795]
 [1.         0.92328767]
 [0.         0.9260274 ]
 [1.         0.92876712]
 [0.         0.93150685]]


In [30]:
next_day_avail = predict(model, last_sequence.reshape(1, sequence_length, 2))
print(last_sequence)
print(f'Predicted availability for the next day: {round(next_day_avail)}')

[[0.         0.91506849]
 [1.         0.91780822]
 [0.         0.92054795]
 [1.         0.92328767]
 [0.         0.9260274 ]
 [1.         0.92876712]
 [0.         0.93150685]]
Predicted availability for the next day: 1


In [31]:
torch.save(model,"./model/RNNmodelDummySet-1-Loss_%16.h5")