In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Load CSV data
data = pd.read_csv('./DailyDelhiClimateTrain.csv')

# Convert date column to datetime
data['date'] = pd.to_datetime(data['date'])

# Normalize data
scaler = MinMaxScaler()
data[['meantemp', 'humidity', 'wind_speed', 'meanpressure']] = scaler.fit_transform(data[['meantemp', 'humidity', 'wind_speed', 'meanpressure']])

# Define sequence length
sequence_length = 7

# Create sequences for time series data
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:(i + sequence_length)])
        y.append(data[i + sequence_length])
    return np.array(X), np.array(y)

# Create sequences
X, y = create_sequences(data[['meantemp', 'humidity', 'wind_speed', 'meanpressure']].values, sequence_length)

# Convert to PyTorch tensors
X_tensor = torch.Tensor(X)
y_tensor = torch.Tensor(y)

In [4]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device:",device)

input_size = 4 # Number of features
hidden_size = 64
num_layers = 2
output_size = 4 # Same as input size

model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


device: cpu


In [8]:
num_epochs = 100
batch_size = 16

for epoch in range(num_epochs):
    for i in range(0, len(X_tensor), batch_size):
        inputs = X_tensor[i:i+batch_size].to(device)
        targets = y_tensor[i:i+batch_size].to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/100], Loss: 0.0132
Epoch [20/100], Loss: 0.0087
Epoch [30/100], Loss: 0.0078
Epoch [40/100], Loss: 0.0081
Epoch [50/100], Loss: 0.0079
Epoch [60/100], Loss: 0.0079
Epoch [70/100], Loss: 0.0080
Epoch [80/100], Loss: 0.0075
Epoch [90/100], Loss: 0.0072
Epoch [100/100], Loss: 0.0077


In [9]:
model.eval()
with torch.no_grad():
    inputs = X_tensor[-1].unsqueeze(0).to(device) # Take the last sequence as input
    future = 30 # Predict future 30 days
    for i in range(future):
        pred = model(inputs)
        inputs = torch.cat((inputs[:,1:,:], pred.unsqueeze(0)), dim=1)

    predicted_values = inputs.squeeze(0).cpu().numpy()


In [10]:
print(predicted_values)

[[0.49920207 0.65650034 0.15820476 0.13198799]
 [0.5137746  0.6548617  0.15875253 0.13193941]
 [0.529092   0.6534804  0.15932463 0.13187662]
 [0.5451319  0.65243495 0.15992114 0.13179609]
 [0.5618444  0.65182185 0.16054174 0.1316935 ]
 [0.5791468  0.65175796 0.16118506 0.13156378]
 [0.59692067 0.6523802  0.16184762 0.13140136]]


In [11]:
# Invert scaling for predicted values
predicted_values_unscaled = scaler.inverse_transform(predicted_values)
print(predicted_values_unscaled)

[[  22.33104     70.26274      6.679405  1010.9396   ]
 [  22.807768    70.12088      6.702532  1010.5664   ]
 [  23.308867    70.001305     6.726686  1010.084    ]
 [  23.833601    69.9108       6.7518706 1009.4654   ]
 [  24.380339    69.85772      6.7780724 1008.6772   ]
 [  24.946375    69.85219      6.805233  1007.68066  ]
 [  25.527834    69.90606      6.8332067 1006.43286  ]]


In [13]:
# Load test CSV data
test_data = pd.read_csv('./DailyDelhiClimateTest.csv')

# Convert date column to datetime
test_data['date'] = pd.to_datetime(test_data['date'])

# Normalize test data using the same scaler used for training data
test_data[['meantemp', 'humidity', 'wind_speed', 'meanpressure']] = scaler.transform(test_data[['meantemp', 'humidity', 'wind_speed', 'meanpressure']])

# Create sequences for test data
X_test, y_test = create_sequences(test_data[['meantemp', 'humidity', 'wind_speed', 'meanpressure']].values, sequence_length)

# Convert to PyTorch tensors
X_test_tensor = torch.Tensor(X_test)
y_test_tensor = torch.Tensor(y_test)

model.eval()
with torch.no_grad():
    test_inputs = X_test_tensor.to(device)
    test_targets = y_test_tensor.to(device)

    test_outputs = model(test_inputs)
    test_loss = criterion(test_outputs, test_targets)

print(f'Test Loss: {test_loss.item():.4f}')

with torch.no_grad():
    test_inputs = X_test_tensor.to(device)
    test_outputs = model(test_inputs)

    # Invert scaling for test predictions
    test_predictions = scaler.inverse_transform(test_outputs.cpu().numpy())


Test Loss: 0.0049
