In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch


In [19]:
data = pd.read_csv("clean_weather.csv")
data = data.ffill()
data['date'] = pd.to_datetime(data['Unnamed: 0'])
data = data[['date','tmax','tmin','rain','tmax_tomorrow']]
data = data.set_index('date')


In [21]:
data

Unnamed: 0_level_0,tmax,tmin,rain,tmax_tomorrow
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1970-01-01,60.0,35.0,0.0,52.0
1970-01-02,52.0,39.0,0.0,52.0
1970-01-03,52.0,35.0,0.0,53.0
1970-01-04,53.0,36.0,0.0,52.0
1970-01-05,52.0,35.0,0.0,50.0
...,...,...,...,...
2022-11-22,62.0,35.0,0.0,67.0
2022-11-23,67.0,38.0,0.0,66.0
2022-11-24,66.0,41.0,0.0,70.0
2022-11-25,70.0,39.0,0.0,62.0


In [22]:
DEVICE=  torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [23]:
DEVICE

device(type='cuda')

In [24]:
data.shape

(13509, 4)

In [35]:
from torch.utils.data import DataLoader,Dataset

class TimeSeriesDataset(Dataset):
    def __init__(self, data_x,data_y, sequence_length):
        self.data_x = data_x
        self.data_y = data_y
        self.sequence_length = sequence_length
        
    def __len__(self):
        return len(self.data_x) - self.sequence_length+1
        
    def __getitem__(self, idx):
        # Get sequence of data
        sequence_x = self.data_x[idx:idx + self.sequence_length]
        sequence_y = self.data_y[idx:idx + self.sequence_length]
        return sequence_x,sequence_y

def transform(data_x,data_y,batch_size,sequence_len,device,dtype=torch.float32):
    
    data_tensor_x = torch.tensor(data_x,dtype=dtype,device=device)
    data_tensor_y = torch.tensor(data_y,dtype=dtype,device=device)
    dataset = TimeSeriesDataset(data_tensor_x,data_tensor_y,sequence_len)
    
    dataloader = DataLoader(dataset,batch_size,shuffle=False)
    
    return dataloader
    

In [84]:
# Assuming 'date' is your date column and data is sorted by date
from sklearn.preprocessing import StandardScaler

predictors = ["tmax", "tmin", "rain"]
target = "tmax_tomorrow"
batch_size = 32

# Calculate split points
total_rows = len(data)
train_end = int(0.7 * total_rows)
train_end = train_end - (train_end%32)
valid_end = int(0.85 * total_rows)
valid_end = valid_end-(valid_end%32)

# Split the data
X_train = data[predictors].iloc[:train_end]
y_train = data[target].iloc[:train_end]

X_valid = data[predictors].iloc[train_end:valid_end]
y_valid = data[target].iloc[train_end:valid_end]

X_test = data[predictors].iloc[valid_end:]
y_test = data[target].iloc[valid_end:]

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

# Convert to a dataloader object with batch_size

X_train_scaled = np.array(X_train_scaled)
X_valid_scaled = np.array(X_valid_scaled)
X_test_scaled = np.array(X_test_scaled)

#We dont scale target data check on google for more info on why
y_train = np.array(y_train).reshape(-1,1)
y_valid = np.array(y_valid).reshape(-1,1)
y_test = np.array(y_test).reshape(-1,1)

In [81]:
y_train.shape

(295, 1)

In [85]:
print("x_train ",X_train_scaled.shape)
print("x_valid ",X_valid_scaled.shape)
print("x_test",X_test_scaled.shape)
print("y_train",y_train.shape)
print("y_valid",y_valid.shape)
print("y_test",y_test.shape)

x_train  (9440, 3)
x_valid  (2016, 3)
x_test (2053, 3)
y_train (9440, 1)
y_valid (2016, 1)
y_test (2053, 1)


In [86]:
train_data = transform(X_train_scaled,y_train,32,7,DEVICE)
valid_data = transform(X_valid_scaled,y_valid,32,7,DEVICE)
test_data = transform(X_test_scaled,y_test,32,7,DEVICE)


In [87]:
for x, y in train_data:
    print("x_train batch ",x.shape, "  ",x[4])
    print('y_train batch ',y.shape,"  ",y[4])
    break

print(train_data.batch_size)
len(train_data.dataset)

x_train batch  torch.Size([32, 7, 3])    tensor([[-1.5982, -2.3133, -0.2672],
        [-1.8390, -1.8620, -0.2672],
        [-1.5982, -1.1097, -0.2672],
        [-1.1165, -0.2070,  1.0794],
        [-1.3573, -0.0565,  1.9771],
        [-0.9961, -0.0565, -0.2672],
        [-0.9961, -0.0565,  1.4721]], device='cuda:0')
y_train batch  torch.Size([32, 7, 1])    tensor([[50.],
        [52.],
        [56.],
        [54.],
        [57.],
        [57.],
        [58.]], device='cuda:0')
32


9434

In [88]:
import torch.nn as nn
import torch.optim as optim
import math
from tqdm import tqdm

class SimpleRNN(nn.Module):
    def __init__(self, input_size,hidden_size,output_size):
        super(SimpleRNN,self).__init__()
        
        torch.manual_seed(0)
        
        k = 1/math.sqrt(hidden_size)
        
        #input to hidden layer
        self.i2h = nn.Linear(input_size,hidden_size)
        self.i2h.weight.data.uniform_(-k,k)
        self.i2h.bias.data.uniform_(-k,k)
        
        #hidden to hidden layer
        self.h2h = nn.Linear(hidden_size,hidden_size)
        self.h2h.weight.data.uniform_(-k,-k)
        self.h2h.bias.data.uniform_(-k,k)
        
        #hidden to ouput layer
        self.h2o = nn.Linear(hidden_size,output_size)
        self.h2o.weight.data.uniform_(-k,k)
        self.h2o.bias.data.uniform_(-k,k)
        
        #tanh
        self.tanh = nn.Tanh()
        self.hidden_size = hidden_size
        
    def init_hidden(self,batch_size,device=torch.device('cuda')):
        return torch.zeros(batch_size,self.hidden_size,device=device,requires_grad=True)
        
    def forward(self,x,hidden):
        batch_size = x.shape[0]
        seq_len = x.shape[1]
        
        if hidden is None:
            hidden = self.init_hidden(batch_size,device=x.device)
        
        outputs = []
        current_hidden = hidden
        
        for t in range(seq_len):
            #get current input
            x_t = x[:,t,:]
        
            #combine input and hidden state and apply activation
            combined = self.i2h(x_t) + self.h2h(current_hidden)
            current_hidden = self.tanh(combined)
            
            #output layer
            output = self.h2o(current_hidden)
            outputs.append(output)
            
        #stack outputs along sequence dimensions
        outputs = torch.stack(outputs,dim=1)
        
        return outputs, current_hidden
    
    
        

In [111]:
#improved???
import torch
import torch.nn as nn
import torch.optim as optim
import math
from tqdm import tqdm

class ImprovedRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.2):
        super(ImprovedRNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        # Weight initialization
        self.init_scale = 1.0 / math.sqrt(hidden_size)
        
        # Input to hidden layer
        self.i2h = nn.Linear(input_size, hidden_size)
        nn.init.xavier_uniform_(self.i2h.weight, gain=self.init_scale)
        nn.init.zeros_(self.i2h.bias)
        
        # Hidden to hidden layer
        self.h2h = nn.Linear(hidden_size, hidden_size)
        # Orthogonal initialization for RNN hidden-to-hidden connection
        nn.init.orthogonal_(self.h2h.weight, gain=self.init_scale)
        nn.init.zeros_(self.h2h.bias)
        
        # Hidden to output layer
        self.h2o = nn.Linear(hidden_size, output_size)
        nn.init.xavier_uniform_(self.h2o.weight, gain=self.init_scale)
        nn.init.zeros_(self.h2h.bias)
        
        # Regularization
        self.dropout = nn.Dropout(dropout_rate)
        self.layer_norm = nn.LayerNorm(hidden_size)
        
        # Activation
        self.tanh = nn.Tanh()
        
    def init_hidden(self, batch_size, device=torch.device('cuda')):
        # Initialize with small random values instead of zeros
        return torch.randn(batch_size, self.hidden_size, device=device, 
                         requires_grad=True) * 0.1
    
    def forward(self, x, hidden=None):
        batch_size, seq_len, _ = x.size()
        
        
        if hidden is None:
            hidden = self.init_hidden(batch_size, device=x.device)
            
        outputs = []
        current_hidden = hidden
        
        for t in range(seq_len):
            # Get current input
            x_t = x[:, t, :]
            
            # Apply layer normalization and dropout to input
            x_t = self.dropout(x_t)
            
            # Combine input and hidden state
            i2h_out = self.i2h(x_t)
            h2h_out = self.h2h(current_hidden)
            #print("input shape: ",x.shape)
            #print("i2h_out: ",i2h_out.shape," h2h : ",h2h_out.shape)
            combined = i2h_out + h2h_out
            
            # Apply layer normalization before activation
            combined = self.layer_norm(combined)
            
            # Apply activation and dropout
            current_hidden = self.dropout(self.tanh(combined))
            
            # Output layer
            output = self.h2o(current_hidden)
            outputs.append(output)
            
        # Stack outputs along sequence dimension
        outputs = torch.stack(outputs, dim=1)
        
        return outputs, current_hidden

In [116]:
def train_model(model,train_loader,valid_loader,num_epochs,learning_rate):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        
        #mean squared error
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(),lr=learning_rate)
        
        #to store metrics
        train_losses = []
        val_losses = []
        
        for epoch in range(1,num_epochs+1):
            model.train()
            total_train_loss = 0
            hidden = None
            
            #Training loop 
            for batch_idx, (x_batch,y_batch) in enumerate(tqdm(train_loader)):
                x_batch = x_batch.to(device) #shape (32,7,3)
                y_batch = y_batch.to(device) #shape(32,7)
                if x_batch.shape[0] != 32:
                    continue
                
                #forward pass 
                outputs, hidden = model(x_batch,hidden) #outputs shape(32, 7, 1)
                
                #calculate loss
                #print("ouputs: ",outputs.shape," y_batch ",y_batch.shape)
                loss = criterion(outputs,y_batch)
                
                #backward pass and optimize
                optimizer.zero_grad()
                loss.backward()
                
                # Optional: Gradient clipping to prevent exploding gradients
                nn.utils.clip_grad_norm_(model.parameters(),max_norm=1.0)
                
                optimizer.step()
                
                # Detach hidden state to prevent backprop across batches
                hidden = hidden.detach()
                
                total_train_loss += loss.item()
            
            avg_train_loss = total_train_loss/len(train_loader)
            
            train_losses.append(avg_train_loss)
            
            
            #validation loop
            model.eval()
            total_val_loss = 0
            hidden = None
            
            with torch.no_grad():
                for x_val,y_val in valid_loader:
                    x_val = x_val.to(device)
                    y_val = y_val.to(device)
                    if x_val.shape[0]!=32:
                        continue
                    
                    outputs, hidden = model(x_val,hidden)
                    # outputs = outputs.squeeze(-1)
                    val_loss = criterion(outputs,y_val)
                    total_val_loss += val_loss.item()
                    
                    hidden = hidden.detach()
            
            avg_val_loss = total_val_loss/len(valid_loader)
            val_losses.append(avg_val_loss)
            if(epoch%50==0):
                print(f'Epoch [{epoch+1}/{num_epochs}]')
                print(f'Training Loss: {avg_train_loss:.4f}')
                print(f'Validation Loss: {avg_val_loss:.4}')
            
        return train_losses, val_losses   

In [117]:
model = ImprovedRNN(input_size=3,hidden_size=4,output_size=1)

train_model(model,train_data,valid_data,10,0.001)

100%|██████████| 295/295 [00:02<00:00, 129.23it/s]


Epoch [2/10]
Training Loss: 4170.5295
Validation Loss: 4.233e+03


100%|██████████| 295/295 [00:03<00:00, 96.64it/s]


Epoch [3/10]
Training Loss: 3909.0547
Validation Loss: 3.997e+03


100%|██████████| 295/295 [00:02<00:00, 110.93it/s]


Epoch [4/10]
Training Loss: 3697.3962
Validation Loss: 3.795e+03


100%|██████████| 295/295 [00:02<00:00, 123.31it/s]


Epoch [5/10]
Training Loss: 3507.9080
Validation Loss: 3.608e+03


100%|██████████| 295/295 [00:02<00:00, 124.05it/s]


Epoch [6/10]
Training Loss: 3330.3430
Validation Loss: 3.429e+03


100%|██████████| 295/295 [00:02<00:00, 122.59it/s]


Epoch [7/10]
Training Loss: 3160.3635
Validation Loss: 3.257e+03


100%|██████████| 295/295 [00:02<00:00, 99.24it/s] 


Epoch [8/10]
Training Loss: 2996.9891
Validation Loss: 3.091e+03


100%|██████████| 295/295 [00:02<00:00, 121.53it/s]


Epoch [9/10]
Training Loss: 2838.1688
Validation Loss: 2.93e+03


100%|██████████| 295/295 [00:02<00:00, 127.19it/s]


Epoch [10/10]
Training Loss: 2687.7174
Validation Loss: 2.774e+03


100%|██████████| 295/295 [00:02<00:00, 127.13it/s]


Epoch [11/10]
Training Loss: 2539.2857
Validation Loss: 2.624e+03


([4170.529536877649,
  3909.0547230865996,
  3697.3961880958686,
  3507.9080218816207,
  3330.343027509269,
  3160.363484010858,
  2996.989063741393,
  2838.1687793796345,
  2687.717354260461,
  2539.2856548761915],
 [4232.866075303819,
  3996.9097338479664,
  3795.470009455605,
  3607.6668836805557,
  3428.7284497457836,
  3256.7952822730654,
  3090.7939879402284,
  2930.13137672061,
  2774.451894608755,
  2623.5345439608136])