In [3]:
import numpy as np
import torch
from torch.utils.data import TensorDataset
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from sklearn.model_selection import train_test_split
import pandas as pd

### Generating Sequences

In [28]:
def create_sequences(df, seq_length, target_col=1):
    """Build sliding-window samples from one column of ``df``.

    Each sample is ``seq_length`` consecutive values taken from the column
    at position ``target_col``; its target is the value in the row that
    immediately follows the window.

    Args:
        df: DataFrame whose rows are already in sequence order.
        seq_length: Number of consecutive rows in each input window.
        target_col: Positional index of the column to window. Defaults to 1.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays with shapes
        ``(n_samples, seq_length)`` and ``(n_samples,)``, where
        ``n_samples = max(len(df) - seq_length, 0)``.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    # Pull the column out once; per-iteration ``.iloc`` lookups are slow.
    values = df.iloc[:, target_col].to_numpy()
    n_samples = max(len(values) - seq_length, 0)

    # Pre-sizing keeps the 2-D (n_samples, seq_length) shape even when there
    # are too few rows to form a single window.
    xs = np.empty((n_samples, seq_length), dtype=values.dtype)
    for start in range(n_samples):
        xs[start] = values[start:start + seq_length]

    # Copy so the targets do not alias the DataFrame's underlying buffer.
    ys = values[seq_length:].copy()
    return xs, ys


In [29]:
# Load the data and keep only rows without missing values. Reading and
# cleaning in one expression keeps this cell idempotent on re-run.
df = pd.read_csv("Data/water_potability.csv").dropna()

# create_sequences builds each window from consecutive rows, so the split
# must preserve row order. A shuffled split would fill every window with
# unrelated rows and interleave train and test rows.
train_data, test_data = train_test_split(df, test_size=0.2, shuffle=False)



In [30]:
# Window the training rows: each sample spans 24 * 4 = 96 consecutive values.
X_train, y_train = create_sequences(train_data, seq_length=24 * 4)
print(X_train.shape, y_train.shape)

# Convert features and targets to float tensors and pair them in one dataset.
dataset_train = TensorDataset(
    *(torch.from_numpy(array).float() for array in (X_train, y_train))
)
print(len(dataset_train))

(1512, 96) (1512,)
1512


### Recurrent Neural Networks

RNNs are similar to sequential (feed-forward) networks, but they also have connections pointing backward.


Sequence-to-sequence architecture (real-time speech recognition model)
- Pass sequence as input, use the entire output sequence.

Sequence-to-vector architecture (text topic classification)
- Pass a sequence as input but use only the last output: the model processes the whole input and then produces a single output.

Vector-to-sequence architecture (text generation)
- Pass a single input and use the entire output sequence.

Encoder-decoder architecture (machine translation)
- Process the entire input sequence first, then produce the output sequence.


In [19]:
class Net(nn.Module):
    """Sequence-to-vector model: a stacked RNN followed by a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each recurrent layer.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size=1, hidden_size=32, num_layers=2):
        super().__init__()
        # Define RNN layer
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per input sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the batch
                dimension comes first because the layer uses batch_first=True.

        Returns:
            Tensor of shape (batch, 1).
        """
        # Initialize first hidden state with zeros. new_zeros inherits the
        # device and dtype of x, so the model also works after .to("cuda")
        # or .double(); the sizes are read from the layer so they cannot
        # drift from its configuration.
        h0 = x.new_zeros(self.rnn.num_layers, x.size(0), self.rnn.hidden_size)
        # Pass x and h0 through recurrent layer
        out, _ = self.rnn(x, h0)
        # Pass recurrent layer's last output through linear layer
        out = self.fc(out[:, -1, :])
        return out

Plain RNNs only have short-term memory.
Two architectures solve this problem:
- LSTM (Long short-term memory)
- GRU (Gated Recurrent Unit)



LSTM
- Two hidden states
- Three inputs and outputs
	- h: short-term state
	- c: long-term state
	- Forget gate (what to remove from long-term memory), input gate (what to save to long-term memory), and output gate (what to return at the current time step)

### LSTM Network

In [35]:
class Net1(nn.Module):
    """Sequence-to-vector model: a stacked LSTM followed by a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size=32, num_layers=2):
        super().__init__()
        # Define lstm layer
        self.lstm = nn.LSTM(
            # Honor the constructor argument instead of a hard-coded 1.
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per input sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the batch
                dimension comes first because the layer uses batch_first=True.

        Returns:
            Tensor of shape (batch, 1).
        """
        state_shape = (self.lstm.num_layers, x.size(0), self.lstm.hidden_size)
        # Initialize short-term memory; new_zeros inherits the device and
        # dtype of x, so the states always match the input.
        h0 = x.new_zeros(state_shape)
        # Initialize long-term memory
        c0 = x.new_zeros(state_shape)
        # Pass all inputs to lstm layer
        out, _ = self.lstm(x, (h0, c0))
        # Keep only the last time step's output for the linear head.
        out = self.fc(out[:, -1, :])
        return out

GRU Class
- one hidden state
- no output gate 

GRU vs LSTM vs RNN
- Plain RNNs are not used much
- GRU is simpler
- LSTM requires more computation

### RNN Network (GRU)

In [34]:
class Net(nn.Module):
    """Sequence-to-vector model: a stacked GRU followed by a linear head.

    Note: this definition reuses the name ``Net``; running this cell rebinds
    it, replacing any earlier class of the same name in the notebook.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each GRU layer.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size=1, hidden_size=32, num_layers=2):
        super().__init__()
        self.rnn = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per input sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the batch
                dimension comes first because the layer uses batch_first=True.

        Returns:
            Tensor of shape (batch, 1).
        """
        # The initial state must be 3-D: (num_layers, batch, hidden_size).
        # Pass it as-is; an extra unsqueeze would make it 4-D, which the
        # GRU layer rejects. new_zeros inherits the device and dtype of x.
        h0 = x.new_zeros(self.rnn.num_layers, x.size(0), self.rnn.hidden_size)
        out, _ = self.rnn(x, h0)
        # Keep only the last time step's output for the linear head.
        out = self.fc(out[:, -1, :])
        return out