In [None]:
import torch
from torch import nn
import numpy as np
import pandas as pd

In [None]:
from scipy.signal import detrend
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from math import sqrt
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Global plot styling: default figure size (in inches) for every figure, and the
# "whitegrid" seaborn style with the axes grid switched off.
sns.set(rc={'figure.figsize':(6., 3.336)})
sns.set_style("whitegrid", {'axes.grid' : False})

In [None]:
# Load the dataset and parse the 'Date' column into datetimes.
df = pd.read_csv('cultivable_pca.csv')
df['Date'] = pd.to_datetime(df['Date'])
# Candidate predictors are every column from position 13 onwards.
# NOTE(review): the meaning of the first 13 columns is not visible here — confirm.
features = df.iloc[:, 13:]
cols = features.columns.tolist()

# Positions (within `cols`) of the three predictors kept for this model.
emi2 = [18, 59, 57]
# .copy() gives `ds` its own data: the column assignments below would otherwise
# write into a slice of `df` and trigger pandas' SettingWithCopyWarning.
ds = df[[cols[i] for i in emi2] + ['anomalia_emiliani2']].copy()

# Detrend the target, then add its values shifted by one and two rows.
# NOTE(review): detrending uses the full series, i.e. it happens before the
# train/validation/test split performed later in the notebook.
ds['anomalia_emiliani2'] = detrend(ds['anomalia_emiliani2'])
ds['anomalia_emiliani2_t'] = ds['anomalia_emiliani2'].shift(1)
ds['anomalia_emiliani2_t-1'] = ds['anomalia_emiliani2'].shift(2)

In [None]:
# Column layout at this point: three predictors, the detrended target, and the
# target shifted by one and two rows.
columns = ds.columns.tolist()

# Predictors: add the values shifted by 1..23 rows as '<name>_t-<shift>'.
for name in columns[:3]:
    for lag in range(1, 24):
        ds[f'{name}_t-{lag}'] = ds[name].shift(lag)

# Target: shifts 1 and 2 already exist ('_t' and '_t-1'), so continue with
# shifts 3..25, named '_t-2' .. '_t-24'.
for name in columns[3:4]:
    for shift_by in range(3, 26):
        ds[f'{name}_t-{shift_by - 1}'] = ds[name].shift(shift_by)

# The largest shift is 25 rows, so the first 25 rows contain NaNs: drop them.
ds = ds.iloc[25:].reset_index(drop=True)

# Final column order: all 'PC' columns, then the target shifts '_t-1' .. '_t-24',
# and '..._t' (shift by one row) as the LAST column.
pcs = ds.filter(regex='PC')
anomalia = ds.filter(regex='emiliani2_t')
anomaly_names = anomalia.columns.tolist()
ds = ds[pcs.columns.tolist() + anomaly_names[1:] + anomaly_names[:1]]

In [None]:
# Chronological 60% / 20% / 20% split into train / validation / test.
n_rows = len(ds)
train_end = int(n_rows * 0.6)
val_end = int(n_rows * 0.8)

# Fit the scaler on the training rows only, then standardise the whole frame
# with those statistics so validation/test do not influence the scaling.
scaler = StandardScaler()
scaler.fit(ds.iloc[:train_end])
ds[ds.columns] = scaler.transform(ds[ds.columns])

# Independent copies: later cells add a 'Label' column to these frames, which
# would otherwise be a write into a slice of `ds` (SettingWithCopyWarning).
train = ds.iloc[:train_end].copy()
validation = ds.iloc[train_end:val_end].copy()
test = ds.iloc[val_end:].copy()

In [None]:
def split_series(series, n_past, n_future, offset=1, hist=False):
  """Cut a 2-D array into fixed-length windows plus their targets.

  series   : 2-D array, rows are time steps; the last column is the target.
  n_past   : number of rows in each input window.
  n_future : number of target values returned per window.
  offset   : stride, in rows, between the starts of consecutive windows.
  hist     : if True keep columns 0..71 plus the last column, otherwise keep
             columns 0, 1, 2 plus the last column.

  Returns (X, y) as numpy arrays. For a window covering rows [start, stop),
  y holds the last column of rows [stop - 1, stop - 1 + n_future), i.e. it
  starts at the LAST row of the window. Windows are generated until one would
  need rows beyond the end of `series`.
  """
  if hist:
    # Predictors together with their lagged copies (first 72 columns) + target.
    kept_columns = np.r_[0:72, -1:-2:-1]
  else:
    kept_columns = [0, 1, 2, -1]

  windows, targets = [], []
  n_rows = len(series)

  for k in range(n_rows):
    start = k * offset
    stop = start + n_past
    if stop + n_future > n_rows:
      break
    windows.append(series[start:stop, kept_columns])
    targets.append(series[stop - 1:stop + n_future - 1, -1])

  return np.array(windows), np.array(targets)

In [None]:
# Non-overlapping windows: the stride equals the window length.
seq_length = 5
X_train, y_train = split_series(train.values, seq_length, 1, seq_length, hist=False)
X_val, y_val = split_series(validation.values, seq_length, 1, seq_length, hist=False)
X_test, y_test = split_series(test.values, seq_length, 1, seq_length, hist=False)

# Inputs and targets are both cast to float32 so they share the dtype of the
# model parameters (the targets used to stay float64).
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).float()

X_val = torch.from_numpy(X_val).float()
y_val = torch.from_numpy(y_val).float()

X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).float()

## Modello per la regressione

In [None]:
class RNNModel(nn.Module):
    """Plain tanh RNN whose hidden state is used directly as the prediction.

    input_dim  : number of features per time step.
    hidden_dim : size of the hidden state (also the size of each output step).
    layer_dim  : number of stacked RNN layers.
    output_dim : accepted for signature compatibility but unused, because this
                 model has no linear read-out layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim=1):
        super(RNNModel, self).__init__()

        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # batch_first=True: inputs/outputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='tanh')

    def forward(self, x):
        """Run the RNN over `x` of shape (batch, seq, input_dim).

        Returns (out, hn): `out` is (batch, seq, hidden_dim) with the hidden
        state of the last layer at every step, `hn` is
        (layer_dim, batch, hidden_dim) with the final hidden state per layer.
        """
        # Fresh zero state for every call, created with the input's dtype and
        # device so the model also works after .double() or on another device.
        # It is a constant (no gradient), as in the original detach()-ed state.
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            dtype=x.dtype, device=x.device,
        )
        out, hn = self.rnn(x, h0)
        return out, hn

In [None]:
input_size =  3  # Number of features fed to the network per time step.
hidden_size = 1  # Size of the hidden state; it is used directly as the scalar prediction.
num_layers =  1  # Number of stacked rnn layers

# Instantiate the model with the hyperparameters above (num_layers is now
# actually passed instead of a hard-coded 1).
model = RNNModel(input_dim=input_size, hidden_dim=hidden_size, layer_dim=num_layers)
model = model.float()

# Learning rate for SGD
lr=0.01

# Define Loss, Optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
num_epochs = 50
min_valid_loss = np.inf
losses = []        # summed loss of each training window, over all epochs
valid_losses = []  # summed loss of each validation window, over all epochs
n_batches = X_train.shape[0]

train_loss = []       # mean per-window training loss, one entry per epoch
validation_loss = []  # mean per-window validation loss, one entry per epoch

val_predictions = []
true_val_vals = []

for epoch in range(num_epochs):

  model.train()
  epoch_losses = []
  for batch_idx, seq in enumerate(X_train):
    # One optimiser step per window: clear the gradients once, sum the loss
    # over the window's time steps, then back-propagate the sum.
    optimizer.zero_grad()
    cumulative_loss = 0.0

    for i in range(len(seq)):
      # All columns but the last are predictors; the last column is the target
      # and is never fed to the network.
      X = seq[i, :-1].view(-1, 1, input_size)

      outputs, hn = model(X)

      true_target = seq[i, -1].view(-1, 1)
      # nn.MSELoss is called as (prediction, target)
      cumulative_loss += loss_fn(outputs.view(-1, 1), true_target)

    loss = cumulative_loss
    loss.backward()
    optimizer.step()
    epoch_losses.append(loss.item())
  losses.extend(epoch_losses)

  model.eval()
  epoch_valid_losses = []
  with torch.no_grad():  # validation needs no autograd graph
    for batch_idx, seq in enumerate(X_val):
      cumulative_valid_loss = 0.0
      for i in range(len(seq)):
        X = seq[i, :-1].view(-1, 1, input_size)
        outputs, hn = model(X)

        true_target = seq[i, -1].view(-1, 1)
        cumulative_valid_loss += loss_fn(outputs.view(-1, 1), true_target)

      epoch_valid_losses.append(cumulative_valid_loss.item())
  valid_losses.extend(epoch_valid_losses)

  # Per-epoch means. Previously the logs held running partial sums and were
  # averaged over every epoch so far, which hid the actual learning curve.
  epoch_train_loss = np.mean(epoch_losses)
  epoch_valid_loss = np.mean(epoch_valid_losses)
  train_loss.append(epoch_train_loss)
  validation_loss.append(epoch_valid_loss)

  # Checkpoint once per epoch, when this epoch's validation loss is the best so far.
  if epoch_valid_loss < min_valid_loss:
    print(f'Validation Loss Decreased({min_valid_loss}--->{epoch_valid_loss}) \t Saving The Model')
    min_valid_loss = epoch_valid_loss
    # Saving State Dict
    torch.save(model.state_dict(), 'saved_model.pth')

  print("Epoch: {}. Mean training loss: {}".format(epoch+1, epoch_train_loss) + "   Mean validation loss: {}".format(epoch_valid_loss))



In [None]:
train_predictions = []
true_train_vals = []

# Predict every training window with the model in its current state.
# Each window is fed as a batch of single-step sequences: (rows, 1, input_size).
model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    for batch_idx, seq in enumerate(X_train):
        X = seq[:, :-1].view(-1, 1, input_size)  # predictors only, target column excluded

        outputs, hn = model(X)
        true_target = seq[:, -1].view(-1, 1)

        train_predictions.append(outputs)
        true_train_vals.append(true_target)

# Flatten the per-window tensors into two flat lists of scalars.
preds = [item for p in train_predictions for item in p.detach().numpy().flatten()]
trues = [item for v in true_train_vals for item in v.numpy().flatten()]

### Performance in train

In [None]:
# True vs. predicted values on the TRAIN set (`trues`/`preds` come from the
# preceding cell, which iterates over X_train). The title used to say "test set".
fig, ax = plt.subplots(figsize=(15,8))
ax.plot(trues, label='true')
ax.plot(preds, label='predictions')
ax.axhline(y=0, color='red')
ax.set_title('Performance su train set MAE : ' + str(mean_absolute_error(trues,preds)))
ax.legend()

### Performance in validation

In [None]:
val_predictions = []
true_val_vals = []

# Restore the weights stored in 'saved_model.pth' before predicting.
model.load_state_dict(torch.load('saved_model.pth'))
model.eval()

# Iterate through the validation windows
with torch.no_grad():  # inference only: do not build autograd graphs
    for batch_idx, seq in enumerate(X_val):
        X = seq[:, :-1].view(-1, 1, input_size)  # predictors only, target column excluded

        outputs, hn = model(X)
        true_target = seq[:, -1].view(-1, 1)

        val_predictions.append(outputs)
        true_val_vals.append(true_target)

# Flatten the per-window tensors into two flat lists of scalars.
preds = [item for p in val_predictions for item in p.detach().numpy().flatten()]
trues = [item for v in true_val_vals for item in v.numpy().flatten()]

In [None]:
# True vs. predicted values for the current `trues`/`preds`, with the MAE in the title.
fig, ax = plt.subplots(figsize=(15,8))
ax.plot(trues, label='true')
ax.plot(preds, label='predictions')
ax.axhline(y=0, color='red')  # zero-anomaly reference line
title_text = 'Performance su validation set MAE : ' + str(mean_absolute_error(trues,preds))
ax.set_title(title_text)
ax.legend()

In [None]:
# Retrain on train + validation before the final evaluation on the test set.
train_val = pd.concat([train,validation])

# Despite its name, `test_set` is the retraining data: the last 80% of train+validation.
# NOTE(review): the reason for dropping the first 20% is not visible here — confirm.
test_set = train_val.iloc[int(len(train_val)*0.2):, :]

seq_length = 5
# hist=False keeps the three predictors + target, matching input_size = 3 and the
# layout of X_test. With hist=True each row carried 72 predictors, which the
# training loop's .view(-1, 1, input_size) silently reshaped into 24 fake samples.
X_train, y_train = split_series(test_set.values, seq_length, 1, seq_length, hist=False)
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).float()

In [None]:
input_size =  3  # Number of features fed to the network per time step.
hidden_size = 1  # Size of the hidden state; it is used directly as the scalar prediction.
num_layers =  1  # Number of stacked rnn layers

# Fresh, untrained model for the retraining run (num_layers is now actually
# passed instead of a hard-coded 1).
model = RNNModel(input_dim=input_size, hidden_dim=hidden_size, layer_dim=num_layers)
model = model.float()

# Learning rate for SGD
lr=0.01

# Define Loss, Optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
num_epochs = 50
min_valid_loss = np.inf
losses = []        # summed loss of each training window, over all epochs
valid_losses = []  # unused in this cell (no validation pass during retraining)
n_batches = X_train.shape[0]

for epoch in range(num_epochs):

  model.train()
  for batch_idx, seq in enumerate(X_train):
    # One optimiser step per window: clear the gradients once, sum the loss
    # over the window's time steps, then back-propagate the sum.
    optimizer.zero_grad()
    cumulative_loss = 0.0

    for i in range(len(seq)):
      # All columns but the last are predictors; the last column is the target.
      X = seq[i, :-1].view(-1, 1, input_size)

      outputs, hn = model(X)

      true_target = seq[i, -1].view(-1, 1)
      # nn.MSELoss is called as (prediction, target)
      cumulative_loss += loss_fn(outputs.view(-1, 1), true_target)

    loss = cumulative_loss
    loss.backward()
    optimizer.step()

    # Log the window total once (it used to log every running partial sum).
    losses.append(loss.item())




### Performance in test

In [None]:
predictions = []
true_vals = []

# Iterate through the test windows
model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    for batch_idx, seq in enumerate(X_test):
        X = seq[:, :-1].view(-1, 1, input_size)  # predictors only, target column excluded

        outputs, hn = model(X)
        true_target = seq[:, -1].view(-1, 1)

        predictions.append(outputs)
        true_vals.append(true_target)

# Flatten the per-window tensors into two flat lists of scalars.
preds = [item for p in predictions for item in p.detach().numpy().flatten()]
trues = [item for v in true_vals for item in v.numpy().flatten()]

In [None]:
# Mean absolute error between the current `trues` and `preds` (values are in
# the standardised scale produced by the StandardScaler).
mean_absolute_error(trues,preds)

In [None]:
# Mean squared error between the current `trues` and `preds`.
mean_squared_error(trues,preds)

In [None]:
# Root mean squared error: square root of the MSE above.
sqrt(mean_squared_error(trues,preds))

In [None]:
# True vs. predicted series for the current `trues`/`preds`; the legend is
# placed outside the axes (right side) and the figure is exported as EPS.
fig, ax = plt.subplots()
ax.plot(trues, label='Original')
ax.plot(preds, label='Recurrent Network')
ax.set_xlabel('Sample index', labelpad=15)
ax.set_ylabel('NDVI Anomaly', labelpad=15)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.savefig('regression_recurrent+serie+storica+var.eps', format='eps', bbox_inches='tight')
plt.show()

## Classificazione

Questo è un approccio di classificazione basato sul modello presentato sopra per la regressione, ma ottiene risultati non brillanti.

In [None]:
from sklearn.metrics import confusion_matrix

def plot_conmat(true, pred, title):
    """Plot a row-normalised confusion matrix as a heatmap.

    true, pred : sequences of class indices, where 0 -> 'Normal', 1 -> 'Good',
                 2 -> 'Bad' (the order of `classnames` below).
    title      : figure title.

    Each row is divided by its total, so a cell holds the fraction of the
    samples of that true class that were assigned to the predicted class.
    """
    classnames = ['Normal', 'Good', 'Bad']

    # Fixing the label set keeps the matrix 3x3 even when a class is absent
    # from both `true` and `pred`; otherwise building the frame below fails.
    conmat = confusion_matrix(true, pred, labels=list(range(len(classnames))))

    df_cm = pd.DataFrame(conmat, index=classnames, columns=classnames).astype('float')

    # Row-wise normalisation. DataFrame.div(axis=0) replaces the old
    # `series[:, np.newaxis]` indexing, which recent pandas versions reject
    # (np.mat, used before, is likewise gone from NumPy 2).
    # Rows without samples are divided by 1 so they stay 0 instead of NaN.
    row_totals = df_cm.sum(axis=1)
    df_cm = df_cm.div(row_totals.where(row_totals > 0, 1.0), axis=0)

    heatmap = sns.heatmap(df_cm, annot=True, cmap="Blues")

    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right')

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title(title)

    plt.show()


In [None]:
class RNNClassifier(nn.Module):
    """Tanh RNN followed by a linear read-out producing one logit per class.

    input_dim  : number of features per time step.
    hidden_dim : size of the RNN hidden state.
    layer_dim  : number of stacked RNN layers.
    output_dim : number of logits produced by the read-out layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim=1):
        super(RNNClassifier, self).__init__()

        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # batch_first=True: inputs/outputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='tanh')

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return logits of shape (batch, output_dim) for `x` of shape (batch, seq, input_dim)."""
        # Fresh zero state for every call, created with the input's dtype and
        # device so the model also works after .double() or on another device.
        # It is a constant (no gradient), as in the original detach()-ed state.
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim,
            dtype=x.dtype, device=x.device,
        )
        out, hn = self.rnn(x, h0)

        # Only the hidden state of the last time step feeds the read-out.
        out = self.fc(out[:, -1, :])

        return out

In [None]:
train_data = train.anomalia_emiliani2_t
val_data = validation.anomalia_emiliani2_t
test_data  = test.anomalia_emiliani2_t

# Class thresholds: the 0.33 and 0.66 quantiles of the TRAINING target only,
# so validation and test are labelled with the same cut-offs.
y_min = train_data.quantile(0.33)
y_max = train_data.quantile(0.66)


def _tercile_label(x):
    """'Good' above the upper threshold, 'Bad' below the lower one, else 'Normal'."""
    return 'Good' if x > y_max else 'Bad' if x < y_min else 'Normal'


# .assign returns new frames, so no column is written into a slice of `ds`
# (which raised SettingWithCopyWarning).
train = train.assign(Label=train_data.apply(_tercile_label))
validation = validation.assign(Label=val_data.apply(_tercile_label))
test = test.assign(Label=test_data.apply(_tercile_label))

In [None]:
# Class counts of the three splits side by side; only the first panel carries
# a y-axis label. The figure is exported as EPS.
fig, ax = plt.subplots(1,3, figsize=(10,5))

panels = [
    (train, 'Count', 'Train Labels'),
    (validation, '', 'Validation Labels'),
    (test, '', 'Test Labels'),
]
for axis, (frame, y_label, x_label) in zip(ax, panels):
    sns.countplot(x = 'Label', data=frame, ax=axis)
    axis.set_ylabel(y_label,labelpad=15)
    axis.set_xlabel(x_label,labelpad=15)

plt.savefig('classes_distrib.eps', format='eps')

In [None]:
# Stand-alone class-count plot of the validation labels.
sns.countplot(x = 'Label', data=validation)

In [None]:
# Stand-alone class-count plot of the test labels.
sns.countplot(x = 'Label', data=test)

In [None]:
# One-hot encode the string labels. np.eye(3) can only be indexed with
# integers, so the labels are first mapped to class indices; the order matches
# the class names used for the confusion matrices: 0 Normal, 1 Good, 2 Bad.
LABEL_TO_INDEX = {'Normal': 0, 'Good': 1, 'Bad': 2}

train_label = np.eye(3)[train['Label'].map(LABEL_TO_INDEX).to_numpy()]
validation_label = np.eye(3)[validation['Label'].map(LABEL_TO_INDEX).to_numpy()]
test_label = np.eye(3)[test['Label'].map(LABEL_TO_INDEX).to_numpy()]

In [None]:
def classification_series(series, labels, n_past, n_future, offset=1, hist=False):
  """Cut features and labels into aligned fixed-length windows.

  series   : 2-D array of features, rows are time steps.
  labels   : array with one label (e.g. a one-hot row) per row of `series`.
  n_past   : number of rows in each window.
  n_future : only used in the stopping rule: a window is produced while
             n_past + n_future rows are still available from its start.
  offset   : stride, in rows, between the starts of consecutive windows.
  hist     : if True keep the first 72 columns, otherwise columns 0, 1, 2.

  Returns (X, y) as numpy arrays; y[k] holds the labels of exactly the rows
  contained in X[k].
  """
  windows, window_labels = [], []
  n_rows = len(series)

  for k in range(n_rows):
    start = k * offset
    stop = start + n_past
    if stop + n_future > n_rows:
      break

    rows = series[start:stop]
    windows.append(rows[:, :72] if hist else rows[:, [0, 1, 2]])
    window_labels.append(labels[start:stop])

  return np.array(windows), np.array(window_labels)

In [None]:
seq_length = 5

# The frames carry the string 'Label' column; `.values` would therefore be an
# object array that torch.from_numpy cannot convert. Drop it and force a
# float array (errors='ignore' keeps the cell usable if the column is absent).
train_values = train.drop(columns=['Label'], errors='ignore').to_numpy(dtype=np.float64)
validation_values = validation.drop(columns=['Label'], errors='ignore').to_numpy(dtype=np.float64)
test_values = test.drop(columns=['Label'], errors='ignore').to_numpy(dtype=np.float64)

# Non-overlapping windows (stride == window length) over the first 72 columns.
X_train, y_train = classification_series(train_values, train_label, seq_length, 1, seq_length, hist=True)
X_val, y_val = classification_series(validation_values, validation_label, seq_length, 1, seq_length, hist=True)
X_test, y_test = classification_series(test_values, test_label, seq_length, 1, seq_length, hist=True)

X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train)

X_val = torch.from_numpy(X_val).float()
y_val = torch.from_numpy(y_val)

X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test)

In [None]:
input_size =  72  # Number of features fed to the network per time step.
hidden_size = 1  # Size of the RNN hidden state.
output_size = 3  # Number of classes

# Instantiate the model with the hyperparameters above
model = RNNClassifier(input_dim=input_size, hidden_dim=hidden_size, layer_dim=1, output_dim=output_size)

# Learning rate actually used by the optimiser. `lr` used to be set to 0.05
# here but was never passed anywhere; it is kept as an alias of the real value.
learning_rate = 0.005
lr = learning_rate

# Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
def multi_acc(y_pred, y_test):
    """Percentage of correct class predictions, rounded to the nearest integer.

    y_pred : tensor of per-class scores, shape (n_samples, n_classes).
    y_test : tensor of true class indices, shape (n_samples,).

    Returns a 0-dim tensor holding the rounded accuracy in percent.
    """
    log_probs = torch.log_softmax(y_pred, dim = 1)
    # torch.max over the class axis returns (values, indices); keep the indices.
    predicted_classes = torch.max(log_probs, dim = 1)[1]

    hits = (predicted_classes == y_test).float()
    return torch.round(hits.sum() / len(hits) * 100)

In [None]:
num_epochs = 20
min_valid_loss = np.inf
losses = []        # summed loss of each training window, over all epochs
valid_losses = []  # summed loss of each validation window, over all epochs
n_batches = X_train.shape[0]

train_loss = []       # mean per-window training loss, one entry per epoch
validation_loss = []  # mean per-window validation loss, one entry per epoch

val_predictions = []
true_val_vals = []

train_preds_acc = []  # per-step training accuracy (0 or 100) from multi_acc

for epoch in range(num_epochs):

  model.train()
  epoch_losses = []
  for batch_idx, seq in enumerate(X_train):
    # One optimiser step per window: clear the gradients once, sum the loss
    # over the window's time steps, then back-propagate the sum.
    optimizer.zero_grad()
    cumulative_loss = 0.0

    for i in range(len(seq)):
      # One time step as a batch of one single-step sequence: (1, 1, input_size)
      X = seq[i, :].view(-1, 1, input_size)

      # Logits, shape (1, n_classes)
      outputs = model(X)

      # The labels are one-hot rows; CrossEntropyLoss wants the class index.
      true_target = y_train[batch_idx, i].argmax().view(1)
      # Argument order is (logits, target). The previous call passed them
      # swapped and reshaped to a single class, which makes the loss constant.
      cumulative_loss += criterion(outputs, true_target)
      train_preds_acc.append(multi_acc(outputs.detach(), true_target))

    loss = cumulative_loss
    loss.backward()
    optimizer.step()
    epoch_losses.append(loss.item())
  losses.extend(epoch_losses)

  model.eval()
  epoch_valid_losses = []
  with torch.no_grad():  # validation needs no autograd graph
    for batch_idx, seq in enumerate(X_val):
      cumulative_valid_loss = 0.0
      for i in range(len(seq)):
        X = seq[i, :].view(-1, 1, input_size)

        outputs = model(X)

        true_target = y_val[batch_idx, i].argmax().view(1)
        cumulative_valid_loss += criterion(outputs, true_target)

      epoch_valid_losses.append(cumulative_valid_loss.item())
  valid_losses.extend(epoch_valid_losses)

  # Per-epoch means (the logs used to hold running partial sums averaged over
  # every epoch so far).
  epoch_train_loss = np.mean(epoch_losses)
  epoch_valid_loss = np.mean(epoch_valid_losses)
  train_loss.append(epoch_train_loss)
  validation_loss.append(epoch_valid_loss)
  print("Epoch: {}. Mean training loss: {}".format(epoch+1, epoch_train_loss) + "   Mean validation loss: {}".format(epoch_valid_loss))


In [None]:
train_predictions = []
true_train_vals = []

# Iterate through the training windows; each window is fed as a batch of
# single-step sequences: (rows, 1, input_size).
model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    for batch_idx, seq in enumerate(X_train):
        X = seq[:, :].view(-1, 1, input_size)

        # Logits, one row per time step of the window
        outputs = model(X)
        true_target = y_train[batch_idx]

        train_predictions.append(outputs)
        true_train_vals.append(true_target)

# Flatten to one entry per time step: a logit vector / a one-hot label row.
preds = [item for p in train_predictions for item in p.detach().numpy()]
trues = [item for v in true_train_vals for item in v.numpy()]

In [None]:
# Row-normalised confusion matrix for the current `trues`/`preds`; argmax over
# axis 1 turns one-hot labels and per-class scores into class indices.
plot_conmat(np.array(trues).argmax(axis=1), np.array(preds).argmax(axis=1), 'Recurrent classification results on train set in percentage')

In [None]:
val_predictions = []
true_val_vals = []

# Iterate through the validation windows
model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    for batch_idx, seq in enumerate(X_val):
        X = seq[:, :].view(-1, 1, input_size)

        # Logits, one row per time step of the window
        outputs = model(X)
        true_target = y_val[batch_idx]

        val_predictions.append(outputs)
        true_val_vals.append(true_target)

# Flatten the VALIDATION results. This used to read train_predictions /
# true_train_vals, so the "validation" confusion matrix showed train results.
preds = [item for p in val_predictions for item in p.detach().numpy()]
trues = [item for v in true_val_vals for item in v.numpy()]

In [None]:
# Row-normalised confusion matrix for the current `trues`/`preds`, titled for
# the validation set; argmax over axis 1 yields the class indices.
plot_conmat(np.array(trues).argmax(axis=1), np.array(preds).argmax(axis=1), 'Recurrent classification results on validation set in percentage')

In [None]:
predictions = []
true_values = []

# Iterate through the test windows
model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    for batch_idx, seq in enumerate(X_test):
        X = seq[:, :].view(-1, 1, input_size)

        # Logits, one row per time step of the window
        outputs = model(X)
        true_target = y_test[batch_idx]

        predictions.append(outputs)
        true_values.append(true_target)

# Flatten to one entry per time step: a logit vector / a one-hot label row.
preds = [item for p in predictions for item in p.detach().numpy()]
trues = [item for v in true_values for item in v.numpy()]

In [None]:
# Row-normalised confusion matrix for the current `trues`/`preds`, titled for
# the test set; argmax over axis 1 yields the class indices.
plot_conmat(np.array(trues).argmax(axis=1), np.array(preds).argmax(axis=1), 'Recurrent classification results on test set in percentage')

In [None]:
# Retraining data: train + validation, keeping the last 80% of the rows.
# NOTE(review): the reason for dropping the first 20% is not visible here — confirm.
train_val = pd.concat([train,validation])

train_set = train_val.iloc[int(len(train_val)*0.2):, :]
train_set_label = np.vstack([train_label, validation_label])[int(len(train_val)*0.2):]

seq_length = 5
# Drop the string 'Label' column and force floats: an object array cannot be
# converted by torch.from_numpy.
train_set_values = train_set.drop(columns=['Label'], errors='ignore').to_numpy(dtype=np.float64)
test_values = test.drop(columns=['Label'], errors='ignore').to_numpy(dtype=np.float64)
X_train, y_train = classification_series(train_set_values, train_set_label, seq_length, 1, seq_length, hist=True)
X_test, y_test = classification_series(test_values, test_label, seq_length, 1, seq_length, hist=True)

X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train)

# X_val / y_val are not rebuilt in this cell and are normally tensors already,
# on which torch.from_numpy raises TypeError; torch.as_tensor accepts both
# tensors and numpy arrays.
X_val = torch.as_tensor(X_val).float()
y_val = torch.as_tensor(y_val)

X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test)

In [None]:
class RNN(nn.Module):
    """Single recurrent cell written by hand instead of using nn.RNN.

    The input and the previous hidden state are concatenated; two linear
    layers map that vector to the next hidden state and to per-class
    log-probabilities respectively. No non-linearity is applied to the
    hidden state.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        joint_size = input_size + hidden_size
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(joint_size, hidden_size)   # [input, hidden] -> next hidden
        self.i2o = nn.Linear(joint_size, output_size)   # [input, hidden] -> class scores
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_tensor, hidden_tensor):
        """One step: returns (log-probabilities, next hidden state).

        Both arguments are 2-D with the same number of rows; they are
        concatenated along dimension 1.
        """
        combined = torch.cat([input_tensor, hidden_tensor], dim=1)
        next_hidden = self.i2h(combined)
        log_probs = self.softmax(self.i2o(combined))
        return log_probs, next_hidden

    def init_hidden(self):
        """Zero hidden state for a single sequence, shape (1, hidden_size)."""
        return torch.zeros((1, self.hidden_size))

In [None]:
# Inspect the first time step of the first training window.
X_train[0,0]

In [None]:
# Build the hand-written RNN: 72 input features, 72 hidden units, 3 classes.
n_hidden = 72
rnn = RNN(72, n_hidden, 3)

# Smoke test of a single step: first time step of the first training window,
# reshaped to one row, together with a zero hidden state.
hidden_tensor = rnn.init_hidden()
input_tensor = X_train[0,0].view(1,-1)

output, next_hidden = rnn(input_tensor, hidden_tensor)

# Print both sizes, one per line.
print(output.size(), next_hidden.size(), sep='\n')

In [None]:
# Class names, in class-index order.
categories = ['Normal', 'Good' , 'Bad']

def category_from_output(output):
    """Return the name of the class with the highest score in `output`.

    The argmax is taken over the flattened tensor, so `output` is expected to
    hold the scores of a single sample.
    """
    best_index = int(torch.argmax(output))
    return categories[best_index]

In [None]:
# Optimisation setup for the hand-written RNN: plain SGD, and negative
# log-likelihood as the loss because the network outputs log-probabilities.
learning_rate = 0.005
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)

In [None]:
def train(seq, target_seq):
    """Run the hand-written RNN over one window and return its summed loss.

    seq        : tensor (n_steps, n_features), one row per time step.
    target_seq : tensor with one target per time step, either a one-hot row
                 or a class index.

    Returns the sum of the per-step NLL losses as a tensor that is still
    attached to the autograd graph, so the caller can call .backward() and
    .item() on it. No optimiser step is taken here.

    NOTE(review): this function shadows the `train` DataFrame defined earlier
    in the notebook; cells that need the DataFrame must not be re-run after
    this one without re-creating it.
    """
    hidden = rnn.init_hidden()
    cumulative_loss = 0.0
    for i in range(len(seq)):
        input_tensor = seq[i].view(1, -1)
        # Carry the hidden state from step to step. Previously the unrelated
        # global `hidden_tensor` was passed and the new state was discarded.
        output, hidden = rnn(input_tensor, hidden)

        # NLLLoss expects a class index of shape (1,): collapse one-hot rows.
        target = target_seq[i]
        if target.dim() > 0 and target.numel() > 1:
            target = target.argmax()
        target = target.long().view(1)

        cumulative_loss = cumulative_loss + criterion(output, target)

    return cumulative_loss

In [None]:
# Scratch check: display `output` cast to int64.
output.long()

In [None]:
# Loss of the single step computed above against the label of the first time
# step of the first window. NLLLoss takes the (1, n_classes) log-probabilities
# and a class index of shape (1,), hence the argmax over the one-hot label.
# (The previous expression used the non-existent attribute `.lon`.)
criterion(output, y_train[0, 0].argmax().view(1))

In [None]:
current_loss = 0
train_loss = []  # summed loss of every window, in training order
n_iters = 50     # number of passes over the training windows

for i in range(n_iters):

    for batch, (seq, target_seq) in enumerate(zip(X_train, y_train)):
      # One optimiser step per window. `train` must return the summed loss as
      # a tensor, since both .item() and .backward() are called on it.
      optimizer.zero_grad()
      cumulative_loss = train(seq, target_seq)
      train_loss.append(cumulative_loss.item())

      # Gradients w.r.t. the parameters, then the parameter update
      cumulative_loss.backward()
      optimizer.step()
        