# AMAL - TP 4 - Réseaux récurrents

In [1]:
import numpy as np
import torch
from datamaestro import prepare_dataset 
from torch.utils.tensorboard import SummaryWriter
import time
import csv
import pandas as pd
import os



In [2]:
# Base step size; the SGD optimizer further down uses lr = 10 * EPS.
EPS = 0.1

# Implémentation du RNN

In [3]:
class RNN_old(torch.nn.Module):
    """Earlier version of the simple recurrent network, kept for reference.

    The hidden state is updated as ``h_t = tanh(Wi(x_t) + Wh(h_{t-1}))``.

    Args:
        dim: size of each input vector.
        latent: size of the hidden state.
    """

    def __init__(self, dim, latent):
        # Zero-argument super(): naming another class here (the sibling RNN)
        # makes construction of RNN_old fail.
        super().__init__()

        self.Wi = torch.nn.Linear(dim, latent, bias=False)
        self.Wh = torch.nn.Linear(latent, latent)
        self.latent = latent

    def one_step(self, x, h):
        """Return the next hidden state for input ``x`` and previous state ``h``."""
        # A residual variant (adding h to the result) was tried and left disabled.
        return torch.tanh(self.Wi(x) + self.Wh(h))

    def forward(self, x, h):
        """Run the recurrence over a whole sequence.

        Args:
            x: inputs indexed as (length, batch, dim).
            h: initial hidden state, (batch, latent).

        Returns:
            Tensor of shape (length, batch, latent) holding every hidden state.
        """
        length, batch = x.shape[0], x.shape[1]
        hs = torch.zeros(length, batch, self.latent)
        for i in range(length):
            # Carry the state in a separate variable; hs is only written to.
            h = self.one_step(x[i], h)
            hs[i] = h
        return hs

In [3]:
class RNN(torch.nn.Module):
    """Simple recurrent network: ``h_t = tanh(Wi(x_t) + Wh(h_{t-1}))``.

    Args:
        dim: size of each input vector.
        latent: size of the hidden state.
    """

    def __init__(self, dim, latent):
        super().__init__()

        self.Wi = torch.nn.Linear(dim, latent, bias=False)
        self.Wh = torch.nn.Linear(latent, latent)
        self.latent = latent

    def one_step(self, x, h):
        """Return the next hidden state for input ``x`` and previous state ``h``."""
        # A residual variant (adding h to the result) was tried and left disabled.
        return torch.tanh(self.Wi(x) + self.Wh(h))

    def forward(self, x, h):
        """Run the recurrence over a whole sequence.

        Args:
            x: inputs indexed as (length, batch, dim).
            h: initial hidden state, (batch, latent).

        Returns:
            Tensor of shape (length, batch, latent) holding every hidden state;
            an empty (0, batch, latent) tensor when the sequence is empty.
        """
        length, batch = x.shape[0], x.shape[1]
        if length == 0:
            return x.new_zeros((0, batch, self.latent))

        # Collect the states and stack them at the end. Writing each state in
        # place into a preallocated tensor and reading the previous slot back
        # modifies a tensor autograd has saved, which breaks backward() as soon
        # as the sequence has more than one step.
        states = []
        for i in range(length):
            h = self.one_step(x[i], h)
            states.append(h)
        return torch.stack(states)

Test:

In [4]:
def test():
    """Smoke-test RNN: print the shape of its output on random inputs."""
    length, batch, dim, latent = 6, 10, 5, 3
    x = torch.randn(length, batch, dim, requires_grad=True, dtype=torch.float)
    h = torch.randn(batch, latent, requires_grad=True, dtype=torch.float)
    rnn = RNN(dim, latent)
    hidden_states = rnn.forward(x, h)
    print(hidden_states.shape)


test()

torch.Size([6, 10, 3])


## Exo 1

In [46]:
# Number of city columns kept from the CSV; also the number of output classes.
n_cities = 2
# Input feature size given to the RNN.
dim = 1
# Number of sequences drawn per step.
batch = 1
# Hidden-state size of the RNN.
latent = 2
# Number of time steps per sequence.
length = 1

In [47]:
# Load the full training CSV only to preview its first rows.
temp_data_init = pd.read_csv('tempAMAL_train.csv')
temp_data_init.head()

Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,...,Detroit,Jacksonville,Charlotte,Miami,Pittsburgh,Toronto,Philadelphia,New York,Montreal,Boston
0,2012-10-01 13:00:00,284.63,282.08,289.48,281.8,291.87,291.53,293.41,296.6,285.12,...,284.03,298.17,288.65,299.72,281.0,286.26,285.63,288.22,285.83,287.17
1,2012-10-01 14:00:00,284.629041,282.083252,289.474993,281.797217,291.868186,291.533501,293.403141,296.608509,285.154558,...,284.069789,298.20523,288.650172,299.732518,281.024767,286.262541,285.663208,288.247676,285.83465,287.186092
2,2012-10-01 15:00:00,284.626998,282.091866,289.460618,281.789833,291.862844,291.543355,293.392177,296.631487,285.233952,...,284.173965,298.299595,288.650582,299.766579,281.088319,286.269518,285.756824,288.32694,285.84779,287.231672
3,2012-10-01 16:00:00,284.624955,282.100481,289.446243,281.782449,291.857503,291.553209,293.381213,296.654466,285.313345,...,284.27814,298.393961,288.650991,299.800641,281.15187,286.276496,285.85044,288.406203,285.860929,287.277251
4,2012-10-01 17:00:00,284.622911,282.109095,289.431869,281.775065,291.852162,291.563063,293.370249,296.677445,285.392738,...,284.382316,298.488326,288.651401,299.834703,281.215421,286.283473,285.944057,288.485467,285.874069,287.322831


In [48]:
# Keep the n_cities columns that follow the first CSV column.
temp_data = pd.read_csv('tempAMAL_train.csv').iloc[:, 1:1 + n_cities]
cols = temp_data.columns
data = np.asarray(temp_data)
n_times = data.shape[0]

# Replace missing readings with the mean of all available readings.
missing = np.isnan(data)
data[missing] = np.mean(data[~missing])

# Rescale by a fixed constant, then centre on the global mean.
data = data / 280
data -= np.mean(data)

In [49]:
class SelectLast(torch.nn.Module):
    """Module that returns the final entry of its input along the first axis."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x[-1]``."""
        last = x[-1]
        return last

In [50]:
def test_select_last():
    """Print the shape SelectLast produces for a random (5, 3) tensor."""
    sample = torch.randn(5, 3)
    picker = SelectLast()
    print(picker(sample).shape)


test_select_last()

torch.Size([3])


In [51]:
class mySequential(torch.nn.Sequential):
    """Sequential container whose stages may take and return several values."""

    def forward(self, *inputs):
        """Feed ``inputs`` through every stage in order and return the last result.

        When the current value is exactly a ``tuple`` it is unpacked into
        positional arguments for the next stage; any other value is passed as
        a single argument.
        """
        current = inputs
        for stage in self._modules.values():
            current = stage(*current) if type(current) is tuple else stage(current)
        return current

In [52]:
# (length, batch) integer grid whose row t is filled with t: per-step offsets
# added to each sequence's start index when indexing `data`.
increment = np.repeat(np.arange(0, length), batch).reshape((length, batch))

In [53]:
# Classifier: RNN over the sequence -> last hidden state -> linear layer with
# one output per city -> tanh.
# NOTE(review): the tanh bounds the scores to [-1, 1] before they reach the
# CrossEntropyLoss defined below — confirm this is intended.
model = mySequential(
    RNN(dim, latent),
    SelectLast(),
    torch.nn.Linear(latent, n_cities),
    torch.nn.Tanh()
    )

In [54]:
# Inspect the initial input and recurrent weights of the RNN (first stage).
print(model[0].Wi.weight)
print(model[0].Wh.weight)

Parameter containing:
tensor([[ 0.2005],
        [-0.0374]], requires_grad=True)
Parameter containing:
tensor([[0.5842, 0.1970],
        [0.6545, 0.7026]], requires_grad=True)


In [55]:
# Inspect the initial weights of the linear output layer (third stage).
model[2].weight

Parameter containing:
tensor([[ 0.5062, -0.4948],
        [ 0.3467,  0.6599]], requires_grad=True)

In [56]:
# Initial hidden state shared by every sequence, created with requires_grad=True.
h0 = torch.zeros(batch, latent, requires_grad=True)
# NOTE(review): h0 is not among the parameters handed to SGD, so the optimizer
# never updates it even though it tracks gradients — confirm this is intended.
optim = torch.optim.SGD(params=model.parameters(),lr=10*EPS)
# Disabled alternative that also optimizes h0:
#optim = torch.optim.Adam(params=[x for x in model.parameters()] + [h0], lr=0.1,betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
loss = torch.nn.CrossEntropyLoss()

In [57]:
# Percentage of the entries of `data` that a single step (batch * length values) covers.
batch * length * 100 / (data.shape[0]*data.shape[1])

0.00449842555105713

In [58]:
# Training: each step draws `batch` sequences of `length` readings, each taken
# from a randomly chosen city, and asks the model to predict that city.
n_train = 100
writer = SummaryWriter()

for i in range(n_train):
    # Each step sees batch * length data points.
    optim.zero_grad()
    random_cities = np.random.randint(0, n_cities, size=(batch))
    # Every window currently starts at time index 0. The randomized
    # alternative is np.random.randint(0, n_times - length, size=(batch)).
    # `dtype=int` replaces the `np.int` alias, which was removed in NumPy 1.24.
    random_times = np.zeros(batch, dtype=int)
    # Broadcasting (1, batch) + (length, batch) gives the time index of every
    # step of every sequence.
    time_index = random_times[np.newaxis, :] + increment
    x_numpy = data[time_index, random_cities][:, :, np.newaxis]
    x = torch.from_numpy(x_numpy).float()
    # CrossEntropyLoss needs int64 class indices; NumPy's default integer type
    # is platform dependent, so cast explicitly.
    target = torch.from_numpy(random_cities).long()
    l = loss(model(x, h0), target)
    l.backward()
    optim.step()
    writer.add_scalar('Loss/train', l.item(), i)

# Make sure the logged scalars reach disk.
writer.flush()

In [59]:
# Loss value of the last training step.
print(l)

tensor(0.6953, grad_fn=<NllLossBackward>)


In [60]:
# Gradient left on the RNN input weights by the last step, and their current values.
print(model[0].Wi.weight.grad)
print(model[0].Wi.weight)

tensor([[-7.4252e-06],
        [ 1.6971e-05]])
Parameter containing:
tensor([[ 0.1982],
        [-0.0557]], requires_grad=True)


In [61]:
# Evaluation: draw sequences the same way as in training and measure how often
# the highest-scoring output matches the city the sequence came from.
n_test = 100
p_true = 0

# No gradients are needed here, so disable tracking once for the whole loop.
with torch.no_grad():
    for i in range(n_test):
        random_cities = np.random.randint(0, n_cities, size=(batch))
        # Every window currently starts at time index 0. The randomized
        # alternative is np.random.randint(0, n_times - length, size=(batch)).
        # `dtype=int` replaces the `np.int` alias, which was removed in NumPy 1.24.
        random_times = np.zeros(batch, dtype=int)
        time_index = random_times[np.newaxis, :] + increment
        x_numpy = data[time_index, random_cities][:, :, np.newaxis]
        x = torch.from_numpy(x_numpy).float()
        predicted_cities = model(x, h0).numpy()
        # Fraction of the batch classified correctly.
        ok = np.sum(np.argmax(predicted_cities, axis=1) == random_cities) / batch
        p_true += ok

# Mean accuracy over all evaluation steps.
print(p_true / n_test)

0.39
