In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [0]:
# Read the raw incident records and preview the first ten rows.
df = pd.read_csv(filepath_or_buffer='/content/train.csv')
df.head(n=10)

Unnamed: 0,EventId,Occurrence Local Date Time,Reporting Agency,Cause,Subcause,Status,longitude,latitude,road_segment_id
0,60558,01/01/16 00:53,Cam,Stationary Vehicle,Vehicle On Shoulder,Closed,18.5408955032,-33.888275,S0B3CGQ
1,60559,01/01/16 00:54,CAMERA,Accident,With A Fixed Object,Closed,18.9307563219,-34.140857,RYJYAPI
2,60560,01/01/16 02:26,Law Enforcement,Accident,Multi Vehicle,Closed,18.5533575029,-33.959154,U3KP57C
3,60561,01/01/16 02:56,CAMERA,Stationary Vehicle,Vehicle On Shoulder,Closed,18.6775561589,-33.895258,RY0TRQ8
4,60562,01/01/16 03:40,CAMERA,Accident,Multi Vehicle,Closed,18.8371319682,-34.087051,8LOVJZ3
5,60564,01/01/16 06:32,,Stationary Vehicle,Vehicle On Shoulder,Closed,18.6384711081,-33.885498,X4UA382
6,60565,01/01/16 07:05,camera,Accident,Single Vehicle,Closed,18.4637854567,-33.943158,0QR8FDW
7,60567,01/01/16 07:39,camera,Police and Military,Road Rage,Closed,18.6359671258,-34.002366,DZABHQW
8,60568,01/01/16 08:00,camera,Stationary Vehicle,Vehicle On Shoulder,Closed,18.6350138684,-34.002237,EKZN1VM
9,60569,01/01/16 08:44,SAPS,Accident,Single Vehicle,Closed,18.4906240725,-33.949284,H9XYX9Q


In [0]:
# Print each column's dtype and non-null count for the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53845 entries, 0 to 53844
Data columns (total 9 columns):
EventId                       53845 non-null int64
Occurrence Local Date Time    53845 non-null object
Reporting Agency              52226 non-null object
Cause                         53845 non-null object
Subcause                      53845 non-null object
Status                        53845 non-null object
longitude                     53845 non-null object
latitude                      53845 non-null float64
road_segment_id               53845 non-null object
dtypes: float64(1), int64(1), object(7)
memory usage: 3.7+ MB


In [0]:
# Remove 'Reporting Agency' (the only column df.info() reports with missing values;
# it is not read by any later cell shown here).
# Rebinding `df` with errors='ignore' instead of inplace=True keeps this cell
# idempotent: re-running it once the column is gone no longer raises KeyError.
df = df.drop(columns=['Reporting Agency'], errors='ignore')

In [0]:
# Parse the timestamp strings into datetime values.
# NOTE(review): no explicit `format=` / `dayfirst=` is passed, so pandas infers the layout;
# values such as "01/01/16 00:53" are ambiguous between day-first and month-first —
# confirm against the data source.
df['Occurrence Local Date Time']=pd.to_datetime(df['Occurrence Local Date Time'])

In [0]:
# Re-format every timestamp as a string whose minutes and seconds are fixed to "00:00",
# i.e. bucket incidents by hour. After this line the column holds strings, not datetimes.
df['Occurrence Local Date Time']=df['Occurrence Local Date Time'].dt.strftime('%Y-%m-%d %H:00:00')

In [0]:
# Convert longitude to a numeric dtype; entries that cannot be parsed become NaN.
df['longitude'] = pd.to_numeric(df['longitude'], errors='coerce')

In [0]:
# Summary statistics for the numeric columns.
# NOTE(review): in the recorded output longitude has 53840 non-null values out of 53845 rows
# (five entries were turned into NaN by the numeric coercion), and latitude has a maximum of
# 18.839681 while its quartiles sit near -34 — those rows look like bad coordinates; verify.
df.describe()

Unnamed: 0,EventId,longitude,latitude
count,53845.0,53840.0,53845.0
mean,106497.340403,18.604685,-33.94796
std,27960.354177,0.113499,0.511146
min,60558.0,18.424719,-34.153087
25%,83043.0,18.52364,-33.993621
50%,107083.0,18.600709,-33.95025
75%,131294.0,18.656355,-33.916789
max,152149.0,19.149927,18.839681


In [0]:
# Inspect the road segment identifier column (one ID string per incident row).
df['road_segment_id']

0        S0B3CGQ
1        RYJYAPI
2        U3KP57C
3        RY0TRQ8
4        8LOVJZ3
          ...   
53840    CZBWW0Z
53841    7AKUBY4
53842    YQCBO2Y
53843    Z2OSI9E
53844    GFX7S4B
Name: road_segment_id, Length: 53845, dtype: object

In [0]:
# Count incidents per (hour bucket, road segment).
crossdata = pd.crosstab(df['Occurrence Local Date Time'], df['road_segment_id'])

# Order the columns by first appearance of each segment in the raw data.
crossdata = crossdata.reindex(df['road_segment_id'].unique(), axis=1)

# crosstab only produces rows for hours in which at least one incident happened.
# Insert the missing hours as all-zero rows so that consecutive rows are consecutive
# hours; otherwise the sliding windows built from this table silently skip time.
# The labels use the same string format as the hour-bucketed timestamp column.
all_hours = pd.Index(
    pd.date_range(crossdata.index.min(), crossdata.index.max(), freq='h')
      .strftime('%Y-%m-%d %H:00:00'),
    name=crossdata.index.name,
)
crossdata = crossdata.reindex(all_hours, fill_value=0)

In [0]:
# Display the hour-by-road-segment incident count table.
crossdata

In [0]:
# Show the first and last hour bucket present in the table, separated by two spaces.
f"{crossdata.index.min()}  {crossdata.index.max()}"

'2016-01-01 00:00:00  2018-12-31 23:00:00'

In [0]:
def create_inout_sequences_extarnal_data(input_data, tw):
    """Cut a sequence into sliding (window, next-step) pairs.

    Args:
        input_data: Any sliceable sequence that supports ``len()``.
        tw: Window length, i.e. number of consecutive items in each input slice.

    Returns:
        A list with ``len(input_data) - tw`` tuples ``(window, label)`` where
        ``window`` is ``input_data[i:i+tw]`` and ``label`` is the length-1 slice
        that immediately follows it. The list is empty when the sequence is not
        longer than ``tw``.
    """
    n_windows = len(input_data) - tw
    return [
        (input_data[start:start + tw], input_data[start + tw:start + tw + 1])
        for start in range(n_windows)
    ]

In [0]:
# For every road segment (column of `crossdata`) build the list of sliding
# (8-step window, next-step label) pairs from that segment's count series.
timesseries = [
    create_inout_sequences_extarnal_data(torch.tensor(crossdata[zone].values), 8)
    for zone in crossdata.columns
]

# Show only the sizes (number of segments, windows per segment). Echoing the nested
# list itself would print every window tensor of every segment.
len(timesseries), (len(timesseries[0]) if timesseries else 0)

In [0]:
# Inspect the label of the first window of the first segment.
# This element is already a single tensor; torch.cat expects a sequence of tensors
# and rejects a bare tensor, so the value is displayed directly.
timesseries[0][0][1]

In [0]:


# torch.cat cannot consume `timesseries` (a list of lists of (window, label) tuples),
# so build the window tensor straight from the count table instead.
# Result shape: (n_segments, n_rows - 8, 8); inputlstm[j][i] equals the input window
# timesseries[j][i][0]. `unfold` returns a view, so no per-window copies are made.
# NOTE(review): the intended layout of `inputlstm` is inferred — confirm before relying on it.
inputlstm = torch.tensor(crossdata.values).t().unfold(1, 8, 1)[:, :-1]

In [0]:
# Peek at the first entry of `inputlstm`.
inputlstm[0]

In [0]:
class Args:
    """Plain container for the hyper-parameters and run settings of this notebook."""

    def __init__(self):
        # Device flags; `cuda` is recomputed later from `no_cuda` and actual availability.
        self.cuda = True
        self.no_cuda = False
        # Seed passed to torch.manual_seed.
        self.seed = 1
        # Generic training settings (not read by the cells shown in this notebook).
        self.batch_size = 50
        self.test_batch_size = 1000
        self.epochs = 10
        self.lr = 0.01
        self.momentum = 0.5
        self.log_interval = 10
        # LSTM dimensions: features per time step, hidden units, outputs of the linear head.
        self.inputsize=1
        self.hidden_size=128
        self.output_size_linear_lstm=1
        # LSTM batch dimension; same value as `segment_id` below.
        self.batsh_size=499
        # Length of each input window (time steps fed to the LSTM).
        self.seq_len=8
        # Hourly timestamps from 2019-08-01 to 2019-10-29 inclusive.
        # Uses the lowercase 'h' alias: uppercase 'H' is deprecated in recent pandas,
        # while 'h' is accepted by both old and new versions.
        self.date_rng = pd.to_datetime(pd.date_range(start='2019-08-01 00:00:00', end='2019-10-29 00:00:00', freq='h'))
        # Number of road segments iterated per training batch.
        self.segment_id=499

args = Args()

In [0]:

#model DMLVST_DATA
class RMSELoss(nn.Module):
    """Loss module returning the summed squared error between targets and predictions.

    Despite the class name, ``forward`` takes neither a mean nor a square root:
    it returns ``sum((y - y_hats) ** 2)``. ``self.mse`` is created for
    completeness but is not used by ``forward``.
    """

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, y_hats, y):
        """Return the scalar sum of squared differences between ``y`` and ``y_hats``."""
        residual = y - y_hats
        return residual.pow(2).sum()

# Use CUDA only when it has not been disabled in the settings and a device is present.
args.cuda = (not args.no_cuda) and torch.cuda.is_available()

# Seed the default generator, and the CUDA generator as well when the GPU is in use.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Extra keyword arguments (worker count, pinned memory) prepared for GPU runs —
# presumably meant for a DataLoader; empty on CPU.
if args.cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

class HackLstm(nn.Module):
    """Single-layer LSTM followed by a linear head applied to the final hidden state.

    The hidden and cell states are kept on the instance (``hidden_cell``) and are
    carried over from one ``forward`` call to the next, so callers that backpropagate
    must detach them between batches.

    Args:
        inputsize: Number of features per time step.
        hidden_size: Number of LSTM hidden units.
        output_size_linear_lstm: Output width of the linear head.
        batsh_size: Batch size the stored hidden/cell state is allocated for.
        seq_len: Window length; stored on the instance but not used by the layers.
    """

    def __init__(self,inputsize,hidden_size,output_size_linear_lstm,batsh_size,seq_len):
        super(HackLstm, self).__init__()
        self.batsh_size=batsh_size
        self.inputsize=inputsize
        self.seq_len=seq_len
        self.hidden_size=hidden_size
        self.output_size_linear_lstm=output_size_linear_lstm
        # batch_first is left at its default (False): input is (seq_len, batch, inputsize).
        self.lstm = nn.LSTM(inputsize,self.hidden_size)

        self.linear = nn.Linear(self.hidden_size,output_size_linear_lstm)

        # Initial (h_0, c_0), each of shape (num_layers=1, batch, hidden_size).
        self.hidden_cell = (torch.zeros(1,self.batsh_size,self.hidden_size),
                            torch.zeros(1,self.batsh_size,self.hidden_size))

    def forward(self, x):
        """Run the LSTM over ``x`` and map the final hidden state to predictions.

        Args:
            x: Float tensor of shape (seq_len, batsh_size, inputsize).

        Returns:
            Tensor of shape (batsh_size, output_size_linear_lstm).
        """
        # Feed the argument itself; the previous code read an undefined global here.
        _, self.hidden_cell = self.lstm(x, self.hidden_cell)
        # hidden_cell[0] is h_n with shape (1, batch, hidden); flatten to (batch, hidden).
        predictions = self.linear(self.hidden_cell[0].view(self.batsh_size, -1))

        return predictions





 


In [0]:
# Model, loss and optimiser. Adam uses its own learning rate here (0.001).
model = HackLstm(args.inputsize, args.hidden_size, args.output_size_linear_lstm,
                 args.batsh_size, args.seq_len)
criterion = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

n_epochs = 3
# Train on at most the first 12000 windows, without indexing past what is available.
n_windows = min(12000, len(timesseries[0]))
if n_windows == 0:
    raise ValueError("no training windows available in `timesseries`")

for epoch in range(n_epochs):
    total_loss = []
    for i in range(n_windows):
        # Window i of every road segment: inputs of length seq_len, labels of length 1.
        input_data = [timesseries[j][i][0] for j in range(args.segment_id)]
        y = [timesseries[j][i][1] for j in range(args.segment_id)]

        # stack -> (segment, seq_len). The LSTM (batch_first=False) expects
        # (seq_len, batch, feature), so the axes must be transposed. A plain
        # cat + view would reinterpret segment-major memory as time-major and
        # mix time steps of different segments.
        x_train_zones_seq = (torch.stack(input_data).t().contiguous()
                             .view(args.seq_len, args.segment_id, 1))
        y_train_zones_seq = torch.stack(y).view(args.segment_id, -1)

        # Cut the autograd graph at the batch boundary: the hidden state is carried
        # over between calls, but gradients must not flow into earlier batches.
        model.hidden_cell[0].detach_()
        model.hidden_cell[1].detach_()

        optimizer.zero_grad()
        output = model(x_train_zones_seq.float())
        loss = criterion(output, y_train_zones_seq.float())
        loss.backward()
        optimizer.step()
        total_loss.append(loss.item())

    mean_loss = sum(total_loss) / len(total_loss)
    print('sequence : ',i,'Avrage sequences loss :',mean_loss, 'The loss of the last sequence :', loss.item())
    if epoch % 3 == 0:
        print('epoch: ',epoch,'Avrage sequences loss :',mean_loss, 'The loss of the last sequence :', loss.item())