In [None]:
import csv
import math
import pandas as pd
import random
import gzip
import torch
from sklearn import metrics
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
import gensim
import multiprocessing
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import os
import argparse
import warnings
import torch.nn.functional as F
from torch import optim
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
import csv
from statsmodels.tsa.stattools import acf
from numpy import array
from numpy import hstack

In [None]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Functions

In [None]:
# split a multivariate sequence into samples
def Convert_format1(data, n_steps_in, n_steps_out):
    """Build sliding-window samples from a demand table and split them in order.

    Rows before the second row whose 'PeriodsSepLastTwoNnZeroDemands' value is
    positive are discarded. Each remaining window becomes ``[seq_x, seq_y]``:
    ``seq_x`` is a list of ``n_steps_in`` feature rows (every column except
    'ZNZDemand') and ``seq_y`` is a list of ``n_steps_out`` 'ZNZDemand' values
    starting at the last row of the input window.

    Args:
        data: DataFrame with the columns 'ZNZDemand', 'LastQtte', 'WeekDay',
            'Interval', 'PeriodsSepLastTwoNnZeroDemands' and 'Month'.
        n_steps_in: number of rows in each input window.
        n_steps_out: number of target values per sample.

    Returns:
        ``(firsttrain, firstvalid, train_data, test_data, alldata)`` where
        ``train_data``/``test_data`` are the first 90% / remaining windows,
        ``firsttrain``/``firstvalid`` split ``train_data`` roughly 4:1, and
        ``alldata`` is ``train_data + test_data``.

    Raises:
        IndexError: if fewer than two rows have a positive
            'PeriodsSepLastTwoNnZeroDemands' value.
    """
    # Target column first, then the feature columns, in the original order.
    columns = ['ZNZDemand', 'LastQtte', 'WeekDay', 'Interval',
               'PeriodsSepLastTwoNnZeroDemands', 'Month']
    n0 = np.where(data['PeriodsSepLastTwoNnZeroDemands'] > 0)[0][1]
    # horizontally stack columns
    dataset = np.hstack([data[name].values[n0:].reshape(-1, 1) for name in columns])

    X = []
    for i in range(len(dataset)):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out
        # check if we are beyond the dataset
        if (out_end_ix - 1) > len(dataset):
            break
        # gather input and output parts of the pattern
        seq_x = list(dataset[i:end_ix, 1:])
        seq_y = list(dataset[end_ix - 1:out_end_ix - 1, 0])
        X.append([seq_x, seq_y])

    # X is indexed from row n0 of `data`, so the split position must be relative
    # to that start. The previous code added n0 to it, which pushed n0 extra
    # windows into the training set and could leave the test set empty.
    n1 = int((data.shape[0] - n0) * 90 / 100)
    size = int(n1 / 5)
    train_data = X[:n1]
    test_data = X[n1:]
    firsttrain = X[:4 * size]
    firstvalid = X[4 * size:n1]
    alldata = train_data + test_data
    return firsttrain, firstvalid, train_data, test_data, alldata

In [None]:
class dataset_load(Dataset):
    """In-memory torch dataset built from ``[inputs, targets]`` pairs.

    Both halves of every pair are converted to float32 tensors once, at
    construction time; indexing then returns the matching slices.
    """

    def __init__(self,xy=None):
        # Collect the two halves of each pair separately before converting.
        inputs = [pair[0] for pair in xy]
        targets = [pair[1] for pair in xy]
        self.x_data = torch.from_numpy(np.asarray(inputs, dtype=np.float32))
        self.y_data = torch.from_numpy(np.asarray(targets, dtype=np.float32))
        self.len = len(self.x_data)

    def __getitem__(self, index):
        """Return the ``(inputs, targets)`` tensors stored at ``index``."""
        sample_x = self.x_data[index]
        sample_y = self.y_data[index]
        return sample_x, sample_y

    def __len__(self):
        """Number of samples held by the dataset."""
        return self.len

In [None]:
def make_confusion_matrix(cf,labels,pred,group_names=None,categories='auto',count=True,percent=True,cbar=True,xyticks=True,xyplotlabels=True,sum_stats=True,figsize=None,cmap='Blues',title=None):
    """Draw a confusion matrix as an annotated seaborn heatmap.

    Args:
        cf: 2-D confusion-matrix array (its ``size``, ``shape`` and
            ``flatten()`` are used).
        labels: true labels, used for the summary statistics of a 2x2 matrix.
        pred: predicted scores, used for the summary statistics of a 2x2 matrix.
        group_names: optional per-cell names; used only when their count
            equals ``cf.size``.
        categories: tick labels forwarded to ``sns.heatmap``.
        count: show the raw count in each cell.
        percent: show each cell as a share of the matrix total.
        cbar: show the colour bar.
        xyticks: show the category tick labels.
        xyplotlabels: show the 'True label' / 'Predicted label' axis labels.
        sum_stats: append summary statistics under the x axis
            (AU_PRC and AU_ROC for a 2x2 matrix, accuracy otherwise).
        figsize: figure size; matplotlib's default is used when ``None``.
        cmap: colormap name forwarded to ``sns.heatmap``.
        title: optional figure title.
    """
    blanks = ['' for _ in range(cf.size)]
    cell_values = cf.flatten()
    total = np.sum(cf)

    if group_names and len(group_names)==cf.size:
        group_labels = ["{}\n".format(value) for value in group_names]
    else:
        group_labels = blanks

    if count:
        group_counts = ["{0:0.0f}\n".format(value) for value in cell_values]
    else:
        group_counts = blanks

    if percent:
        group_percentages = ["{0:.2%}".format(value) for value in cell_values/total]
    else:
        group_percentages = blanks

    box_labels = [f"{v1}{v2}{v3}".strip() for v1, v2, v3 in zip(group_labels,group_counts,group_percentages)]
    box_labels = np.asarray(box_labels).reshape(cf.shape[0],cf.shape[1])

    # CODE TO GENERATE SUMMARY STATISTICS & TEXT FOR SUMMARY STATS
    if sum_stats:
        if len(cf)==2:
            # Metrics for binary confusion matrices. The curve helper is reached
            # through the imported `metrics` module; the bare name was never
            # imported and raised NameError.
            precision, recall, thresholds = metrics.precision_recall_curve(labels, pred)
            Auc_PRC=metrics.auc(recall, precision)
            Auc_ROC=metrics.roc_auc_score(labels, pred)
            stats_text = "\n\nAU_PRC={:0.3f}\nAU_ROC={:0.3f}".format(Auc_PRC,Auc_ROC)
        else:
            # Accuracy = diagonal mass / total. It was previously referenced
            # without ever being computed.
            accuracy = np.trace(cf) / float(total) if total else float('nan')
            stats_text = "\n\nAccuracy={:0.3f}".format(accuracy)
    else:
        stats_text = ""

    # SET FIGURE PARAMETERS ACCORDING TO OTHER ARGUMENTS
    if figsize is None:
        # Get default figure size if not set
        figsize = plt.rcParams.get('figure.figsize')

    if xyticks==False:
        # Do not show categories if xyticks is False
        categories=False

    # MAKE THE HEATMAP VISUALIZATION
    plt.figure(figsize=figsize)
    sns.heatmap(cf,annot=box_labels,fmt="",cmap=cmap,cbar=cbar,xticklabels=categories,yticklabels=categories)

    if xyplotlabels:
        plt.ylabel('True label')
        plt.xlabel('Predicted label' + stats_text)
    else:
        plt.xlabel(stats_text)

    if title:
        plt.title(title)

# Network

In [None]:
class Deepnet(nn.Module):
    """Single-layer recurrent encoder followed by a two-layer dense head.

    The recurrent layer is one of LSTM / BiLSTM / GRU / BiGRU. Its output at
    the last time step (for bidirectional variants: forward direction at the
    last step concatenated with backward direction at the first step) goes
    through dropout, a ReLU hidden layer, dropout again and a sigmoid output
    unit.

    The dense weights (``wHidden``, ``wHiddenBias``, ``wNeu``, ``wNeuBias``)
    are registered as ``nn.Parameter`` so they follow ``.to()`` and appear in
    ``parameters()`` / ``state_dict()``; they can still be handed to an
    optimizer by attribute as before.

    Args:
        RNN: one of 'LSTM', 'BiLSTM', 'GRU', 'BiGRU'.
        RNN_hidden_size: hidden size of the recurrent layer.
        RNN_sigma: std of the zero-mean normal init of the recurrent weights.
        layer_size: width of the dense hidden layer.
        N_sigma: std of the zero-mean normal init of the dense weights/biases.
        dropprob: dropout probability.
        n_features: number of input features per time step.

    Raises:
        ValueError: if ``RNN`` is not one of the supported names.
    """

    # architecture name -> (recurrent layer class, bidirectional?)
    _RNN_VARIANTS = {
        'LSTM': (nn.LSTM, False),
        'BiLSTM': (nn.LSTM, True),
        'GRU': (nn.GRU, False),
        'BiGRU': (nn.GRU, True),
    }

    def __init__ (self,RNN,RNN_hidden_size,RNN_sigma,layer_size,N_sigma,dropprob,n_features):
        super(Deepnet,self).__init__()
        if RNN not in self._RNN_VARIANTS:
            # Fail early with a clear message instead of an AttributeError on self.rnn.
            raise ValueError("Unknown RNN type {!r}; expected one of {}".format(
                RNN, sorted(self._RNN_VARIANTS)))

        self.RNN=RNN
        self.RNN_hidden_size=RNN_hidden_size
        self.RNN_sigma=RNN_sigma

        self.layer_size=layer_size
        self.N_sigma=N_sigma

        self.input_channels=n_features
        self.dropprob=dropprob

        rnn_class, self.bidirectional = self._RNN_VARIANTS[RNN]
        self.rnn = rnn_class(self.input_channels, RNN_hidden_size, num_layers=1,
                             bidirectional=self.bidirectional).to(device)
        # A bidirectional layer emits forward and backward states side by side.
        self.FC_size = 2*RNN_hidden_size if self.bidirectional else RNN_hidden_size
        for weight in self.get_weights():
            torch.nn.init.normal_(weight, mean=0, std=RNN_sigma)

        # weights between the recurrent layer and the fully connected layer
        self.wHidden = nn.Parameter(torch.empty(self.FC_size, self.layer_size, device=device))
        self.wHiddenBias = nn.Parameter(torch.empty(self.layer_size, device=device))

        # weights between the fully connected layer and the output node
        self.wNeu = nn.Parameter(torch.empty(self.layer_size, 1, device=device))
        self.wNeuBias = nn.Parameter(torch.empty(1, device=device))

        # The earlier randn / xavier_uniform initialisations were overwritten by
        # this normal init, so only the effective initialisation is kept.
        for param in (self.wNeu, self.wNeuBias, self.wHidden, self.wHiddenBias):
            torch.nn.init.normal_(param, mean=0, std=self.N_sigma)

        self.dropout = torch.nn.Dropout(p=dropprob, inplace=False) #Dropout Layer (Dropout rate= p)
        # Not used by forward(); kept so the attribute remains available.
        self.max = torch.nn.MaxPool1d(3, stride=1) #Pooling layer (pooling size =3)

    def get_weights(self):
        """Return the recurrent layer's weight tensors (names containing 'weight').

        NOTE(review): the recurrent bias tensors are not included, so an
        optimizer built only from this list does not update them. Confirm that
        this is intended.
        """
        return [param for name, param in self.rnn.named_parameters() if 'weight' in name]

    def forward(self,x):
        """Map a ``(batch, seq, feature)`` tensor to ``(batch, 1)`` sigmoid scores."""
        # The recurrent layer is built without batch_first, so it wants (seq, batch, feature).
        x=x.permute(1,0,2)
        output,_=self.rnn(x)
        if self.bidirectional:
            # Forward direction has seen the whole sequence at the last step,
            # the backward direction at the first step.
            Normal_RNN=output[-1, :, :self.RNN_hidden_size]
            Rev_RNN=output[0, :, self.RNN_hidden_size:]
            x = torch.cat((Normal_RNN, Rev_RNN), 1)
        else:
            x = output[-1, :, :]
        x=self.dropout(x)
        x=x @ self.wHidden + self.wHiddenBias
        x=x.clamp(min=0)  # ReLU
        x=self.dropout(x)
        x=x @ self.wNeu + self.wNeuBias
        return (torch.sigmoid(x))

# Calibration

In [None]:
def logsampler(a,b):
    """Draw one value between ``a`` and ``b``, uniform on a log10 scale."""
    fraction = np.random.uniform(low=0, high=1)
    log_low = math.log10(a)
    log_high = math.log10(b)
    # Interpolate between the two exponents, then map back to linear scale.
    return 10 ** ((log_high - log_low) * fraction + log_low)

In [None]:
def _calibration_validation_auc(model, valid_loader, metric, device, min_batch_size=60):
    """Mean per-batch AUC of ``model`` on ``valid_loader``; nan if no batch qualifies.

    Batches with ``min_batch_size`` rows or fewer are skipped. The model is put
    back into training mode before returning.
    """
    scores = []
    model.eval()
    with torch.no_grad():
        for data, target in valid_loader:
            output = model(data.to(device))
            n_rows = output.shape[0]
            if n_rows <= min_batch_size:
                continue
            pred = output.cpu().numpy().reshape(n_rows)
            labels = target.cpu().numpy().reshape(n_rows)
            if metric == 'ROC':
                scores.append(metrics.roc_auc_score(labels, pred))
            else:
                precision, recall, _ = metrics.precision_recall_curve(labels, pred)
                scores.append(metrics.auc(recall, precision))
    model.train()
    return float(np.mean(scores)) if scores else float('nan')


def Calibration(RNN,w1,w0,firsttrain_loader,firstvalid_loader,n_features,metric):
    """Random search over hyper-parameters, scored on the validation loader.

    Forty random configurations are trained on ``firsttrain_loader``. After
    epoch index 50, 100, ..., 400 the model is scored on ``firstvalid_loader``
    and the best (configuration, epoch index) pair is kept. An epoch index
    ``s`` means ``s + 1`` completed passes, which is what
    ``Train_model``'s ``learning_steps <= best_learning_steps`` loop runs.

    Args:
        RNN: recurrent architecture name passed to ``Deepnet``.
        w1: loss weight for samples whose target is 1.
        w0: loss weight for samples whose target is 0.
        firsttrain_loader: loader yielding ``(data, target)`` training batches.
        firstvalid_loader: loader yielding ``(data, target)`` validation batches.
        n_features: number of input features per time step.
        metric: 'ROC' (ROC AUC) or 'PRC' (area under the precision-recall curve).

    Returns:
        dict with keys 'best_learning_steps', 'best_LearningRate',
        'best_dropprob', 'best_RNN_hidden_size', 'best_layer_size',
        'best_RNN_sigma' and 'best_N_sigma'.

    Raises:
        ValueError: if ``metric`` is neither 'ROC' nor 'PRC'.
        RuntimeError: if no configuration produced a validation score above 0
            (for example when every validation batch is too small to score).
    """
    if metric not in ('ROC', 'PRC'):
        # Reject up front: the search would otherwise run to completion and
        # only then fail on unset results.
        raise ValueError("Choose proper metric: expected 'ROC' or 'PRC', got {!r}".format(metric))

    print('start')
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(device)

    # search space
    learning_steps_list = [50, 100, 150, 200, 250, 300, 350, 400]
    RNN_hidden_size_list = [20, 50, 80, 100]
    dropoutList = [0, 0.15, 0.3, 0.45]
    layer_size_list = [32, 64]
    learning_rate_list = [10**-5,10**-4,10**-3,10**-2]

    best_AUC = 0
    best_hyperparameters = None
    for number in range(40):
        # hyper-parameters
        RNN_hidden_size = random.choice(RNN_hidden_size_list)
        dropprob = random.choice(dropoutList)
        layer_size = random.choice(layer_size_list)
        learning_rate = random.choice(learning_rate_list)
        RNN_sigma = logsampler(10 ** -4, 10 ** -2)
        N_sigma = logsampler(10 ** -4, 10 ** -2)

        model = Deepnet(RNN,RNN_hidden_size,RNN_sigma,layer_size,N_sigma,dropprob,n_features).to(device)
        optimizer = torch.optim.Adam(
                model.get_weights() + [model.wNeu, model.wNeuBias, model.wHidden, model.wHiddenBias],
                lr=learning_rate)

        model.train()
        for learning_steps in range(max(learning_steps_list) + 1):
            for data, target in firsttrain_loader:
                data = data.to(device)
                target = target.to(device)

                # Forward pass; the weight is w1 where target == 1 and w0 where target == 0.
                output = model(data)
                loss = F.binary_cross_entropy(output, target,weight=((torch.abs((target)) * w1) - (torch.subtract(target,1) * w0)))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Score once per checkpoint, after the full epoch. Scoring inside
            # the batch loop produced one entry per batch, so the recorded
            # scores did not line up with learning_steps_list.
            if learning_steps not in learning_steps_list:
                continue
            AUC = _calibration_validation_auc(model, firstvalid_loader, metric, device)
            if AUC > best_AUC:
                best_AUC = AUC
                best_hyperparameters = {'best_learning_steps': learning_steps,
                                        'best_LearningRate': learning_rate,
                                        'best_dropprob': dropprob,
                                        'best_RNN_hidden_size': RNN_hidden_size,
                                        'best_layer_size': layer_size,
                                        'best_RNN_sigma': RNN_sigma,
                                        'best_N_sigma': N_sigma}

    if best_hyperparameters is None:
        raise RuntimeError('Calibration found no configuration with a validation AUC above 0; '
                           'check that validation batches hold more than 60 samples')

    print('best_AUC=', best_AUC)
    for key, value in best_hyperparameters.items():
        print(key + '=', value)
    return best_hyperparameters

In [None]:
def _pick_threshold(scores, thresholds):
    """Threshold with the highest score, ignoring non-finite entries.

    ``scores`` may be longer than ``thresholds`` (the precision-recall curve
    returns one more point than thresholds); surplus trailing scores are
    dropped. Returns ``None`` when nothing usable remains.
    """
    thresholds = np.asarray(thresholds, dtype=float)
    scores = np.asarray(scores, dtype=float)[:len(thresholds)]
    usable = np.isfinite(scores) & np.isfinite(thresholds)
    if not usable.any():
        return None
    return thresholds[int(np.argmax(np.where(usable, scores, -np.inf)))]


def _training_auc_and_threshold(model, loader, metric, min_batch_size=60):
    """Mean per-batch AUC and mean per-batch best threshold of ``model`` on ``loader``.

    Batches with ``min_batch_size`` rows or fewer are skipped. Returns
    ``(nan, None)`` when no batch qualifies. The model is left in eval mode.
    """
    aucs = []
    cutoffs = []
    model.eval()
    with torch.no_grad():
        for data, target in loader:
            output = model(data.to(device))
            n_rows = output.shape[0]
            if n_rows <= min_batch_size:
                continue
            pred = output.cpu().numpy().reshape(n_rows)
            labels = target.cpu().numpy().reshape(n_rows)
            if metric == 'ROC':
                aucs.append(metrics.roc_auc_score(labels, pred))
                fpr, tpr, thresholds = metrics.roc_curve(labels, pred)
                # geometric mean of sensitivity and specificity
                scores = np.sqrt(tpr * (1 - fpr))
            else:
                precision, recall, thresholds = metrics.precision_recall_curve(labels, pred)
                aucs.append(metrics.auc(recall, precision))
                # F1 score; defined as 0 where precision + recall == 0 so that
                # argmax cannot land on a NaN.
                denom = precision + recall
                scores = np.divide(2 * precision * recall, denom,
                                   out=np.zeros_like(denom, dtype=float), where=denom > 0)
            cutoff = _pick_threshold(scores, thresholds)
            if cutoff is not None:
                cutoffs.append(cutoff)
    mean_auc = float(np.mean(aucs)) if aucs else float('nan')
    mean_cutoff = float(np.mean(cutoffs)) if cutoffs else None
    return mean_auc, mean_cutoff


def Train_model(RNN,w1,w0,best_hyperparameters,train_loader,n_features,metric):
    """Train five models with the calibrated hyper-parameters and keep the best.

    Each model is trained for ``best_learning_steps + 1`` passes over
    ``train_loader`` and then scored on the same loader. The model with the
    highest mean AUC is returned together with its mean decision threshold.

    Args:
        RNN: recurrent architecture name passed to ``Deepnet``.
        w1: loss weight for samples whose target is 1.
        w0: loss weight for samples whose target is 0.
        best_hyperparameters: dict as returned by ``Calibration``.
        train_loader: loader yielding ``(data, target)`` batches.
        n_features: number of input features per time step.
        metric: 'ROC' (threshold maximises sqrt(tpr * (1 - fpr))) or
            'PRC' (threshold maximises F1).

    Returns:
        ``(best_model, best_threshold)``. If no model could be scored above 0,
        the first trained model is returned with the default threshold 0.5.

    Raises:
        ValueError: if ``metric`` is neither 'ROC' nor 'PRC'.
    """
    if metric not in ('ROC', 'PRC'):
        raise ValueError("Choose proper metric: expected 'ROC' or 'PRC', got {!r}".format(metric))

    best_learning_steps=best_hyperparameters['best_learning_steps']
    best_LearningRate=best_hyperparameters['best_LearningRate']
    best_RNN_hidden_size=best_hyperparameters['best_RNN_hidden_size']
    best_dropprob=best_hyperparameters['best_dropprob']
    best_N_sigma=best_hyperparameters['best_N_sigma']
    best_RNN_sigma=best_hyperparameters['best_RNN_sigma']
    best_layer_size=best_hyperparameters['best_layer_size']

    best_AUC=0
    best_threshold=0.5
    best_model=None
    for number_models in range(5):
        model = Deepnet(RNN,best_RNN_hidden_size,best_RNN_sigma,best_layer_size,best_N_sigma,best_dropprob,n_features).to(device)
        optimizer = torch.optim.Adam(
                model.get_weights() + [model.wNeu, model.wNeuBias, model.wHidden, model.wHiddenBias],
                lr=best_LearningRate)

        model.train()
        for learning_steps in range(best_learning_steps + 1):
            for data, target in train_loader:
                data = data.to(device)
                target = target.to(device)

                # Forward pass; the weight is w1 where target == 1 and w0 where target == 0.
                output = model(data)
                loss = F.binary_cross_entropy(output, target,weight=((torch.abs((target)) * w1) - (torch.subtract(target,1) * w0)))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        AUC_training, threshold = _training_auc_and_threshold(model, train_loader, metric)
        print('AUC on training data for model ',number_models+1,' = ',AUC_training)
        if AUC_training>best_AUC:
            best_AUC=AUC_training
            best_model=model
            if threshold is not None:
                best_threshold=threshold
        elif best_model is None:
            # Fallback so a model is always returned, even when no AUC could be
            # computed (previously this ended in an unbound-variable error).
            best_model=model
    return best_model,best_threshold

In [None]:
def test_predict(best_model,test_loader):

    with torch.no_grad():
        best_model.eval()
        auc = []

        for i, (data, target) in enumerate(test_loader):
            data = data.to(device)
            target = target.to(device)

            # Forward pass
            output = best_model(data)
            pred = output.cpu().detach().numpy().reshape(output.shape[0])
            myprob = "\n".join(map(str, pred[:]))
            labels = target.cpu().numpy().reshape(output.shape[0])
            if output.shape[0] > 50:
                auc.append(metrics.roc_auc_score(labels, pred))

        AUC_test = np.mean(auc)
    return (labels,pred)

In [None]:
evaluate_performance = True

# Product data path
path = "./gen data/"
product = "Prd1"
DATA_Micro = pd.read_csv(f"{path}{product}.csv", sep=';', encoding="ISO-8859-1")

# choose a number of time steps
Nout = 1
Nin = 1
metric = 'ROC'

# Data process: window the series and split it into the five sample lists
n_steps_in, n_steps_out = Nin, Nout
(firsttrain, firstvalid, train_data,
 test_data, alldata) = Convert_format1(DATA_Micro, n_steps_in, n_steps_out)
# feature count = length of one input row of the first sample
n_features = firsttrain[0][0][0].shape[0]

# Wrap every split in a tensor dataset
firsttrain_dataset = dataset_load(firsttrain)
firstvalid_dataset = dataset_load(firstvalid)
train_dataset = dataset_load(train_data)
test_dataset = dataset_load(test_data)
all_dataset = dataset_load(alldata)

# Batch loaders; order is preserved (no shuffling)
batch_size = 64
firsttrain_loader = DataLoader(dataset=firsttrain_dataset, batch_size=batch_size, shuffle=False)
firstvalid_loader = DataLoader(dataset=firstvalid_dataset, batch_size=batch_size, shuffle=False)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
alldata_loader = DataLoader(dataset=all_dataset, batch_size=10000, shuffle=False)

# Generate Confusion matrix

In [None]:
# NOTE(review): RNN, w1 and w0 used to be assigned only by the sweep cell further
# down, so this cell failed on a fresh kernel. The values below mirror the first
# sweep configuration -- confirm they are the ones intended for these plots.
RNN='LSTM'
w1=1
w0=1
best_hyperparameters=Calibration(RNN,w1,w0,firsttrain_loader,firstvalid_loader,n_features,metric)
best_model,best_threshold=Train_model(RNN,w1,w0,best_hyperparameters,train_loader,n_features,metric)
labels,pred=test_predict(best_model,test_loader)
# Binarise in one step so that scores equal to the threshold also get a class
# label (they were previously left as raw probabilities).
prediction_test=np.where(pred>=best_threshold,1,0)
# `train_predict` is not defined in this notebook; `test_predict` works on any loader.
labels_train,pred_train=test_predict(best_model,alldata_loader)
prediction_train=np.where(pred_train>=best_threshold,1,0)

In [None]:
# Confusion matrix for the predictions made on alldata_loader.
sns.set(font_scale=1.5)
# confusion_matrix is reached through the imported `metrics` module; the bare
# name was never imported.
cf = metrics.confusion_matrix(labels_train,prediction_train)
Matrix_labels = ["TN","FP","FN","TP"]
categories = ["N", "P"]
make_confusion_matrix(cf,labels_train,pred_train, group_names=Matrix_labels,categories=categories, cmap="binary")

In [None]:
# Confusion matrix for the predictions made on test_loader.
sns.set(font_scale=1.5)
# confusion_matrix is reached through the imported `metrics` module; the bare
# name was never imported.
cf = metrics.confusion_matrix(labels,prediction_test)
Matrix_labels = ["TN","FP","FN","TP"]
categories = ["N", "P"]
make_confusion_matrix(cf,labels,pred, group_names=Matrix_labels,categories=categories, cmap="binary")

# Generate results for different w1 values and save them to a CSV file

In [None]:
# Model process
header = ['product', 'w1','LSTM', 'BiLSTM', 'GRU','BiGRU']
RNN_list=['LSTM','BiLSTM','GRU','BiGRU']
results_file='Bi_LSTM_GRU.csv'

# Positive-class weights to try: 1, half the 0/1 count ratio, and the full ratio.
demand_counts=DATA_Micro['ZNZDemand'].value_counts()
imbalance_ratio=demand_counts[0]/demand_counts[1]
w1_list=[1,imbalance_ratio/2,imbalance_ratio]
w0=1
for w1 in w1_list:
    AUC_list=[]
    for RNN in RNN_list:
        best_hyperparameters=Calibration(RNN,w1,w0,firsttrain_loader,firstvalid_loader,n_features,metric)
        best_model,best_threshold=Train_model(RNN,w1,w0,best_hyperparameters,train_loader,n_features,metric)
        # test_predict returns (labels, pred); turn them into the scalar score
        # that is stored (the tuple itself used to be written to the CSV).
        sweep_labels,sweep_pred=test_predict(best_model,test_loader)
        auc=metrics.roc_auc_score(sweep_labels,sweep_pred)
        AUC_list.append(auc)
    dicti={
    'product':product,
    'w1':w1,
    'LSTM':AUC_list[0],
    'BiLSTM':AUC_list[1],
    'GRU':AUC_list[2],
    'BiGRU':AUC_list[3]
    }
    # saving results: start a new file (with header) when it is missing or
    # empty, otherwise append a row
    new=(not os.path.isfile(results_file)) or os.path.getsize(results_file)==0
    with open(results_file,'w' if new else 'a',newline='') as fd:
        writer = csv.DictWriter(fd, fieldnames=header)
        if new:
            writer.writeheader()
        writer.writerow(dicti)