In [1]:
import pandas as pd
import plotly.express as px
from plotly.offline import plot

# Purpose of this notebook

### This notebook plot the results of my investigation on the computation time in inference and backward on differents recurrents neural networks using Pytorch.

We have 4 differents models : 
<br>
<br>
GRU -> Gated Recurrent Unit
<br>
LSTM -> Long Short Term Memory 
<br>
LiGRU -> Ligth Gated Recurrent Unit
<br>
Compiled Ligru -> Ligth Gated Recurrent Unit compiled with torchscript
<br>
<br>
We investigate the results of these models on differents time step (from 5 to 10k) and input shape (100).

# Benchmark models

To benchmark these models I have created a pipeline in src/ you juste have to modify values in the config.py and run the benchmark.py with python benchmark.py

The benchmark.py will call the train method in engine.py that will create the csv if he is not created in /data and if he is already there he will juste append a new row

If you modify the time step & the input shape the csv will be automatically generated has : 
<b>CSV_PATH = "../data/SeqLen" + str(SEQ_LEN_MAX) + "InputShape" + str(INPUT_SHAPE) + ".csv"</b>


# Setup Benchmark

To benchmark these models I have used : 
<br>
GPU : 1070 Ti 8 Gb VRAM
<br>
CPU : i5-8500
<br>
RAM : 16 Gb 
<br>

On each models the same X & Y vectors has been compute with a seed of 27. (you can change the seed in engine.py)

# Config.py general settings

### seq_len parameter 
SEQ_LEN_MAX = 10_005
<br>
SEQ_LEN_STEP = 1_000
<br>
SEQ_LEN = 5

### input parameters
BATCH_SIZE = 1
<br>
INPUT_SHAPE = 100
<br>
SEED_NUMBER = 27
### model parameters
NUM_LAYERS = 5
<br>
HIDDEN_SIZE = 32
<br>
INPUT_NUMBER = 1

## Load CSV

In [10]:
df_kernel_forward = pd.read_csv("../data/SeqLen6000InputShape1000v1.csv")

 ## ----------------------------------------- TRAINING TIME RESULTS -----------------------------------------

## Plot training time results

In [11]:
fig = px.line(df_kernel_forward,y="Time_Inference",
                 x="Time_Step",
                 color='Model')

fig.update_yaxes(title_text="Time Training in seconds")

fig.update_layout(title="Traning time - 8k Time Step - 1000 Input Shape",
                 font_family="Rockwell")

In [4]:
df = pd.read_csv("../data/SeqLen8000InputShape2000cupy_jit_true.csv")

In [5]:
df = df[(df["Model"] == "LSTM") | (df["Model"] == "ligru") | (df["Model"] == "ligru_compiled") | (df["Model"] == "ligru_cupy_jit")]

In [6]:
fig = px.line(df,y="Training (s)",
                 x="Timestep",
                 color='Model')

fig.update_yaxes(title_text="Seconds")

fig.update_layout(title="Backward Pass - 4 Layers, 1024 Hidden Size, 2000 Input Shape",
                 font_family="Rockwell")

In [7]:
fig = px.line(df,y="Inference (s)",
                 x="Timestep",
                 color='Model')

fig.update_yaxes(title_text="Seconds")

fig.update_layout(title="Forward Pass - 4 Layers, 1024 Hidden Size, 2000 Input Shape",
                 font_family="Rockwell")

In [8]:
df["inf_train"] = df["Training (s)"] + df["Inference (s)"]

In [9]:
fig = px.line(df,y="inf_train",
                 x="Timestep",
                 color='Model')

fig.update_yaxes(title_text="Seconds")

fig.update_layout(title="Training (Inference + Backward)",
                 font_family="Rockwell")

In [31]:
c_mean_train = df[df["Model"] == "ligru_compiled"]["Training (s)"].mean()  

In [32]:
a_mean_train = df[df["Model"] == "ligru_cupy_jit"]["Training (s)"].mean()

In [33]:
c_mean_inference = df[df["Model"] == "ligru_compiled"]["Inference (s)"].mean()  

In [34]:
a_mean_inference = df[df["Model"] == "ligru_cupy_jit"]["Inference (s)"].mean()

In [35]:
val_compiled = c_mean_train + c_mean_inference

In [36]:
val_cupy = a_mean_train + a_mean_inference

In [37]:
val_compiled 

4.7596998244524

In [40]:
val_cupy * 2.75

4.536660875193775

In [41]:
b_mean_train = df[df["Model"] == "ligru"]["Training (s)"].mean()  
b_mean_inference = df[df["Model"] == "ligru"]["Inference (s)"].mean()  
v_ligru = b_mean_train + b_mean_inference

In [42]:
v_ligru / val_cupy

4.640487077363094

In [46]:
val_cupy * 4.6

7.588596373051405

In [44]:
v_ligru

7.655387696623802

In [55]:
l_mean_train = df_autograd[df_autograd["Model"] == "ligru"]["Training (s)"].mean()  
l_mean_inference = df_autograd[df_autograd["Model"] == "ligru"]["Inference (s)"].mean()  

In [56]:
(l_mean_train + l_mean_inference) / val_cupy

3.9134546354473168

In [57]:
val_compiled / val_cupy

2.4018110812064952

In [15]:
df_autograd["inf_train"] = df_autograd["Training (s)"] + df_autograd["Inference (s)"]

In [16]:
fig = px.line(df_autograd,y="inf_train",
                 x="Timestep",
                 color='Model')

fig.update_yaxes(title_text="Inference + Backward")

fig.update_layout(title="T8k Time Step - 1000 Input Shape",
                 font_family="Rockwell")

 ## --------------------------------------- INFERENCE TIME RESULTS ---------------------------------------

## Plot inference time results

In [4]:
fig = px.line(dfCpu,y="Time Inference",
                 x="Time Step",
                 color='Model')

fig.update_yaxes(title_text="Time Inference in seconds")

fig.update_layout(title="Time Inference - CPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLiGRU",
                 font_family="Rockwell")

NameError: name 'dfCpu' is not defined

# *---------------------------------------- GPU RESULTS ----------------------------------------*

## Load CSV

In [5]:
# config given in introduction
dfGpu = pd.read_csv("../data/SeqLen10005InputShape100FinalGPU.csv")

# same config just 5 layers and 1024 hidden size
dfGpuHeavy = pd.read_csv("../data/../data/SeqLen10005InputShape100FinalGpuHeavy.csv")

 ## ----------------------------------------- TRAINING TIME RESULTS -----------------------------------------

## Plot training time results version light

In [6]:
fig = px.line(dfGpu,y="Time Training",
                 x="Time Step",
                 color='Model')

fig.update_yaxes(title_text="Time Training in seconds")

fig.update_layout(title="Time Training - GPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLiGRU",
                 font_family="Rockwell")

## Plot training time results version heavy

In [7]:
fig = px.line(dfGpuHeavy,y="Time Training",
                 x="Time Step",
                 color='Model')

fig.update_yaxes(title_text="Time Training in seconds")

fig.update_layout(title="Time Training - GPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLiGRU",
                 font_family="Rockwell")

 ## --------------------------------------- INFERENCE TIME RESULTS ---------------------------------------

## Plot inference time results version light

In [8]:
fig = px.line(dfGpu,y="Time Inference",
                 x="Time Step",
                 color='Model')

fig.update_yaxes(title_text="Time Inference in seconds")

fig.update_layout(title="Time Inference - GPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLiGRU",
                 font_family="Rockwell")

## Plot inference time results version heavy

In [9]:
fig = px.line(dfGpuHeavy,y="Time Inference",
                 x="Time Step",
                 color='Model')

fig.update_yaxes(title_text="Time Inference in seconds")

fig.update_layout(title="Time Inference - GPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLiGRU",
                 font_family="Rockwell")

# *-------------------------------- GPU AND CPU RESULTS ---------------------------------*

## Merge Both CSV

In [10]:
# load both csv files
dfCpu = pd.read_csv("../data/SeqLen10005InputShape100FinalCPU.csv")
dfGpu = pd.read_csv("../data/SeqLen10005InputShape100FinalGPU.csv")

# modify 'Model' column to let us know wich example is a cpu or gpu 
dfCpu["Model"] = dfCpu["Model"].apply(lambda x : x+"_cpu")
dfGpu["Model"] = dfGpu["Model"].apply(lambda x : x+"_gpu")

# create a new DataFrame 
df = dfCpu.append(dfGpu)

# new column gpu 
df["GPU"] = 1

# the first elements of our df are cpu so we set at 0 
df["GPU"][len(dfCpu):] = 0



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



 ## ----------------------------------------- TRAINING TIME RESULTS -----------------------------------------

## Plot training time results

In [11]:
fig = px.line(df,y="Time Training",
                 x="Time Step",
                 color='Model',line_dash="GPU")

fig.update_yaxes(title_text="Training Time in seconds")

fig.update_layout(title="Training Time - GPU & CPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLigru",
                 font_family="Rockwell")

 ## --------------------------------------- INFERENCE TIME RESULTS ---------------------------------------

## Plot inference time results

In [12]:
fig = px.line(df,y="Time Inference",
                 x="Time Step",
                 color='Model',line_dash="GPU")

fig.update_yaxes(title_text="Inference Time in seconds")

fig.update_layout(title="Inference Time - GPU & CPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLigru",
                 font_family="Rockwell")

# *--------------- GPU RESULTS  (5 layers and 1024 neurons)  -----------------------*

In [17]:
# same config just 5 layers and 1024 hidden size
dfGpuHeavy = pd.read_csv("../data/../data/SeqLen10005InputShape100GpuFinal.csv")

In [18]:
fig = px.line(dfGpuHeavy,y="Time Training",
                 x="Time Step",
                 color='Model')

fig.update_yaxes(title_text="Training Time in seconds")

fig.update_layout(title="Training Time - GPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLigru",
                 font_family="Rockwell")

In [20]:
fig = px.line(dfGpuHeavy,y="Time Inference",
                 x="Time Step",
                 color='Model')

fig.update_yaxes(title_text="Inference Time in seconds")

fig.update_layout(title="Inference Time - GPU VERSION - 10k Time Step - 100 Input Shape - LSTM, GRU, LiGRU, CLigru",
                 font_family="Rockwell")