### Import Statements

In [32]:
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import array
import datetime
import time
import calendar
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.model_selection import train_test_split

sys.path.append(os.path.abspath(os.path.join('..')))
from scripts.helper_functions import data_normalize

In [2]:
df = pd.read_csv('../data/clean_data.csv')

In [3]:
df.head()

Unnamed: 0,campaign_id,campaign_name,submission_date,description,campaign_objectives,kpis,placements,start_date,end_date,serving_locations,...,volume_agreed,gross_cost,agency_fee,percentage,net_cost,click-through-event,first_dropped,impression,engagement_rate,click_through_rate
0,12dc55z,Fox 9-1-1 S4 Premiere | Sensory Video | AV,19/12/2019 17:38,Hey @alicia below are the details for the upco...,Brand Awareness,Engagement Rate,320x480 (Fullscreen mobile / interstitial),13/01/2020,20/12/2019,US,...,0.0,0.0,Percentage,15.0,0.0,660,1196,8144,0.146857,0.551839
1,22yeess,Lionsgate Spiral Movie | Sensory Engagement | ...,21/04/2021 16:54,Hello Design Team - Below is the info for the ...,Brand Awareness\nAudience Engagement,VTR\nCTR\nEngagement Rate,320x480 (Fullscreen mobile / interstitial),30/04/2021,16/05/2021,US National,...,151515.15,50000.0,Percentage,0.0,50000.0,98,567,9335,0.060739,0.17284
2,2z6pk1p,VCA_FRIVOLE WAVE 2_2021_SG - CPE Algo,15/12/2020 23:57,Storyboard Stage : \r\nThe storyboards are cur...,Brand Awareness\nProduct Awareness\nMessage Re...,Engagement Rate,320x480 (Fullscreen mobile / interstitial)\n30...,11/01/2021,31/01/2021,Singapore,...,40336.0,25411.68,Percentage,15.0,21599.93,99,904,8997,0.100478,0.109513
3,3ej4hd8,Ad Sequence - Sensory | CPE | P1B7VZ3 - CPE Algo,22/04/2020 11:25,Hey Kentaro - below is the latest live campaig...,Brand Awareness\nProduct Awareness\nConsiderat...,CTR\nEngagement Rate,320x480 (Fullscreen mobile / interstitial),04/05/2020,31/08/2020,USA,...,5963333.33,1789000.0,Percentage,0.0,1789000.0,0,6,15,0.4,0.0
4,5qtwg2a,Lexus East | Philadelphia | Sensory Video | C...,05/10/2020 12:50,Hey team see details for the campaign below: \...,Brand Awareness,CTR\nEngagement Rate,320x480 (Fullscreen mobile / interstitial),12/10/2020,31/12/2020,USA,...,0.0,0.0,Percentage,15.0,0.0,1116,1369,7515,0.182169,0.815194


In [4]:
# convert submission_date to year, month, day, hour and minute
df['submission_date'] = pd.to_datetime(df['submission_date'])
df['submission_date_year'] = df['submission_date'].dt.year
df['submission_date_month'] = df['submission_date'].dt.month
df['submission_date_date'] = df['submission_date'].dt.day
df['submission_date_hour'] = df['submission_date'].dt.hour
df['submission_date_minute'] = df['submission_date'].dt.minute
df = df.drop(['submission_date'], axis=1)

In [5]:
# convert start_date to year, month, day, hour and minute
df['start_date'] = pd.to_datetime(df['start_date'], infer_datetime_format=True)
df['start_date_year'] = df['start_date'].dt.year
df['start_date_month'] = df['start_date'].dt.month
df['start_date_date'] = df['start_date'].dt.day
df = df.drop(['start_date'], axis=1)

In [6]:
# convert end_date to year, month, day, hour and minute
df['end_date'] = pd.to_datetime(df['end_date'], infer_datetime_format=True)
df['end_date_year'] = df['end_date'].dt.year
df['end_date_month'] = df['end_date'].dt.month
df['end_date_date'] = df['end_date'].dt.day
df = df.drop(['end_date'], axis=1)

In [7]:
data = df.drop(['campaign_id', 'campaign_name', 'description', 'campaign_objectives', 'kpis', 'placements', 'serving_locations', 'cost_centre', 'currency', 'agency_fee', 'percentage'], axis=1)

In [8]:
data

Unnamed: 0,black_white_audience_list_included,buy_rate,volume_agreed,gross_cost,net_cost,click-through-event,first_dropped,impression,engagement_rate,click_through_rate,...,submission_date_month,submission_date_date,submission_date_hour,submission_date_minute,start_date_year,start_date_month,start_date_date,end_date_year,end_date_month,end_date_date
0,0,0.3,0.0,0.0,0.0,660,1196,8144,0.146857,0.551839,...,12,19,17,38,2020,1,13,2019,12,20
1,1,0.33,151515.15,50000.0,50000.0,98,567,9335,0.060739,0.17284,...,4,21,16,54,2021,4,30,2021,5,16
2,0,0.63,40336.0,25411.68,21599.93,99,904,8997,0.100478,0.109513,...,12,15,23,57,2021,1,11,2021,1,31
3,1,0.3,5963333.33,1789000.0,1789000.0,0,6,15,0.4,0.0,...,4,22,11,25,2020,5,4,2020,8,31
4,1,0.4,0.0,0.0,0.0,1116,1369,7515,0.182169,0.815194,...,5,10,12,50,2020,10,12,2020,12,31
5,1,0.3,5963333.33,1789000.0,1789000.0,0,0,58,0.0,,...,4,22,11,25,2020,5,4,2020,8,31
6,1,0.4,0.0,0.0,0.0,850,1112,8038,0.138343,0.764388,...,5,10,12,50,2020,10,12,2020,12,31
7,1,0.4,531179.0,212471.6,212471.6,851,1256,7893,0.159128,0.677548,...,2,14,14,48,2020,3,1,2020,11,30
8,1,0.35,214285.71,75000.0,75000.0,198,1667,8126,0.205144,0.118776,...,12,5,18,54,2021,5,22,2021,6,20
9,1,0.49,64826.0,31764.74,27000.03,48,348,9604,0.036235,0.137931,...,12,22,6,24,2021,1,8,2021,2,18


In [9]:
data.dropna(inplace=True)
data = data.drop([48])

In [10]:
data

Unnamed: 0,black_white_audience_list_included,buy_rate,volume_agreed,gross_cost,net_cost,click-through-event,first_dropped,impression,engagement_rate,click_through_rate,...,submission_date_month,submission_date_date,submission_date_hour,submission_date_minute,start_date_year,start_date_month,start_date_date,end_date_year,end_date_month,end_date_date
0,0,0.3,0.0,0.0,0.0,660,1196,8144,0.146857,0.551839,...,12,19,17,38,2020,1,13,2019,12,20
1,1,0.33,151515.15,50000.0,50000.0,98,567,9335,0.060739,0.17284,...,4,21,16,54,2021,4,30,2021,5,16
2,0,0.63,40336.0,25411.68,21599.93,99,904,8997,0.100478,0.109513,...,12,15,23,57,2021,1,11,2021,1,31
3,1,0.3,5963333.33,1789000.0,1789000.0,0,6,15,0.4,0.0,...,4,22,11,25,2020,5,4,2020,8,31
4,1,0.4,0.0,0.0,0.0,1116,1369,7515,0.182169,0.815194,...,5,10,12,50,2020,10,12,2020,12,31
6,1,0.4,0.0,0.0,0.0,850,1112,8038,0.138343,0.764388,...,5,10,12,50,2020,10,12,2020,12,31
7,1,0.4,531179.0,212471.6,212471.6,851,1256,7893,0.159128,0.677548,...,2,14,14,48,2020,3,1,2020,11,30
8,1,0.35,214285.71,75000.0,75000.0,198,1667,8126,0.205144,0.118776,...,12,5,18,54,2021,5,22,2021,6,20
9,1,0.49,64826.0,31764.74,27000.03,48,348,9604,0.036235,0.137931,...,12,22,6,24,2021,1,8,2021,2,18
10,1,0.4,0.0,0.0,0.0,775,1066,8159,0.130653,0.727017,...,5,10,12,50,2020,10,12,2020,12,31


In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 42 entries, 0 to 51
Data columns (total 21 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   black_white_audience_list_included  42 non-null     int64  
 1   buy_rate                            42 non-null     float64
 2   volume_agreed                       42 non-null     float64
 3   gross_cost                          42 non-null     float64
 4   net_cost                            42 non-null     float64
 5   click-through-event                 42 non-null     int64  
 6   first_dropped                       42 non-null     int64  
 7   impression                          42 non-null     int64  
 8   engagement_rate                     42 non-null     float64
 9   click_through_rate                  42 non-null     float64
 10  submission_date_year                42 non-null     int64  
 11  submission_date_month               42 non-null

In [13]:
data_n = data_normalize(data)

In [14]:
target_n = data_n['click_through_rate']
len(target_n)

42

In [15]:
data_n.drop(['click_through_rate'], axis=1, inplace=True)
data_n.head()

Unnamed: 0,black_white_audience_list_included,buy_rate,volume_agreed,gross_cost,net_cost,click-through-event,first_dropped,impression,engagement_rate,submission_date_year,submission_date_month,submission_date_date,submission_date_hour,submission_date_minute,start_date_year,start_date_month,start_date_date,end_date_year,end_date_month,end_date_date
0,0.0,0.001082,0.0,0.0,0.0,0.445645,0.671356,0.831441,0.355018,0.0,1.0,0.571429,0.73913,0.641509,0.0,0.0,0.413793,0.0,1.0,0.56
1,1.0,0.002164,0.025408,0.027949,0.027949,0.066172,0.317389,0.953258,0.1356,1.0,0.272727,0.642857,0.695652,0.943396,1.0,0.272727,1.0,1.0,0.363636,0.4
2,0.0,0.012982,0.006764,0.014204,0.012074,0.066847,0.507034,0.918687,0.23685,0.5,1.0,0.428571,1.0,1.0,1.0,0.0,0.344828,1.0,0.0,1.0
3,1.0,0.001082,1.0,1.0,1.0,0.0,0.001688,0.0,1.0,0.5,0.272727,0.678571,0.478261,0.396226,0.0,0.363636,0.103448,0.5,0.636364,1.0
4,1.0,0.004688,0.0,0.0,0.0,0.753545,0.768711,0.767106,0.44499,0.5,0.363636,0.25,0.521739,0.867925,0.0,0.818182,0.37931,0.5,1.0,1.0


In [16]:
data_n.values.tolist()
target_n.tolist()
x_train,x_test,y_train,y_test = train_test_split(data_n,target_n, test_size=0.2, random_state=4)
x_train.shape

(33, 20)

In [17]:
x_train= np.array(x_train)
y_train= np.array(y_train)

x_train = x_train.reshape((1,33, 20))
y_train = y_train.reshape((1,33, 1))

x_test= np.array(x_test)
y_test= np.array(y_test)

x_test = x_test.reshape((1,9, 20))
y_test = y_test.reshape((1,9, 1))

In [18]:
model = Sequential()

2022-08-10 16:15:27.067184: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [19]:
model.add(LSTM(16, input_shape=(x_train.shape[1], x_train.shape[2]),return_sequences=True))
model.add(LSTM(1, return_sequences=False))
model.add(Dense(1))

model.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 33, 16)            2368      
                                                                 
 lstm_1 (LSTM)               (None, 1)                 72        
                                                                 
 dense (Dense)               (None, 1)                 2         
                                                                 
Total params: 2,442
Trainable params: 2,442
Non-trainable params: 0
_________________________________________________________________


In [22]:
history = model.fit(x_train, y_train, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [24]:
model.predict(x_train)



array([[0.30824724]], dtype=float32)

In [25]:
results = model.predict(x_test)

