In [64]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, SimpleRNN

In [65]:
db = pd.read_csv('./campaign1.csv')
db.columns

Index(['campaign_item_id', 'no_of_days', 'time', 'ext_service_id',
       'ext_service_name', 'creative_id', 'creative_width', 'creative_height',
       'search_tags', 'template_id', 'landing_page', 'advertiser_id',
       'advertiser_name', 'network_id', 'approved_budget',
       'advertiser_currency', 'channel_id', 'channel_name', 'max_bid_cpm',
       'network_margin', 'campaign_budget_usd', 'impressions', 'clicks',
       'stats_currency', 'currency_code', 'exchange_rate', 'media_cost_usd',
       'position_in_content', 'unique_reach', 'total_reach', 'search_tag_cat',
       'cmi_currency_code', 'timezone', 'weekday_cat', 'keywords'],
      dtype='object')

In [66]:
db.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72612 entries, 0 to 72611
Data columns (total 35 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   campaign_item_id     72612 non-null  int64  
 1   no_of_days           72612 non-null  int64  
 2   time                 72612 non-null  object 
 3   ext_service_id       72612 non-null  int64  
 4   ext_service_name     72612 non-null  object 
 5   creative_id          72612 non-null  int64  
 6   creative_width       69200 non-null  float64
 7   creative_height      69200 non-null  float64
 8   search_tags          72612 non-null  object 
 9   template_id          69200 non-null  float64
 10  landing_page         72612 non-null  object 
 11  advertiser_id        72612 non-null  int64  
 12  advertiser_name      72612 non-null  object 
 13  network_id           72612 non-null  int64  
 14  approved_budget      72206 non-null  float64
 15  advertiser_currency  72612 non-null 

In [67]:
db.drop(['campaign_item_id', 
         'ext_service_id', 
         'creative_id',
         'advertiser_id',
         'channel_id', 
         'channel_name', 
         'timezone', 
         'search_tags', 
         'template_id', 
         'network_margin', 
         'exchange_rate', 
         'network_id', 
         'landing_page',
         'cmi_currency_code',
         'advertiser_name', 
         'keywords',
         'search_tag_cat',
         'stats_currency',
         'position_in_content',
         'total_reach',
         'max_bid_cpm',
         'unique_reach' 
         ], axis=1, inplace=True)
db

Unnamed: 0,no_of_days,time,ext_service_name,creative_width,creative_height,approved_budget,advertiser_currency,campaign_budget_usd,impressions,clicks,currency_code,media_cost_usd,weekday_cat
0,7,2022-05-01,Facebook Ads,300.0,250.0,400.0,SGD,652.173913,837,8,SGD,14.058514,week_end
1,8,2022-05-02,DV360,300.0,250.0,400.0,SGD,652.173913,2634,44,SGD,99.633496,week_day
2,9,2022-05-03,Facebook Ads,300.0,250.0,400.0,SGD,652.173913,2135,32,SGD,109.419677,week_day
3,10,2022-05-04,Facebook Ads,300.0,250.0,400.0,SGD,652.173913,2327,48,SGD,115.209499,week_day
4,11,2022-05-05,Google Ads,300.0,250.0,400.0,SGD,652.173913,1538,20,SGD,66.990104,week_day
...,...,...,...,...,...,...,...,...,...,...,...,...,...
72607,11,2022-11-28,Google Ads,,,442054.0,INR,306.635594,1059,56,INR,4.858090,week_day
72608,12,2022-11-29,Facebook Ads,,,442054.0,INR,306.635594,865,41,INR,3.536262,week_day
72609,13,2022-11-30,Facebook Ads,,,442054.0,INR,306.635594,646,21,INR,1.947816,week_day
72610,14,2022-12-01,Google Ads,,,442054.0,INR,306.635594,658,20,INR,1.711467,week_day


In [68]:
db.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72612 entries, 0 to 72611
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   no_of_days           72612 non-null  int64  
 1   time                 72612 non-null  object 
 2   ext_service_name     72612 non-null  object 
 3   creative_width       69200 non-null  float64
 4   creative_height      69200 non-null  float64
 5   approved_budget      72206 non-null  float64
 6   advertiser_currency  72612 non-null  object 
 7   campaign_budget_usd  72612 non-null  float64
 8   impressions          72612 non-null  int64  
 9   clicks               72612 non-null  int64  
 10  currency_code        72612 non-null  object 
 11  media_cost_usd       72612 non-null  float64
 12  weekday_cat          72612 non-null  object 
dtypes: float64(5), int64(3), object(5)
memory usage: 7.2+ MB


In [69]:
# Convert time columns to datetime
db['time'] = pd.to_datetime(db['time'], format='%Y-%m-%d')
db['year'] = db['time'].dt.year
db['month'] = db['time'].dt.month
db['day'] = db['time'].dt.day
db.drop('time', axis=1, inplace=True)
db.dropna(inplace=True)

db_dummies = pd.get_dummies(db, columns=['weekday_cat', 'ext_service_name', 'currency_code', 'advertiser_currency'], drop_first=True)
db_dummies.rename(columns={'weekday_cat_week_day': 'weekDay', 'weekday_cat_week_end': 'weekeEnd'}, inplace=True)
db_dummies.info()

<class 'pandas.core.frame.DataFrame'>
Index: 68880 entries, 0 to 69199
Data columns (total 22 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   no_of_days                     68880 non-null  int64  
 1   creative_width                 68880 non-null  float64
 2   creative_height                68880 non-null  float64
 3   approved_budget                68880 non-null  float64
 4   campaign_budget_usd            68880 non-null  float64
 5   impressions                    68880 non-null  int64  
 6   clicks                         68880 non-null  int64  
 7   media_cost_usd                 68880 non-null  float64
 8   year                           68880 non-null  int32  
 9   month                          68880 non-null  int32  
 10  day                            68880 non-null  int32  
 11  weekeEnd                       68880 non-null  bool   
 12  ext_service_name_Facebook Ads  68880 non-null  bool

In [70]:
# Copying the databases
df_budget = db_dummies['campaign_budget_usd']
df_impression = db_dummies['impressions']
df_clicks = db_dummies['clicks']
df_media_cost_usd = db_dummies['media_cost_usd']

In [71]:
# train-test split
X_budget = db_dummies.copy().drop(['campaign_budget_usd'], axis=1)
X_impression = db_dummies.copy().drop(['impressions'], axis=1)
X_clicks = db_dummies.copy().drop(['clicks'], axis=1)
X_media_cost_usd = db_dummies.copy().drop(['media_cost_usd'], axis=1)

X_train_budget, X_test_budget, y_train_budget, y_test_budget = train_test_split(X_budget, df_budget, test_size=0.2, random_state=42)
X_train_impression, X_test_impression, y_train_impression, y_test_impression = train_test_split(X_impression, df_impression, test_size=0.2, random_state=42)
X_train_clicks, X_test_clicks, y_train_clicks, y_test_clicks = train_test_split(X_clicks, df_clicks, test_size=0.2, random_state=42)
X_train_media_cost_usd, X_test_media_cost_usd, y_train_media_cost_usd, y_test_media_cost_usd = train_test_split(X_media_cost_usd, df_media_cost_usd, test_size=0.2, random_state=42)


# Predicting Budget

In [72]:
X_train = X_train_budget
X_test = X_test_budget
y_train = y_train_budget
y_test = y_test_budget

In [73]:
# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Make predictions
y_pred_rf = rf.predict(X_test)

# Evaluate the model
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f'Random Forest MSE: {mse_rf}')

r2_rf = r2_score(y_test, y_pred_rf)
print(f'Random Forest R^2: {r2_rf}')


Random Forest MSE: 34577.87175471353
Random Forest R^2: 0.90084835719238


In [74]:
# RNN

# Reshape data for RNN
X_train_rnn = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Initialize and train the model
rnn = Sequential()
rnn.add(SimpleRNN(50, activation='relu', input_shape=(1, X_train.shape[1])))
rnn.add(Dense(1))
rnn.compile(optimizer='adam', loss='mean_squared_error')

X_train_rnn = X_train_rnn.astype('float32')
y_train = y_train.astype('float32')
X_test_rnn = X_test_rnn.astype('float32')
y_test = y_test.astype('float32')
rnn.fit(X_train_rnn, y_train, epochs=50, batch_size=32)

# Make predictions
y_pred_rnn = rnn.predict(X_test_rnn)

# Evaluate the model
mse_rnn = mean_squared_error(y_test, y_pred_rnn)
print(f'RNN MSE: {mse_rnn}')

r2_rnn = r2_score(y_test, y_pred_rnn)
print(f'RNN R^2: {r2_rnn}')

Epoch 1/50


  super().__init__(**kwargs)


[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 618us/step - loss: 36399764.0000
Epoch 2/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 602us/step - loss: 341948.7188
Epoch 3/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 661us/step - loss: 11354824.0000
Epoch 4/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 616us/step - loss: 1143314.7500
Epoch 5/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 605us/step - loss: 5668776.0000
Epoch 6/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 603us/step - loss: 6596005.0000
Epoch 7/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 613us/step - loss: 2050152.5000
Epoch 8/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 618us/step - loss: 232454.9375
Epoch 9/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 608us/step - loss: 458050.1562
Epoch 10/50


In [75]:
# SVR
from sklearn.svm import SVR

# Initialize and train the model
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

# Make predictions
y_pred_svr = svr.predict(X_test)

# Evaluate the model
mse_svr = mean_squared_error(y_test, y_pred_svr)
print(f'SVR MSE: {mse_svr}')
r2_svr = r2_score(y_test, y_pred_svr)
print(f'SVR R^2: {r2_svr}')

SVR MSE: 367000.44033471425
SVR R^2: -0.052369480880178676


In [76]:
# LightGBM
import lightgbm as lgb

# Initialize and train the model
lgb_reg = lgb.LGBMRegressor(objective='regression', colsample_bytree=0.3, learning_rate=0.1, max_depth=5, n_estimators=100)
lgb_reg.fit(X_train, y_train)

# Make predictions
y_pred_lgb = lgb_reg.predict(X_test)

# Evaluate the model
mse_lgb = mean_squared_error(y_test, y_pred_lgb)
print(f'LightGBM MSE: {mse_lgb}')
r2_lgb = r2_score(y_test, y_pred_lgb)
print(f'LightGBM R^2: {r2_lgb}')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002641 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 992
[LightGBM] [Info] Number of data points in the train set: 55104, number of used features: 18
[LightGBM] [Info] Start training from score 532.107544
LightGBM MSE: 96883.21124118548
LightGBM R^2: 0.7221885220984923


In [77]:
print(f'Random Forest MSE: {mse_rf}')
print(f'RNN MSE: {mse_rnn}')
print(f'SVR MSE: {mse_svr}')
print(f'LightGBM MSE: {mse_lgb}')

Random Forest MSE: 34577.87175471353
RNN MSE: 166135.328125
SVR MSE: 367000.44033471425
LightGBM MSE: 96883.21124118548


In [78]:
print(f'Random Forest R^2: {r2_rf}')
print(f'RNN R^2: {r2_rnn}')
print(f'SVR R^2: {r2_svr}')
print(f'LightGBM R^2: {r2_lgb}')

Random Forest R^2: 0.90084835719238
RNN R^2: 0.523608922958374
SVR R^2: -0.052369480880178676
LightGBM R^2: 0.7221885220984923


# Predicting Impressions

In [79]:
X_train = X_train_impression
X_test = X_test_impression
y_train = y_train_impression
y_test = y_test_impression

In [80]:
# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Make predictions
y_pred_rf = rf.predict(X_test)

# Evaluate the model
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f'Random Forest MSE: {mse_rf}')

r2_rf = r2_score(y_test, y_pred_rf)
print(f'Random Forest R^2: {r2_rf}')

Random Forest MSE: 203867.73175307052
Random Forest R^2: 0.9726360831208117


In [81]:
# RNN

# Reshape data for RNN
X_train_rnn = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Initialize and train the model
rnn = Sequential()
rnn.add(SimpleRNN(50, activation='relu', input_shape=(1, X_train.shape[1])))
rnn.add(Dense(1))
rnn.compile(optimizer='adam', loss='mean_squared_error')

X_train_rnn = X_train_rnn.astype('float32')
y_train = y_train.astype('float32')
X_test_rnn = X_test_rnn.astype('float32')
y_test = y_test.astype('float32')
rnn.fit(X_train_rnn, y_train, epochs=50, batch_size=32)

# Make predictions
y_pred_rnn = rnn.predict(X_test_rnn)

# Evaluate the model
mse_rnn = mean_squared_error(y_test, y_pred_rnn)
print(f'RNN MSE: {mse_rnn}')

r2_rnn = r2_score(y_test, y_pred_rnn)
print(f'RNN R^2: {r2_rnn}')

Epoch 1/50


  super().__init__(**kwargs)


[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 744us/step - loss: 14669861.0000
Epoch 2/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 590us/step - loss: 5567679.5000
Epoch 3/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 644us/step - loss: 14054346.0000
Epoch 4/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 646us/step - loss: 9241056.0000
Epoch 5/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 602us/step - loss: 6495663.5000
Epoch 6/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 604us/step - loss: 11633268.0000
Epoch 7/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 595us/step - loss: 3947645.2500
Epoch 8/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 641us/step - loss: 8431471.0000
Epoch 9/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 597us/step - loss: 6682678.0000
Epoch 10/

In [82]:
# SVR
from sklearn.svm import SVR

# Initialize and train the model
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

# Make predictions
y_pred_svr = svr.predict(X_test)

# Evaluate the model
mse_svr = mean_squared_error(y_test, y_pred_svr)
print(f'SVR MSE: {mse_svr}')
r2_svr = r2_score(y_test, y_pred_svr)
print(f'SVR R^2: {r2_svr}')

SVR MSE: 6870996.65314368
SVR R^2: 0.07774820626573986


In [83]:
# LightGBM
import lightgbm as lgb

# Initialize and train the model
lgb_reg = lgb.LGBMRegressor(objective='regression', colsample_bytree=0.3, learning_rate=0.1, max_depth=5, n_estimators=100)
lgb_reg.fit(X_train, y_train)

# Make predictions
y_pred_lgb = lgb_reg.predict(X_test)

# Evaluate the model
mse_lgb = mean_squared_error(y_test, y_pred_lgb)
print(f'LightGBM MSE: {mse_lgb}')
r2_lgb = r2_score(y_test, y_pred_lgb)
print(f'LightGBM R^2: {r2_lgb}')

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005815 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 55104, number of used features: 18
[LightGBM] [Info] Start training from score 1127.295351
LightGBM MSE: 908273.1647399026
LightGBM R^2: 0.8780880565559851


In [84]:
print(f'Random Forest MSE: {mse_rf}')
print(f'RNN MSE: {mse_rnn}')
print(f'SVR MSE: {mse_svr}')
print(f'LightGBM MSE: {mse_lgb}')

Random Forest MSE: 203867.73175307052
RNN MSE: 1877349.625
SVR MSE: 6870996.65314368
LightGBM MSE: 908273.1647399026


In [85]:
print(f'Random Forest R^2: {r2_rf}')
print(f'RNN R^2: {r2_rnn}')
print(f'SVR R^2: {r2_svr}')
print(f'LightGBM R^2: {r2_lgb}')

Random Forest R^2: 0.9726360831208117
RNN R^2: 0.7480148077011108
SVR R^2: 0.07774820626573986
LightGBM R^2: 0.8780880565559851


# Predicting Clicks

In [86]:
X_train = X_train_clicks
X_test = X_test_clicks
y_train = y_train_clicks
y_test = y_test_clicks

In [87]:
# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Make predictions
y_pred_rf = rf.predict(X_test)

# Evaluate the model
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f'Random Forest MSE: {mse_rf}')

r2_rf = r2_score(y_test, y_pred_rf)
print(f'Random Forest R^2: {r2_rf}')

Random Forest MSE: 8110.960041339211
Random Forest R^2: 0.7795614006676789


In [88]:
# RNN

# Reshape data for RNN
X_train_rnn = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Initialize and train the model
rnn = Sequential()
rnn.add(SimpleRNN(50, activation='relu', input_shape=(1, X_train.shape[1])))
rnn.add(Dense(1))
rnn.compile(optimizer='adam', loss='mean_squared_error')

X_train_rnn = X_train_rnn.astype('float32')
y_train = y_train.astype('float32')
X_test_rnn = X_test_rnn.astype('float32')
y_test = y_test.astype('float32')
rnn.fit(X_train_rnn, y_train, epochs=50, batch_size=32)

# Make predictions
y_pred_rnn = rnn.predict(X_test_rnn)

# Evaluate the model
mse_rnn = mean_squared_error(y_test, y_pred_rnn)
print(f'RNN MSE: {mse_rnn}')

r2_rnn = r2_score(y_test, y_pred_rnn)
print(f'RNN R^2: {r2_rnn}')

Epoch 1/50


  super().__init__(**kwargs)


[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 552us/step - loss: 16108528.0000
Epoch 2/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 576us/step - loss: 64480.9883
Epoch 3/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 543us/step - loss: 13175037.0000
Epoch 4/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541us/step - loss: 1745407.3750
Epoch 5/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 556us/step - loss: 6713369.0000
Epoch 6/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541us/step - loss: 1759061.7500
Epoch 7/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541us/step - loss: 18942582.0000
Epoch 8/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 542us/step - loss: 33649.5000
Epoch 9/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541us/step - loss: 13506346.0000
Epoch 10/50


In [89]:
# SVR
from sklearn.svm import SVR

# Initialize and train the model
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

# Make predictions
y_pred_svr = svr.predict(X_test)

# Evaluate the model
mse_svr = mean_squared_error(y_test, y_pred_svr)
print(f'SVR MSE: {mse_svr}')
r2_svr = r2_score(y_test, y_pred_svr)
print(f'SVR R^2: {r2_svr}')

SVR MSE: 35318.09438202734
SVR R^2: 0.0401295016890062


In [90]:
# LightGBM
import lightgbm as lgb

# Initialize and train the model
lgb_reg = lgb.LGBMRegressor(objective='regression', colsample_bytree=0.3, learning_rate=0.1, max_depth=5, n_estimators=100)
lgb_reg.fit(X_train, y_train)

# Make predictions
y_pred_lgb = lgb_reg.predict(X_test)

# Evaluate the model
mse_lgb = mean_squared_error(y_test, y_pred_lgb)
print(f'LightGBM MSE: {mse_lgb}')
r2_lgb = r2_score(y_test, y_pred_lgb)
print(f'LightGBM R^2: {r2_lgb}')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001880 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 55104, number of used features: 18
[LightGBM] [Info] Start training from score 33.488912
LightGBM MSE: 16571.654443392305
LightGBM R^2: 0.5496177671321041


In [91]:
print(f'Random Forest MSE: {mse_rf}')
print(f'RNN MSE: {mse_rnn}')
print(f'SVR MSE: {mse_svr}')
print(f'LightGBM MSE: {mse_lgb}')

Random Forest MSE: 8110.960041339211
RNN MSE: 117672.796875
SVR MSE: 35318.09438202734
LightGBM MSE: 16571.654443392305


In [92]:
print(f'Random Forest R^2: {r2_rf}')
print(f'RNN R^2: {r2_rnn}')
print(f'SVR R^2: {r2_svr}')
print(f'LightGBM R^2: {r2_lgb}')

Random Forest R^2: 0.7795614006676789
RNN R^2: -2.1980955600738525
SVR R^2: 0.0401295016890062
LightGBM R^2: 0.5496177671321041


# Predicting Media Cost ($)

In [93]:
X_train = X_train_media_cost_usd
X_test = X_test_media_cost_usd
y_train = y_train_media_cost_usd
y_test = y_test_media_cost_usd

In [94]:
# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Make predictions
y_pred_rf = rf.predict(X_test)

# Evaluate the model
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f'Random Forest MSE: {mse_rf}')

r2_rf = r2_score(y_test, y_pred_rf)
print(f'Random Forest R^2: {r2_rf}')

Random Forest MSE: 17.578946335901556
Random Forest R^2: 0.9794667467638998


In [95]:
# RNN

# Reshape data for RNN
X_train_rnn = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Initialize and train the model
rnn = Sequential()
rnn.add(SimpleRNN(50, activation='relu', input_shape=(1, X_train.shape[1])))
rnn.add(Dense(1))
rnn.compile(optimizer='adam', loss='mean_squared_error')

X_train_rnn = X_train_rnn.astype('float32')
y_train = y_train.astype('float32')
X_test_rnn = X_test_rnn.astype('float32')
y_test = y_test.astype('float32')
rnn.fit(X_train_rnn, y_train, epochs=50, batch_size=32)

# Make predictions
y_pred_rnn = rnn.predict(X_test_rnn)

# Evaluate the model
mse_rnn = mean_squared_error(y_test, y_pred_rnn)
print(f'RNN MSE: {mse_rnn}')

r2_rnn = r2_score(y_test, y_pred_rnn)
print(f'RNN R^2: {r2_rnn}')

Epoch 1/50


  super().__init__(**kwargs)


[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 552us/step - loss: 90790288.0000
Epoch 2/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 552us/step - loss: 3178282.2500
Epoch 3/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 544us/step - loss: 1919058.5000
Epoch 4/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 547us/step - loss: 10968.8525
Epoch 5/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 545us/step - loss: 13583471.0000
Epoch 6/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 739us/step - loss: 13409425.0000
Epoch 7/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 559us/step - loss: 413207.7812
Epoch 8/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 651us/step - loss: 2194928.0000
Epoch 9/50
[1m1722/1722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 610us/step - loss: 761225.0000
Epoch 10/50


In [96]:
# SVR
from sklearn.svm import SVR

# Initialize and train the model
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

# Make predictions
y_pred_svr = svr.predict(X_test)

# Evaluate the model
mse_svr = mean_squared_error(y_test, y_pred_svr)
print(f'SVR MSE: {mse_svr}')
r2_svr = r2_score(y_test, y_pred_svr)
print(f'SVR R^2: {r2_svr}')

SVR MSE: 770.1692109580182
SVR R^2: 0.10039663394230902


In [97]:
# LightGBM
import lightgbm as lgb

# Initialize and train the model
lgb_reg = lgb.LGBMRegressor(objective='regression', colsample_bytree=0.3, learning_rate=0.1, max_depth=5, n_estimators=100)
lgb_reg.fit(X_train, y_train)

# Make predictions
y_pred_lgb = lgb_reg.predict(X_test)

# Evaluate the model
mse_lgb = mean_squared_error(y_test, y_pred_lgb)
print(f'LightGBM MSE: {mse_lgb}')
r2_lgb = r2_score(y_test, y_pred_lgb)
print(f'LightGBM R^2: {r2_lgb}')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002680 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 988
[LightGBM] [Info] Number of data points in the train set: 55104, number of used features: 18
[LightGBM] [Info] Start training from score 8.825605
LightGBM MSE: 137.88229545261987
LightGBM R^2: 0.8389452923538133


In [98]:
print(f'Random Forest MSE: {mse_rf}')
print(f'RNN MSE: {mse_rnn}')
print(f'SVR MSE: {mse_svr}')
print(f'LightGBM MSE: {mse_lgb}')

Random Forest MSE: 17.578946335901556
RNN MSE: 163.70155334472656
SVR MSE: 770.1692109580182
LightGBM MSE: 137.88229545261987


In [99]:
print(f'Random Forest R^2: {r2_rf}')
print(f'RNN R^2: {r2_rnn}')
print(f'SVR R^2: {r2_svr}')
print(f'LightGBM R^2: {r2_lgb}')

Random Forest R^2: 0.9794667467638998
RNN R^2: 0.8087868690490723
SVR R^2: 0.10039663394230902
LightGBM R^2: 0.8389452923538133
