### Case Background

Investors often place advertisements through multiple different channels to gain economic benefits. In this case, we selected the company's investment in television, radio and newspapers to predict advertising revenue, which is of great significance to the formulation of corporate strategies.

### Loading data

In [1]:
# input data
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_excel('Advertising revenue data.xlsx')
display(df.head())
# 1.Extract feature variables and target variables
X = df.drop(columns='Income') 
y = df['Income'] 

Unnamed: 0,Splash_Ad,Banner,Promote_Card,Income
0,230.1,37.8,69.2,331.5
1,44.5,39.3,45.1,156.0
2,17.2,45.9,69.3,139.5
3,151.5,41.3,58.5,277.5
4,180.8,10.8,58.4,193.5


### Model Building

In [2]:
from sklearn.linear_model import LinearRegression,ElasticNet,ElasticNetCV
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor,GradientBoostingRegressor,ExtraTreesRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.metrics import r2_score

#### Linear Regression

In [3]:
model = LinearRegression()
# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
model.fit(X_train,y_train)

y_pred = model.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)

r2 = r2_score(y_test,y_pred)
print('Linear Regression Score：',r2)

Test data algorithm prediction results： [221.4 265.4 212.9 135.  239.3 232.3 107.8 323.5 197.6 141.8]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Linear Regression Score： 0.8218538964779601


#### Elastic Network

In [4]:
model = ElasticNet()

# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
model.fit(X_train,y_train)

y_pred = model.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('Elastic Network Score：',r2)

Test data algorithm prediction results： [221.4 265.4 212.8 135.1 239.4 232.  108.  323.4 197.5 142. ]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Elastic Network Score： 0.8219253422813386


#### Support Vector Machine

In [5]:
%%time
model = SVR()

# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
model.fit(X_train,y_train)

y_pred = model.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('SVR Score：',r2)

Test data algorithm prediction results： [239.2 259.9 192.2 156.7 265.9 177.5 163.2 283.3 176.8 186. ]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
SVR Score： 0.6289191684949812
CPU times: user 69.4 ms, sys: 6.17 ms, total: 75.6 ms
Wall time: 76.3 ms


#### Decision Tree Algorithm

In [6]:
model = DecisionTreeRegressor()
# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
model.fit(X_train,y_train)

y_pred = model.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('Decision Tree Algorithm Score：',r2)

Test data algorithm prediction results： [220.5 259.5 189.  144.  220.5 204.  136.5 354.  177.  175.5]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Decision Tree Algorithm Score： 0.9198272852939765


#### Extreme Forest

In [7]:
model = ExtraTreesRegressor()
# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
model.fit(X_train,y_train)

y_pred = model.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('Extreme Forest Algorithm Score：',r2)

Test data algorithm prediction results： [217.1 256.6 193.4 144.  218.6 217.5 135.8 348.7 179.5 154. ]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Extreme Forest Algorithm Score： 0.945749607009464


#### Gradient Boosted Tree

In [8]:
model = GradientBoostingRegressor()
# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('Gradient Boosted Score：',r2)

Test data algorithm prediction results： [225.  272.7 220.7 133.1 209.3 202.3 135.4 343.1 183.6 165.8]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Gradient Boosted Score： 0.9392049793400534


#### Adaboost

In [9]:
model = AdaBoostRegressor()
# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('Adaboost Score：',r2)

Test data algorithm prediction results： [214.2 269.7 204.2 133.7 214.2 202.4 157.9 349.4 197.  168.6]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Adaboost Score： 0.9216613232499182


#### Xgboost

In [10]:
model = XGBRegressor()
# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('Xgboost Score：',r2)

Test data algorithm prediction results： [234.9 256.7 193.5 136.8 205.2 202.3 139.9 349.4 184.  173. ]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Xgboost Score： 0.9393792705222893


###  Model parameter tuning（Cross-Validation）

#### Elastic Network

In [11]:
%%time
# set alpha and l1_ratio
l1_ratios = [0.1, 0.5, 0.7,0.9, 0.95, 0.99, 1]

model = ElasticNetCV(n_alphas=100,l1_ratio=l1_ratios,cv = 5)

# 2.Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=64)
model.fit(X_train,y_train)

y_pred = model.predict(X_test)
print('Test data prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('Elastic Net Cross-Validation score：',r2)
#Output the best alpha and l1_ratio
print("Best alpha: ", model.alpha_)
print("Best l1_ratio: ", model.l1_ratio_)

Test data prediction results： [221.8 265.4 212.6 135.3 239.4 232.  108.4 323.1 197.5 142.3]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Elastic Net Cross-Validation score： 0.8225585423221857
Best alpha:  5.342325237656248
Best l1_ratio:  1.0
CPU times: user 777 ms, sys: 34.9 ms, total: 812 ms
Wall time: 438 ms


#### SVM (support vector machine)

`SVR()` parameter explanation

- `C` is the penalty parameter of the error term. The larger C is, the less tolerant the model is to errors; the smaller C is, the more tolerant the model is to errors.
- `epsilon` Controls the maximum error allowed between the model's predictions and the actual results.
- `gamma` Used for 'rbf', 'poly' and 'sigmoid' kernel functions, it affects the influence range of the sample. The larger the gamma value, the smaller the influence range of the sample, and vice versa.
- `degree` is the polynomial degree of the 'poly' kernel function.


In [12]:
%%time
model = SVR()
# Divide data to training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
# Defining the parameter grid
param_grid = {
    'C': [0.1,0.2,0.5, 1],
    'epsilon': [0.01, 0.5],
    'kernel': ['rbf', 'poly', 'sigmoid','linear']}

gridSearchCV = GridSearchCV(model,
                            param_grid  = param_grid,
                            cv = 5,
                            n_jobs=-1)

gridSearchCV.fit(X_train,y_train)
print('The best combination of parameters is：',gridSearchCV.best_params_)
print('Best validation data score',gridSearchCV.best_score_)
y_pred = gridSearchCV.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('SVR Score：',r2)

The best combination of parameters is： {'C': 0.1, 'epsilon': 0.01, 'kernel': 'linear'}
Best validation data score 0.8577940109772829
Test data algorithm prediction results： [224.  267.4 218.1 143.2 235.6 243.3 112.2 324.3 205.2 144.7]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
SVR Score： 0.8260892149370767
CPU times: user 367 ms, sys: 114 ms, total: 481 ms
Wall time: 8.99 s


#### Xgboost

XGBoost is a very powerful gradient boosting decision tree model with many tunable hyperparameters. Below are some common hyperparameters that may be adjusted when using GridSearchCV:

1. `n_estimators: This is the number of trees to construct. Increasing this value can make the model more complex, potentially improving performance, but it will also increase computation time and may lead to overfitting.

2. `max_depth: This is the maximum depth of the trees. Increasing this value can make the model more complex, possibly improving performance, but it may also lead to overfitting.

3. `learning_rate: Also known as the step size, this controls the extent of improvement in the model at each iteration. A lower learning rate may require more trees (n_estimators) to achieve good performance.

4. `subsample: This is the proportion of samples used for training each tree. It helps prevent overfitting.

5. `colsample_bytree: This is the subsample ratio of columns used when constructing each tree. It is a form of feature sampling that helps improve model generalization.

6. `gamma: This is the minimum loss reduction required to make a further split in a leaf node. The larger this value, the more conservative the algorithm becomes.

7. `reg_alpha and reg_lambda: These are L1 (Lasso) and L2 (Ridge) regularization weights, which can be used to prevent overfitting.

In [13]:
%%time
# Divide data to training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=64)
# Parameter Tuning
parameters = {'n_estimators': [100, 200, 300],
              'learning_rate': [0.01, 0.05, 0.1],
              'max_depth': [4, 6, 8],
              'subsample': [0.7, 0.8, 0.9],
              'colsample_bytree': [0.3,0.5, 0.9,1.0],
              'gamma':[0.1,0.5,1.0]}

model = XGBRegressor()  # Build the model
gridSearchCV = GridSearchCV(model, 
                           parameters,
                            scoring='r2',
                            cv=5,n_jobs=-1) # cv=5 means 5 cross validations, scoring='r2' means using R-squared as the model evaluation criterion

# Output the best velue of parameter
gridSearchCV.fit(X_train, y_train)  # input data
print('The best combination of parameters is：',gridSearchCV.best_params_)
print('Best validation data score',gridSearchCV.best_score_)

y_pred = gridSearchCV.predict(X_test)
print('Test data algorithm prediction results：',y_pred[:10].round(1))
print('The real data of advertising revenue is：',y_test[:10].values)
r2 = r2_score(y_test,y_pred)
print('Xgboost Score：',r2)

  _data = np.array(data, dtype=dtype, copy=copy,


The best combination of parameters is： {'colsample_bytree': 1.0, 'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 300, 'subsample': 0.7}
Best validation data score 0.9557174286855048
Test data algorithm prediction results： [228.  267.5 196.8 136.  207.2 208.4 139.6 342.2 183.1 167.3]
The real data of advertising revenue is： [211.5 232.5 204.  129.  201.  234.  166.5 339.  192.  160.5]
Xgboost Score： 0.9444444848508532
CPU times: user 14.1 s, sys: 1.53 s, total: 15.6 s
Wall time: 6min 30s


In [14]:
best = gridSearchCV.best_estimator_
best

### Best combination

In [15]:
model_best = gridSearchCV.best_estimator_
model_best

In [16]:
best_income = {}
income_init = 100
total = 200 # Total advertising budget：200k

for i in range(1,201):
    Splash_Ad = i # Splash advertising amount
    for j in range(1,total - i + 1):
        Banner = j # Banner Amount of money invested
        Promote_Card = total - i -j # Promote_Card placement amount
        
        income = model_best.predict(np.array([[Splash_Ad,Banner,Promote_Card]]))

        if income > income_init:
            best_income.clear()
            best_income['Splash_Ad'] = Splash_Ad
            best_income['Banner'] = Banner
            best_income['Promote_Card'] = Promote_Card
            best_income['income'] = income
            income_init = income

print(best_income)

{'Splash_Ad': 139, 'Banner': 46, 'Promote_Card': 15, 'income': array([315.98532], dtype=float32)}
