In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [2]:
# Load the ads/sales dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('ads_sales_dataset_updated.csv')

In [3]:
# Preview the raw frame: an 'S/N' column (appears to be a running row number -- confirm),
# four ad-channel columns, and the 'Sales' column.
df

Unnamed: 0,S/N,Facebook Ads,TV Ads,Radio Ads,Billboard Ads,Sales
0,1,57.450712,33.577874,12.027862,12.270966,249.241213
1,2,47.926035,35.607845,17.003125,7.233504,205.267132
2,3,59.715328,40.830512,20.026218,12.608818,265.147874
3,4,72.845448,40.538021,20.234903,14.066914,307.686417
4,5,46.487699,16.223306,17.749673,11.240305,207.349696
...,...,...,...,...,...,...
195,196,55.779761,25.308243,17.449918,13.159459,243.109346
196,197,36.742138,12.868655,18.650625,9.881335,195.733202
197,198,52.305877,43.538724,15.106181,12.044502,234.144762
198,199,50.873131,28.854602,17.778534,10.084955,220.003181


In [6]:
# Remove the 'S/N' column so it is not picked up as a feature later on.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the column is already gone no longer
# raises KeyError.
df = df.drop(columns='S/N', errors='ignore')

In [7]:
# Pairwise correlation matrix of the remaining columns (pandas' default correlation method),
# used to see how each ad channel relates to 'Sales'.
df.corr()

Unnamed: 0,Facebook Ads,TV Ads,Radio Ads,Billboard Ads,Sales
Facebook Ads,1.0,0.095147,-0.134197,0.065398,0.95329
TV Ads,0.095147,1.0,-0.032626,-0.101396,0.052122
Radio Ads,-0.134197,-0.032626,1.0,0.105959,0.080648
Billboard Ads,0.065398,-0.101396,0.105959,1.0,0.191849
Sales,0.95329,0.052122,0.080648,0.191849,1.0


In [12]:
# Separate the response ('Sales') from the predictors (every other column).
y = df['Sales']
X = df.drop(columns=['Sales'])

### Split the data into training and testing sets

In [13]:
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Fit an ordinary least-squares linear regression on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained;
# the trailing expression keeps the fitted estimator as the cell's displayed value.
model = LinearRegression().fit(X_train, y_train)
model

In [17]:
# Label each coefficient by the column it was fitted on instead of by a
# hard-coded position in model.coef_, so a change in column order (or an extra
# column) cannot silently attach a coefficient to the wrong channel.
coefficients = pd.Series(model.coef_, index=X_train.columns)

print('Feature coefficients: ')
# Same reporting order as before; a missing column now fails loudly with KeyError.
for channel in ('Facebook Ads', 'Radio Ads', 'Billboard Ads', 'TV Ads'):
    print(f'{channel}: {coefficients[channel]}')

Feature coefficients: 
Facebook Ads: 3.508430110914172
Radio Ads: 2.0649499673221077
Billboard Ads: 1.7307750662747665
TV Ads: -0.14451379296037486


In [19]:
# Refit the same regression with statsmodels to get the full inference table
# (standard errors, t-statistics, p-values, confidence intervals).
# add_constant supplies the 'const' column through which OLS estimates the intercept.
X_train_sm = sm.add_constant(X_train)
model_sm = sm.OLS(endog=y_train, exog=X_train_sm).fit()
print(model_sm.summary())

                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.968
Model:                            OLS   Adj. R-squared:                  0.967
Method:                 Least Squares   F-statistic:                     1163.
Date:                Mon, 05 Aug 2024   Prob (F-statistic):          1.92e-114
Time:                        10:07:29   Log-Likelihood:                -580.61
No. Observations:                 160   AIC:                             1171.
Df Residuals:                     155   BIC:                             1187.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -1.0152      5.186     -0.196

In [20]:
import numpy as np

In [24]:
# Spend for one hypothetical campaign, in the training column order
# (Facebook Ads, TV Ads, Radio Ads, Billboard Ads).
# NOTE(review): these values are far outside the range visible in the data preview
# (tens, not thousands), so the prediction is a strong extrapolation -- confirm
# the intended units/scale.
new_ad = np.array([[2000, 300, 2000, 1000]])

# The model was fitted on a DataFrame, so hand predict() a DataFrame carrying the
# same column names: this avoids scikit-learn's "X does not have valid feature
# names" warning and lets it verify that the columns line up with the training data.
new_ad_frame = pd.DataFrame(new_ad, columns=X_train.columns)
predicted_ad = model.predict(new_ad_frame)
print(f'Predicted sales: {predicted_ad[0]}')

Predicted sales: 12833.1658477107


