### Multiple Linear Regression

In [179]:
import numpy as np
import pandas as pd
ad = pd.read_csv("Advertising.csv", usecols = [1,2,3,4])
df = ad.copy()
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [181]:
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict

In [183]:
X = df.drop("sales" , axis= 1)
y = df["sales"]

In [185]:
X_train , X_test , y_train , y_test = train_test_split(X,y,test_size = 0.20 , random_state = 42)

In [187]:
X_train.shape

(160, 3)

In [189]:
y_train.shape

(160,)

In [191]:
X_test.shape

(40, 3)

In [193]:
y_test.shape

(40,)

In [195]:
training = df.copy()

In [197]:
training.shape

(200, 4)

### Statsmodels

In [200]:
import statsmodels.api as sm

In [202]:
lm = sm.OLS(y_train, X_train)

In [204]:
model = lm.fit()
model.summary()

0,1,2,3
Dep. Variable:,sales,R-squared (uncentered):,0.982
Model:,OLS,Adj. R-squared (uncentered):,0.982
Method:,Least Squares,F-statistic:,2935.0
Date:,"Thu, 21 Aug 2025",Prob (F-statistic):,1.28e-137
Time:,16:44:07,Log-Likelihood:,-336.65
No. Observations:,160,AIC:,679.3
Df Residuals:,157,BIC:,688.5
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
TV,0.0531,0.001,36.467,0.000,0.050,0.056
radio,0.2188,0.011,20.138,0.000,0.197,0.240
newspaper,0.0239,0.008,3.011,0.003,0.008,0.040

0,1,2,3
Omnibus:,11.405,Durbin-Watson:,1.895
Prob(Omnibus):,0.003,Jarque-Bera (JB):,15.574
Skew:,-0.432,Prob(JB):,0.000415
Kurtosis:,4.261,Cond. No.,13.5


### scikit-learn model 

In [134]:
from sklearn.linear_model import LinearRegression

In [136]:
lm = LinearRegression()
model = lm.fit(X_train, y_train)

In [138]:
model.intercept_ ### Constant

2.979067338122631

In [140]:
model.coef_ ### Independent variable coefficients

array([0.04472952, 0.18919505, 0.00276111])

### Prediction

In [150]:
from sklearn.metrics import mean_squared_error

In [213]:
new_data = [[30], [109],[40]]
new_data = pd.DataFrame(new_data).T
new_data

Unnamed: 0,0,1,2
0,30,109,40


In [215]:
model.predict(new_data)

0    26.395232
dtype: float64

In [217]:
rmse = np.sqrt(mean_squared_error(y_train, model.predict(X_train)))

In [219]:
rmse

1.984072821267917

In [227]:
rmse = np.sqrt(mean_squared_error(y_test, model.predict(X_test)))

In [229]:
rmse

2.1451936948156196

### Model Tuning

In [244]:
X = df.drop("sales",axis = 1)
y =df["sales"]
X_train, X_test , y_train , y_test = train_test_split(X,y,
                                                      test_size = 0.20, 
                                                      random_state= 42)
lm = LinearRegression()
model = lm.fit(X_train , y_train)

In [252]:
np.sqrt(mean_squared_error(y_train,model.predict(X_train)))

1.6447277656443375

In [254]:
model.score(X_train,y_train)

0.8957008271017817

In [256]:
cross_val_score(model,X,y,cv = 10, scoring = "r2").mean()

0.8853562237979616

In [268]:
np.sqrt(-cross_val_score(model,X_train,y_train,cv=10, scoring ="neg_mean_squared_error")).mean() ### Eğitim Seti

1.6513523730313335

In [270]:
np.sqrt(-cross_val_score(model,X_test,y_test,cv=10, scoring ="neg_mean_squared_error")).mean() ### Test Seti

1.8462778823997095