In [38]:
import pandas as pd 
import sklearn 
from sklearn.linear_model import LinearRegression, Lasso, Ridge 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error 

In [39]:
# Load the SuperShops dataset into a DataFrame.
# The path is written as a raw string so the Windows backslashes are taken
# literally; in a normal string "\M" and "\D" are invalid escape sequences
# that newer Python versions warn about (and will eventually reject).
df = pd.read_csv(r'E:\Machine Learning\Data Sets/SuperShops.csv')
# Preview the first five rows.
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [40]:
# Count the missing values in each column.
df.isna().sum()

Marketing Spend    0
Administration     0
Transport          1
Area               0
Profit             0
dtype: int64

In [41]:
# Replace missing Transport values with the column mean.
# NOTE(review): the mean is taken over the whole frame, before the
# train/test split further down — confirm this is acceptable.
df['Transport'] = df['Transport'].fillna(df['Transport'].mean())

In [42]:
# Remove the 'Area' column from the frame.
# Rebinding `df` (instead of mutating with inplace=True) together with
# errors='ignore' keeps this cell safe to re-run: a second execution is a
# no-op rather than a KeyError because 'Area' has already been dropped.
df = df.drop(columns='Area', errors='ignore')

In [43]:
# Keep an independent (deep) copy of the cleaned frame.
df1 = df.copy(deep=True)

In [44]:
# Feature matrix: every column except the target 'Profit'.
x = df.drop(columns=['Profit'])

In [45]:
# Preview the first five feature rows.
x.head(n=5)

Unnamed: 0,Marketing Spend,Administration,Transport
0,114523.61,136897.8,471784.1
1,162597.7,151377.59,443898.53
2,153441.51,101145.55,407934.54
3,144372.41,118671.85,383199.62
4,142107.34,91391.77,366168.42


In [46]:
# Target as a single-column DataFrame.
y = df.loc[:, ['Profit']]

In [47]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    random_state=0,
    test_size=0.25,
)

In [48]:
# Show the (rows, columns) dimensions of the training features.
xtrain.shape

(37, 3)

# Ordinary Least Squares (OLS)

In [49]:
# Fit an ordinary least squares model on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(xtrain, ytrain)
model

In [50]:
# Predict Profit for the held-out rows with the OLS model.
y_pred = model.predict(X=xtest)

In [51]:
# Display the OLS predictions for the test split.
y_pred

array([[103811.61401638],
       [128090.65172169],
       [128955.46611288],
       [ 73818.5411586 ],
       [176719.46824888],
       [123418.48179269],
       [ 68943.67844084],
       [ 94839.10021303],
       [119498.55653098],
       [164748.07766423],
       [ 97678.19628921],
       [ 88998.80999378],
       [107056.69891647]])

In [52]:
# Mean squared error of the OLS predictions on the test split.
mse = mean_squared_error(y_true=ytest, y_pred=y_pred)
print('MSE:', mse)

MSE: 109132410.4447543


In [53]:
# Score of the OLS model on the test split (as returned by score()).
model.score(X=xtest, y=ytest)

0.8988383259640067

# Polynomial Regression (degree 2)

In [54]:
# Pairwise Pearson correlation between all remaining columns.
df.corr(method='pearson')

Unnamed: 0,Marketing Spend,Administration,Transport,Profit
Marketing Spend,1.0,0.230437,0.718001,0.937948
Administration,0.230437,1.0,0.009402,0.200717
Transport,0.718001,0.009402,1.0,0.781996
Profit,0.937948,0.200717,0.781996,1.0


In [55]:
from sklearn.preprocessing import PolynomialFeatures

In [56]:
# Expand the features with all polynomial terms up to degree 2.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit(x).transform(x)

In [57]:
# Show the (rows, columns) dimensions of the expanded feature matrix.
X_poly.shape

(50, 10)

In [58]:
# Split the polynomial features with the same test size and seed as before.
# This rebinds xtrain/xtest/ytrain/ytest to the polynomial version.
xtrain, xtest, ytrain, ytest = train_test_split(
    X_poly,
    y,
    random_state=0,
    test_size=0.25,
)

In [59]:
# Show the (rows, columns) dimensions of the polynomial training features.
xtrain.shape

(37, 10)

In [60]:
# Fit a linear regression on the polynomial training features.
# fit() returns the estimator itself, so construction and fitting are chained.
model2 = LinearRegression().fit(xtrain, ytrain)
model2

In [61]:
# Predict Profit for the held-out rows with the polynomial model.
y_pred2 = model2.predict(X=xtest)

In [62]:
# Display the polynomial model's predictions for the test split.
y_pred2

array([[106465.31653821],
       [127249.18986145],
       [128150.56904415],
       [ 71917.04791836],
       [174255.93034425],
       [132972.62867139],
       [ 67004.30998651],
       [104627.66884143],
       [125314.44298831],
       [159931.03072341],
       [100017.67874561],
       [ 91337.68045106],
       [113069.60332529]])

In [63]:
# Mean squared error of the polynomial model on the test split.
# This rebinds `mse`, replacing the value computed for the OLS model.
mse = mean_squared_error(y_true=ytest, y_pred=y_pred2)
print('MSE:', mse)

MSE: 179095765.6152526


In [64]:
# Score of the polynomial model on the test split (as returned by score()).
model2.score(X=xtest, y=ytest)

0.8339849052306192

In [65]:
# Preview the first five rows of the copied frame.
df1.head(n=5)

Unnamed: 0,Marketing Spend,Administration,Transport,Profit
0,114523.61,136897.8,471784.1,192261.83
1,162597.7,151377.59,443898.53,191792.06
2,153441.51,101145.55,407934.54,191050.39
3,144372.41,118671.85,383199.62,182901.99
4,142107.34,91391.77,366168.42,166187.94


In [66]:
# Rebuild the plain (non-polynomial) features and the target from `df`.
x = df.drop(columns=['Profit'])
y = df.loc[:, ['Profit']]

In [67]:
# Re-split the original features, replacing the polynomial split held in
# xtrain/xtest/ytrain/ytest. Same test size and seed as the first split.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    random_state=0,
    test_size=0.25,
)

In [68]:
# Preview the first five training rows.
xtrain.head(n=5)

Unnamed: 0,Marketing Spend,Administration,Transport
34,46426.07,157693.92,210797.67
18,91749.16,114175.79,294919.57
7,130298.13,145530.06,323876.68
14,119943.24,156547.42,256512.92
45,1000.23,124153.04,1903.93


In [69]:
# L1-regularised linear regression; alpha is the penalty strength to tune.
# NOTE(review): the features are not standardised before fitting, so the
# penalty may have very little effect at alpha=0.1 — consider scaling; confirm.
lasso_model = Lasso(alpha=0.1).fit(xtrain, ytrain)
lasso_model

In [70]:
# L2-regularised linear regression; alpha is the penalty strength to tune.
ridge_model = Ridge(alpha=0.1).fit(xtrain, ytrain)
ridge_model

# Lasso and Ridge Performance

In [71]:
# Predict on the held-out split with both regularised models.
lasso_predictions = lasso_model.predict(X=xtest)
ridge_predictions = ridge_model.predict(X=xtest)

# Mean squared error of each model against the true test targets.
lasso_mse = mean_squared_error(y_true=ytest, y_pred=lasso_predictions)
ridge_mse = mean_squared_error(y_true=ytest, y_pred=ridge_predictions)

print('Lasso MSE:', lasso_mse)
print('Ridge MSE:', ridge_mse)

Lasso MSE: 109132410.37058003
Ridge MSE: 109132410.44509459


In [72]:
# Score of the Lasso model on the test split (as returned by score()).
lasso_model.score(X=xtest, y=ytest)

0.8988383260327635

In [73]:
# Score of the Ridge model on the test split (as returned by score()).
ridge_model.score(X=xtest, y=ytest)

0.8988383259636913