# **Multiple Linear Regression**

*Multiple input features*

- simple linear regression : $y = mx_1 + b$ *[the subscript 1 is used because there is only one input feature]*

- multiple linear regression : $y = m_1x_1 + m_2x_2 + ... + m_nx_n + b$
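    - So here the most important thing we have to find is the coefficients $m_1, m_2, ..., m_n$ and the intercept $b$
    - If $|m_1| > |m_2|$ (and the features are on comparable scales), then feature $x_1$ has a larger influence on the prediction than feature $x_2$, and vice versa. The same comparison applies to the other features.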


In [46]:
import numpy as np
import pandas as pd

import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import warnings
warnings.filterwarnings('ignore')

In [47]:
from sklearn.datasets import make_regression

# Synthetic regression problem: 100 samples, 2 features (both informative),
# a single target, and Gaussian noise with standard deviation 50 on the target.
# random_state pins the generator so the data -- and therefore every metric and
# coefficient computed from it -- is identical on each Restart & Run All.
X, y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42,
)

print(X.shape)
print(y.shape)


(100, 2)
(100,)


In [48]:
# Put the two feature columns and the target side by side in one table so the
# plotting helpers can refer to them by column name.
df = pd.DataFrame(
    np.column_stack((X, y)),
    columns=['feature1', 'feature2', 'target'],
)
print(df.shape)
df.head()

(100, 3)


Unnamed: 0,feature1,feature2,target
0,2.036211,0.183135,91.037247
1,-0.999175,-1.077491,-209.810529
2,-1.329623,0.420622,-17.484213
3,0.434036,0.012117,-61.703514
4,-1.372095,-1.03379,-70.597884


In [49]:
# Interactive 3-D scatter: the two features span the horizontal axes and the
# target is drawn on the vertical axis.
fig = px.scatter_3d(
    data_frame=df,
    x='feature1',
    y='feature2',
    z='target',
)
fig.show()

In [50]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=3
)

# Fit the linear model on the training portion (fit returns the estimator itself),
# then predict the held-out targets.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Report the held-out error metrics, one per line.
for label, metric in (
    ("MAE", mean_absolute_error),
    ("MSE", mean_squared_error),
    ("R2 score", r2_score),
):
    print(label, metric(y_test, y_pred))



MAE 43.47003746832421
MSE 2568.1131111108593
R2 score 0.7013990156946217


In [51]:
import plotly.graph_objects as go  # go.Surface draws the fitted plane in the 3-D graph

# With two input features the fitted linear model is a plane; it plays the same
# role here that the best-fit line plays in a 2-D plot of simple linear regression.

# Axis values of the evaluation grid. Dedicated names are used on purpose: the
# target array `y` from the data-generation cell must not be overwritten, or
# re-running the train/test split cell would fail on mismatched lengths.
grid_size = 10
feature1_axis = np.linspace(-5, 5, grid_size)
feature2_axis = np.linspace(-5, 5, grid_size)

# meshgrid(a, b) varies `a` along columns and `b` along rows, which matches the
# layout go.Surface expects: z[i][j] is drawn at (x[j], y[i]).
feature1_grid, feature2_grid = np.meshgrid(feature1_axis, feature2_axis)

# One row per grid point, columns ordered as (feature1, feature2) like the
# training matrix the model was fitted on.
grid_points = np.column_stack((feature1_grid.ravel(), feature2_grid.ravel()))
plane_z = lr.predict(grid_points).reshape(feature1_grid.shape)

fig = px.scatter_3d(df, x='feature1', y='feature2', z='target')
fig.add_trace(go.Surface(x=feature1_axis, y=feature2_axis, z=plane_z))
fig.show()





In [52]:
print(lr.coef_)
print(lr.intercept_)

[22.92340425 80.71469218]
-8.80953394378826
