### Multiple Linear Regression 
![image.png](attachment:image.png)

In [1]:
from warnings import filterwarnings
# Suppress every Python warning from this point on (no category or module
# filter is given, so library warnings are hidden as well).
filterwarnings('ignore')

### Read the dataset

In [3]:
import pandas as pd
# Load the start-up data from a CSV file given by a relative path.
df = pd.read_csv('50_Startups.csv')
# Preview the first five rows.
df.head()

Unnamed: 0,RND,ADMIN,MKT,STATE,PROFIT
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [4]:
# Count missing values in each column.
df.isna().sum()

RND       0
ADMIN     0
MKT       0
STATE     0
PROFIT    0
dtype: int64

In [5]:
# Count rows that duplicate an earlier row.
df.duplicated().sum()

0

In [6]:
# Summarise column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RND     50 non-null     float64
 1   ADMIN   50 non-null     float64
 2   MKT     50 non-null     float64
 3   STATE   50 non-null     object 
 4   PROFIT  50 non-null     float64
dtypes: float64(4), object(1)
memory usage: 2.1+ KB


### Separate X (RND, ADMIN, MKT) and Y (PROFIT)

In [8]:
# Features: the three numeric spend columns (STATE is not used).
X = df[['RND', 'ADMIN', 'MKT']]
# Target: double brackets keep PROFIT as a one-column DataFrame rather than a
# Series, which is why the fitted intercept/coefficients and the predictions
# further down come back as arrays with an extra dimension.
Y = df[['PROFIT']]

In [9]:
# Preview the feature frame.
X.head()

Unnamed: 0,RND,ADMIN,MKT
0,165349.2,136897.8,471784.1
1,162597.7,151377.59,443898.53
2,153441.51,101145.55,407934.54
3,144372.41,118671.85,383199.62
4,142107.34,91391.77,366168.42


In [10]:
# Preview the target frame.
Y.head()

Unnamed: 0,PROFIT
0,192261.83
1,191792.06
2,191050.39
3,182901.99
4,166187.94


### Build the Linear Regression Model

In [12]:
from sklearn.linear_model import LinearRegression
# Linear regression estimator with default settings.
model = LinearRegression()
# Fit on every row of X and Y (no hold-out split is made here).
model.fit(X, Y)

In [13]:
# Fitted intercept term (an array because Y has one target column).
model.intercept_

array([50122.19298987])

In [14]:
# Fitted coefficients, in the column order of X: RND, ADMIN, MKT.
model.coef_

array([[ 0.80571505, -0.02681597,  0.02722806]])

### Evaluate the model 

In [15]:
# R2 score computed on the same X and Y the model was fitted on,
# i.e. an in-sample score rather than a held-out estimate.
model.score(X,Y)

0.9507459940683246

In [20]:
# In-sample predictions: predict on the same X used for fitting.
ypred = model.predict(X)
# Show the first five predictions.
ypred[0:5]

array([[192521.25289008],
       [189156.76823227],
       [182147.2790962 ],
       [173696.70002553],
       [172139.51418327]])

In [21]:
# Actual profits for the first five rows, to compare with the predictions.
Y.head()

Unnamed: 0,PROFIT
0,192261.83
1,191792.06
2,191050.39
3,182901.99
4,166187.94


In [27]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error, r2_score, root_mean_squared_error

In [30]:
def evaluate_model(X, Y, estimator=None):
    """Print regression error metrics for predictions made on ``X``.

    Parameters
    ----------
    X : features passed to the estimator's ``predict``.
    Y : true target values the predictions are compared against.
    estimator : optional fitted estimator exposing ``predict``. When omitted,
        the notebook-level ``model`` is used, so existing two-argument calls
        behave as before.

    Returns
    -------
    None. MSE, RMSE, MAE, MAPE (as a percentage) and R2 are printed.
    """
    if estimator is None:
        # Fall back to the globally fitted model for backward compatibility.
        estimator = model
    ypred = estimator.predict(X)
    mse = mean_squared_error(Y, ypred)
    # Square root of the mean *squared* error, i.e. RMSE.
    rmse = mse ** 0.5
    mae = mean_absolute_error(Y, ypred)
    mape = mean_absolute_percentage_error(Y, ypred)
    r2 = r2_score(Y, ypred)
    print(f'Mean Squared Error MSE : {mse:.2f}')
    # Label fixed: this value is the RMSE, not a "root mean absolute error".
    print(f'Root Mean Squared Error RMSE : {rmse:.2f}')
    print(f'Mean Absolute Error MAE : {mae:.2f}')
    # MAPE is returned as a fraction; scale to a percentage for display.
    print(f'Mean Absolute Percentage Error MAPE : {mape*100:.2f}%')
    print(f'R2 Score : {r2:.4f}')

In [31]:
# Report the metrics on the data the model was fitted on (in-sample).
evaluate_model(X, Y)

Mean Squared Error MSE : 78417126.02
Root Mean Absolute Error RMAE : 8855.34
Mean Absolute Error MAE : 6471.45
Mean Absolute Percentage Error MAPE : 10.60%
R2 Score : 0.9507


### It is a good model, as the R2 score of 0.9507 is greater than 0.8 (measured on the training data)

### Out-of-Sample Prediction

In [37]:
# Spend figures for a hypothetical start-up to score with the fitted model.
rnd = 65000
admin = 50000
mkt = 40000
# Build a one-row DataFrame whose column names and order match the features
# the model was fitted on. Passing a bare nested list to an estimator fitted
# on a named-column DataFrame makes scikit-learn emit a feature-name warning,
# which was only hidden by the global warning filter.
xnew = pd.DataFrame([[rnd, admin, mkt]], columns=['RND', 'ADMIN', 'MKT'])
# The target was a one-column DataFrame, so predict() returns a 2-D array;
# [0][0] extracts the single scalar prediction.
pred = model.predict(xnew)[0][0]
print(f'The estimated profit is : {pred:.2f}')

The estimated profit is : 102242.00
