In [2]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [3]:
df = pd.read_csv("50_Startups.csv")
print(df.info())
print(df.describe())
print(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   R&D Spend        50 non-null     float64
 1   Administration   50 non-null     float64
 2   Marketing Spend  50 non-null     float64
 3   State            50 non-null     object 
 4   Profit           50 non-null     float64
dtypes: float64(4), object(1)
memory usage: 2.1+ KB
None
           R&D Spend  Administration  Marketing Spend         Profit
count      50.000000       50.000000        50.000000      50.000000
mean    73721.615600   121344.639600    211025.097800  112012.639200
std     45902.256482    28017.802755    122290.310726   40306.180338
min         0.000000    51283.140000         0.000000   14681.400000
25%     39936.370000   103730.875000    129300.132500   90138.902500
50%     73051.080000   122699.795000    212716.240000  107978.190000
75%    101602.800000   144842.180000

In [4]:
# features and labels
x = df[['R&D Spend', 
        'Administration',
        'Marketing Spend']]
y = df['Profit']

In [5]:
# train and test
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2, random_state=42)

In [6]:
# model
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [7]:
# evaluation
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
R2 = r2_score(y_test, y_pred)

In [40]:
print("Intercept:", model.intercept_)
print("Coefficient:", model.coef_)
print("\n\n\n\n==============================================\n==============================================\n========== Model Evaluation Metrics ==========\n==============================================\n==============================================\n")
print("Mean Absolute Error (MAE):", MSE)
print("Root Mean Squared Error (RMSE):", RMSE)
print("R-squared Score (R2):", R2)

Intercept: 54071.875745990845
Coefficient: [ 0.80377928 -0.06792917  0.03124155]





Mean Absolute Error (MAE): 80926321.2229516
Root Mean Squared Error (RMSE): 8995.905803361416
R-squared Score (R2): 0.900065308303732


In [19]:
# prediction
val1 = float(input("Enter R&D Spend: "))
val2 = float(input("Enter Administration: "))
val3 = float(input("Enter Marketing Spend: "))

new_value = pd.DataFrame([[val1, val2, val3]],
                         columns = ["R&D Spend", "Administration", "Marketing Spend"])
prediction = model.predict(new_value)
print(f'{prediction[0]:0.2f}')

Enter R&D Spend:  1
Enter Administration:  2
Enter Marketing Spend:  3


54072.64


                                ==============
                            ===================
                        ========================
                    ====== - ======== - =========
                ================  ================
            ==================   ================
        ====================    ================
    ============   =================   ========
----------------      ----------      --------
-------------------                ----------
--------------------------------------------
-------------------------------------------
------------------------------------------
-----------------------------------------
----------------------------------------
---------------------------------------
--------------------------------------
    ----------------------------
        --------------------
            ------------
                ----
                 --
                 -

