In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,r2_score

# Hand-entered housing dataset: four numeric features per house plus the
# price (in lakhs) that the regression is trained to predict.
data = {
    "Area_sqft":   [800, 1000, 1200, 1500, 1800, 2000, 2200, 2500, 900, 1600, 1400, 2100],
    "Bedrooms":    [2, 2, 3, 3, 4, 4, 4, 5, 2, 3, 3, 4],
    "Age_years":   [15, 10, 8, 5, 4, 3, 2, 1, 12, 6, 7, 3],
    "Distance_km": [12, 10, 8, 6, 5, 4, 3, 2, 11, 7, 9, 4],
    "Price_lakhs": [40, 50, 62, 75, 90, 105, 120, 140, 45, 80, 70, 110],
}
df = pd.DataFrame(data)
print("DataSet:")
print(df)

# Predictors (X) and target (y).
feature_columns = ["Area_sqft", "Bedrooms", "Age_years", "Distance_km"]
X = df[feature_columns]
y = df["Price_lakhs"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit ordinary least squares on the training rows (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows and show each prediction next to its true price.
y_pred = model.predict(X_test)

print("Actual vs Predicted:")
for actual, pred in zip(y_test, y_pred):
    print(f"Actual: {actual:.2f}, Predicted: {pred:.2f}")

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Absolute Error:", mae)
print("R2 Score:", r2)

# Price estimate for one unseen house. The frame uses the same column names,
# in the same order, as the training features.
new_house_data = {
    "Area_sqft": [1700],
    "Bedrooms": [3],
    "Age_years": [5],
    "Distance_km": [6],
}
new_house_df = pd.DataFrame(new_house_data)
predicted_price = model.predict(new_house_df)
print("Predicted Price for New House: Rs. {:.2f} lakhs".format(predicted_price[0]))

DataSet:
    Area_sqft  Bedrooms  Age_years  Distance_km  Price_lakhs
0         800         2         15           12           40
1        1000         2         10           10           50
2        1200         3          8            8           62
3        1500         3          5            6           75
4        1800         4          4            5           90
5        2000         4          3            4          105
6        2200         4          2            3          120
7        2500         5          1            2          140
8         900         2         12           11           45
9        1600         3          6            7           80
10       1400         3          7            9           70
11       2100         4          3            4          110
Actual vs Predicted:
Actual: 70.00, Predicted: 54.90
Actual: 80.00, Predicted: 78.73
Actual: 40.00, Predicted: 54.54
Mean Absolute Error: 10.305282005371604
R2 Score: 0.49103377000320525
Predicted P

In [None]:
# Scenario: Predicting Employee Salary Based on Multiple Factors

# A company wants to predict employee salary based on several important factors:

# Years of Experience

# Education Level (1 = Bachelor, 2 = Master, 3 = PhD)

# Number of Skills Known

# Performance Rating (1 to 5)

# Since salary depends on multiple variables, the company uses Multiple Linear Regression.

# Salary = b0 + b1*(Experience) + b2*(EducationLevel) + b3*(Skills) + b4*(Performance)

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the salary dataset. The path is relative, so the CSV must sit in the
# notebook's working directory.
df = pd.read_csv("multil_salary_pred.csv")

print(df.head())

# Predictors (X) and target (y); the CSV must provide all five columns.
X = df[[
    "Experience_years",
    "Education_Level",
    "Skills_Count",
    "Performance_Rating"
]]

y = df["Salary_lpa"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the multiple linear regression on the training rows.
model = LinearRegression()
model.fit(X_train, y_train)

# Coefficients are printed in the same order as the columns of X.
print("Intercept (b0):", model.intercept_)
print("Coefficients:")
print(model.coef_)


# Predictions for the held-out rows.
y_pred = model.predict(X_test)
print(y_pred)

# One hypothetical employee, values listed in the same order as the columns of X:
# experience (years), education level, skills count, performance rating.
new_employee = [[5, 2, 8, 4]]

# The model was fitted on a DataFrame, so scikit-learn remembers the feature
# names. Passing a bare list would trigger an "X does not have valid feature
# names" warning and skip the column-name check; wrap the row in a DataFrame
# that carries the training column names instead.
new_employee_df = pd.DataFrame(new_employee, columns=X.columns)

predicted_salary = model.predict(new_employee_df)

print("Predicted Salary:", predicted_salary[0], "LPA")

# Evaluate on the held-out rows.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("MSE:", mse)
print("R2 Score:", r2)



   Experience_years  Education_Level  Skills_Count  Performance_Rating  \
0                 1                1             3                   3   
1                 2                1             4                   3   
2                 3                2             5                   4   
3                 4                2             6                   4   
4                 5                2             7                   5   

   Salary_lpa  
0         4.0  
1         5.0  
2         7.0  
3         8.0  
4        10.0  
Intercept (b0): 0.5236966824644522
Coefficients:
[1.25592417 0.34952607 0.20616114 0.22274882]
[11.09241706  7.94549763  3.41587678]
Predicted Salary: 10.042654028436019 LPA
MSE: 0.1827363304208509
R2 Score: 0.9776241228056101


