In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Step 1: Build a small in-memory salary dataset with three predictor columns.
# The column names are declared once so the feature/target selection below
# cannot drift from the keys used when the frame is constructed.
FEATURE_COLUMNS = ["YearsExperience", "EducationLevel", "WorkHoursPerWeek"]
TARGET_COLUMN = "Salary"

data = {
    # Years of experience: 1 through 10, one row per value
    "YearsExperience": list(range(1, 11)),
    # Education in years
    "EducationLevel": [12, 14, 16, 12, 16, 18, 14, 16, 18, 20],
    # Work hours per week
    "WorkHoursPerWeek": [35, 40, 45, 50, 55, 60, 50, 45, 40, 35],
    # Regression target
    "Salary": [
        40000, 45000, 50000, 60000, 65000,
        70000, 80000, 85000, 95000, 100000,
    ],
}

df = pd.DataFrame(data=data)

# Feature matrix (X) holds the three predictors; target vector (y) is the salary
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state makes the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 2: Fit the multiple linear regression on the training rows.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# Show the learned intercept and one coefficient per feature column
print(
    f"Intercept: {model.intercept_}",
    f"Coefficients: {model.coef_}",
    sep="\n",
)

# Step 3: Predict salaries for the held-out test rows
y_pred = model.predict(X_test)

# Step 4: Score the held-out predictions against the true salaries
r2 = r2_score(y_test, y_pred)  # coefficient of determination
mse = mean_squared_error(y_test, y_pred)  # mean of squared residuals
rmse = np.sqrt(mse)  # back in the same units as Salary

# Report the headline metrics
print("\n🔹 Model Performance Metrics:")
print(f"R-squared Score (R²): {r2:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

# Step 5: Turn R² into a qualitative verdict.
# Tiers are checked from strictest to loosest; the first cutoff that r2
# strictly exceeds wins, and anything at or below 0.5 gets the fallback.
r2_tiers = (
    (0.85, "\n✅ The model has a high R² value, indicating it explains most of the variance in salary."),
    (0.5, "\n⚠️ The model has a moderate R² value, suggesting some predictive power but room for improvement."),
)
low_r2_message = "\n❌ The model has a low R² value, meaning it does not fit the data well."
print(next((message for cutoff, message in r2_tiers if r2 > cutoff), low_r2_message))

# NOTE(review): RMSE weights large errors more heavily than a plain average of
# absolute errors, so the "on average" wording below is an approximation.
print("\n📌 RMSE Interpretation:")
print(f"An RMSE of {rmse:.2f} means the model's salary predictions are off by approximately ±{rmse:.2f} on average.")


Intercept: 41593.55041356926
Coefficients: [7079.88692284 -510.67951     -82.5044498 ]

🔹 Model Performance Metrics:
R-squared Score (R²): 0.9961
Root Mean Squared Error (RMSE): 1556.2864

✅ The model has a high R² value, indicating it explains most of the variance in salary.

📌 RMSE Interpretation:
An RMSE of 1556.29 means the model's salary predictions are off by approximately ±1556.29 on average.
