In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans

In [7]:
# Q1: simple linear regression of Salary on YearsExperience, fitted by hand
# with the closed-form ordinary-least-squares formulas.
data_q1 = {
    'YearsExperience': [1.1, 1.3, 1.5, 2.0, 2.2, 2.9, 3.0, 3.2, 3.2, 3.7],
    'Salary': [39.0, 46.0, 47.0, 52.0, 56.0, 64.0, 65.0, 67.0, 68.0, 70.0],
}
df_q1 = pd.DataFrame(data_q1)

print("\nMETHOD 1:FROM SCRATCH")

# Work on plain NumPy arrays for the manual arithmetic.
X = df_q1['YearsExperience'].to_numpy()
y = df_q1['Salary'].to_numpy()
n = len(X)

# Centre both variables once; the deviations feed the slope and SS_tot.
x_mean = np.mean(X)
y_mean = np.mean(y)
x_dev = X - x_mean
y_dev = y - y_mean

# slope = S_xy / S_xx; the intercept forces the line through (x_mean, y_mean).
slope_scratch = np.sum(x_dev * y_dev) / np.sum(x_dev**2)
intercept_scratch = y_mean - slope_scratch * x_mean
print(f"Intercept (from scratch): {intercept_scratch:.4f}")
print(f"Slope (from scratch): {slope_scratch:.4f}")

# Point prediction at 4.5 years of experience.
prediction_scratch = intercept_scratch + slope_scratch * 4.5
print(f"Predicted salary for 4.5 years experience: ${prediction_scratch:.2f}k")

# R² = 1 - SS_res / SS_tot, evaluated on the same points used for fitting.
y_pred_scratch = intercept_scratch + slope_scratch * X
ss_res = np.sum((y - y_pred_scratch)**2)
ss_tot = np.sum(y_dev**2)
r2_scratch = 1 - (ss_res / ss_tot)
print(f"R² (from scratch): {r2_scratch:.4f}")
print(f"Interpretation: {r2_scratch*100:.2f}% of variance in salary is explained by years of experience")


METHOD 1:FROM SCRATCH
Intercept (from scratch): 28.8687
Slope (from scratch): 11.8387
Predicted salary for 4.5 years experience: $82.14k
R² (from scratch): 0.9788
Interpretation: 97.88% of variance in salary is explained by years of experience


In [8]:
# Q1, method 2: fit the same salary-vs-experience line with scikit-learn so it
# can be compared against the from-scratch coefficients.
print("\nMETHOD 2:USING SCIKIT-LEARN")

# Double brackets keep the feature matrix two-dimensional (one named column).
X_sklearn = df_q1[['YearsExperience']]
y_sklearn = df_q1['Salary']

model_q1 = LinearRegression()
model_q1.fit(X_sklearn, y_sklearn)
print(f"Intercept (scikit-learn): {model_q1.intercept_:.4f}")
print(f"Slope (scikit-learn): {model_q1.coef_[0]:.4f}")

# The model was fitted on a DataFrame, so query it with a DataFrame carrying
# the same column labels. Passing a bare nested list makes recent scikit-learn
# versions warn that X "does not have valid feature names" and bypasses the
# check that the query columns match the training columns.
X_query = pd.DataFrame([[4.5]], columns=X_sklearn.columns)
prediction_sklearn = model_q1.predict(X_query)[0]
print(f"Predicted salary for 4.5 years experience: ${prediction_sklearn:.2f}k")

# score() returns R² on the data passed in — here, the training data itself.
r2_sklearn = model_q1.score(X_sklearn, y_sklearn)
print(f"R² (scikit-learn): {r2_sklearn:.4f}")


METHOD 2:USING SCIKIT-LEARN
Intercept (scikit-learn): 28.8687
Slope (scikit-learn): 11.8387
Predicted salary for 4.5 years experience: $82.14k
R² (scikit-learn): 0.9788
