In [16]:
! pip install pandas numpy scikit-learn



In [17]:
# IMPORT LIBRARIES
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [18]:
# LOAD DATA
data = pd.read_csv('Experience-Salary.csv')

x = data["exp(in months)"].values
y = data["salary(in thousands)"].values

n = len(x)
print("Total samples:", n)

Total samples: 1000


In [19]:
# MODEL 1: MANUAL LINEAR REGRESSION (FORMULA METHOD)
# SECTION 1: Manual Training
sum_x = np.sum(x)
sum_y = np.sum(y)
sum_x2 = np.sum(x ** 2)
sum_xy = np.sum(x * y)

m_manual = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x ** 2)
c_manual = (sum_y - m_manual * sum_x) / n

In [20]:
# SECTION 2: Manual Model Output (VISIBLE)
print("===== MANUAL LINEAR REGRESSION MODEL =====")
print(f"Slope (m)     : {m_manual}")
print(f"Intercept (c) : {c_manual}")
print(f"Equation      : Salary = {m_manual:.2f} * Experience + {c_manual:.2f}")

===== MANUAL LINEAR REGRESSION MODEL =====
Slope (m)     : 0.8228573477761276
Intercept (c) : 5.198560811223339
Equation      : Salary = 0.82 * Experience + 5.20


In [21]:
# SECTION 3: Manual Predictions
y_pred_manual = m_manual * x + c_manual

In [22]:
# SECTION 4: Manual → Classification

# Single, shared rule
# Salary ≥ mean → Class 1
# Salary < mean → Class 0
threshold = np.mean(y)

y_actual_class = np.where(y >= threshold, 1, 0)
y_pred_class_manual = np.where(y_pred_manual >= threshold, 1, 0)

In [23]:
# SECTION 5: MANUAL MODEL METRICS (FORMULA ONLY)

TP = np.sum((y_actual_class == 1) & (y_pred_class_manual == 1))
TN = np.sum((y_actual_class == 0) & (y_pred_class_manual == 0))
FP = np.sum((y_actual_class == 0) & (y_pred_class_manual == 1))
FN = np.sum((y_actual_class == 1) & (y_pred_class_manual == 0))

cm_manual = np.array([[TN, FP], [FN, TP]])

accuracy_manual = (TP + TN) / (TP + TN + FP + FN)

precision_0 = TN / (TN + FN) if (TN + FN) != 0 else 0
recall_0    = TN / (TN + FP) if (TN + FP) != 0 else 0
f1_0        = (2 * precision_0 * recall_0) / (precision_0 + recall_0) if (precision_0 + recall_0) != 0 else 0

precision_1 = TP / (TP + FP) if (TP + FP) != 0 else 0
recall_1    = TP / (TP + FN) if (TP + FN) != 0 else 0
f1_1        = (2 * precision_1 * recall_1) / (precision_1 + recall_1) if (precision_1 + recall_1) != 0 else 0

# Supports
support_0 = np.sum(y_actual_class == 0)
support_1 = np.sum(y_actual_class == 1)
total = support_0 + support_1

# Macro averages
macro_precision = (precision_0 + precision_1) / 2
macro_recall = (recall_0 + recall_1) / 2
macro_f1 = (f1_0 + f1_1) / 2

# Weighted averages
weighted_precision = (
    (precision_0 * support_0 + precision_1 * support_1) / total
)
weighted_recall = (
    (recall_0 * support_0 + recall_1 * support_1) / total
)
weighted_f1 = (
    (f1_0 * support_0 + f1_1 * support_1) / total
)

In [24]:
# SECTION 5: Manual Model Metrics (sklearn)
print("--- MANUAL MODEL METRICS ---")

print("Accuracy:", round(accuracy_manual, 2))

print("\nConfusion Matrix:\n", cm_manual)

print("\nClassification Report (Manual):\n")
print(f"{'Class':<10}{'Precision':<12}{'Recall':<10}{'F1-score':<10}{'Support':<10}")
print("-" * 55)
print(f"{'0':<10}{precision_0:<12.2f}{recall_0:<10.2f}{f1_0:<10.2f}{support_0:<10}")
print(f"{'1':<10}{precision_1:<12.2f}{recall_1:<10.2f}{f1_1:<10.2f}{support_1:<10}")
print("-" * 55)
print(f"{'Accuracy':<34}{accuracy_manual:<10.2f}{total:<10}")
print(f"{'Macro Avg':<10}{macro_precision:<12.2f}{macro_recall:<10.2f}{macro_f1:<10.2f}{total:<10}")
print(f"{'Weighted Avg':<10}{weighted_precision:<12.2f}{weighted_recall:<10.2f}{weighted_f1:<10.2f}{total:<10}")

--- MANUAL MODEL METRICS ---
Accuracy: 0.82

Confusion Matrix:
 [[414  85]
 [ 98 403]]

Classification Report (Manual):

Class     Precision   Recall    F1-score  Support   
-------------------------------------------------------
0         0.81        0.83      0.82      499       
1         0.83        0.80      0.81      501       
-------------------------------------------------------
Accuracy                          0.82      1000      
Macro Avg 0.82        0.82      0.82      1000      
Weighted Avg0.82        0.82      0.82      1000      


In [25]:
# MODEL 2: SKLEARN LINEAR REGRESSION (BUILT-IN)
# SECTION 6: sklearn Training
X = x.reshape(-1, 1)

lr_model = LinearRegression()
lr_model.fit(X, y)

m_sklearn = lr_model.coef_[0]
c_sklearn = lr_model.intercept_

In [26]:
# SECTION 7: sklearn Model Output (VISIBLE)
print("===== SKLEARN LINEAR REGRESSION MODEL =====")
print(f"Slope (m)     : {m_sklearn}")
print(f"Intercept (c) : {c_sklearn}")
print(f"Equation      : Salary = {m_sklearn:.2f} * Experience + {c_sklearn:.2f}")

===== SKLEARN LINEAR REGRESSION MODEL =====
Slope (m)     : 0.8228573477761271
Intercept (c) : 5.198560811223356
Equation      : Salary = 0.82 * Experience + 5.20


In [27]:
# SECTION 8: sklearn Predictions
y_pred_sklearn = lr_model.predict(X)

In [28]:
# SECTION 9: sklearn → Classification
y_pred_class_sklearn = np.where(y_pred_sklearn >= threshold, 1, 0)

In [29]:
# SECTION 10: sklearn Model Metrics (sklearn)
print("===== SKLEARN MODEL METRICS =====")

accuracy_sklearn = accuracy_score(y_actual_class, y_pred_class_sklearn)
cm_sklearn = confusion_matrix(y_actual_class, y_pred_class_sklearn)
report_sklearn = classification_report(y_actual_class, y_pred_class_sklearn)

print("Accuracy:", round(accuracy_sklearn, 2))
print("\nConfusion Matrix:\n", cm_sklearn)
print("\nClassification Report:\n", report_sklearn)

===== SKLEARN MODEL METRICS =====
Accuracy: 0.82

Confusion Matrix:
 [[414  85]
 [ 98 403]]

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.83      0.82       499
           1       0.83      0.80      0.81       501

    accuracy                           0.82      1000
   macro avg       0.82      0.82      0.82      1000
weighted avg       0.82      0.82      0.82      1000



In [30]:
# Run Time Input
print("===== RUNTIME PREDICTION USING BOTH MODELS =====")

exp_input = float(input("Enter years of experience: "))

salary_manual = m_manual * exp_input + c_manual

salary_sklearn = lr_model.predict([[exp_input]])[0]

print(f"\nPrediction Results for {exp_input} Year(s) of Expirence:")
print("-----------------------------------")
print(f"Manual Linear Regression Prediction  : {salary_manual:.2f} Thousand")
print(f"sklearn Linear Regression Prediction : {salary_sklearn:.2f} Thousand")
print("-----------------------------------")

===== RUNTIME PREDICTION USING BOTH MODELS =====

Prediction Results for 10.0 Year(s) of Expirence:
-----------------------------------
Manual Linear Regression Prediction  : 13.43 Thousand
sklearn Linear Regression Prediction : 13.43 Thousand
-----------------------------------
