In [9]:
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error

In [2]:
# Load the salary data from the CSV that sits next to this notebook.
# NOTE(review): the preview further down shows "Unnamed: 0.1" / "Unnamed: 0"
# columns — presumably index columns written out by earlier to_csv calls; confirm.
df = pd.read_csv(filepath_or_buffer="salary_dataset.csv")

In [3]:
class TariqLinearRegression:
    """Linear regression fitted with full-batch gradient descent.

    The model is ``y_hat = X @ weights + bias``. ``fit`` starts from all-zero
    parameters and takes ``epochs`` steps of size ``alpha`` against the
    gradient of the squared-error loss averaged over the samples.

    Parameters
    ----------
    alpha : float, default=0.001
        Learning rate applied to every gradient step.
    epochs : int, default=1000
        Number of gradient-descent iterations performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features, n_targets) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    bias : scalar, numpy.ndarray of shape (n_targets,), or None
        Learned intercept. A scalar when there is a single target column,
        otherwise one value per target. ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha=0.001, epochs=1000):
        self.alpha = alpha
        self.epochs = epochs
        self.weights = None
        self.bias = None
        # Remembers whether ``fit`` received a 1-D target so that ``predict``
        # can hand back predictions in the same layout.
        self._flat_target = False

    def fit(self, X, y):
        """Estimate ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_samples,)
            Training features. A 1-D input is treated as a single feature.
        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Training targets.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` have different sample counts.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # Ensure X is 2D
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        # Work with a column-shaped target internally. Without this a 1-D y
        # would broadcast against the (n_samples, 1) predictions into an
        # (n_samples, n_samples) matrix and corrupt the gradients.
        self._flat_target = y.ndim == 1
        if self._flat_target:
            y = y.reshape(-1, 1)

        m_samples, n_features = X.shape
        if m_samples == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if y.shape[0] != m_samples:
            raise ValueError(
                f"X and y have inconsistent sample counts: {m_samples} != {y.shape[0]}"
            )
        n_targets = y.shape[1]

        # Initialize weights and bias. Weights are (n_features, n_targets) so
        # that ``X @ weights`` is well defined for any number of features.
        weights = np.zeros((n_features, n_targets))
        bias = np.zeros(n_targets)

        for _ in range(self.epochs):
            # Residuals of the current model, shape (n_samples, n_targets).
            residual = np.dot(X, weights) + bias - y

            # Both gradients come from the same residual, i.e. they are
            # evaluated at the parameters from before this step.
            grad_w = np.dot(X.T, residual) / m_samples
            grad_b = residual.sum(axis=0) / m_samples

            weights -= self.alpha * grad_w
            bias -= self.alpha * grad_b

        self.weights = weights
        # Keep a scalar intercept in the single-target case.
        self.bias = bias[0] if n_targets == 1 else bias

    def predict(self, X):
        """Predict targets for ``X`` with the fitted parameters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_samples,)
            Samples to predict. A 1-D input is treated as a single feature.

        Returns
        -------
        numpy.ndarray
            Shape ``(n_samples,)`` if ``fit`` received a 1-D ``y``, otherwise
            ``(n_samples, n_targets)``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("Call fit() before predict().")

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        y_pred = np.dot(X, self.weights) + self.bias
        return y_pred.ravel() if self._flat_target else y_pred


In [4]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,YearsExperience,Salary
0,0,1.2,39344.0
1,1,1.4,46206.0
2,2,1.6,37732.0
3,3,2.1,43526.0
4,4,2.3,39892.0


In [5]:
# Feature matrix and target, both kept as single-column DataFrames (2-D).
feature_columns = ["YearsExperience"]
target_columns = ["Salary"]
X, y = df[feature_columns], df[target_columns]

In [6]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [7]:
# Learn the standardisation statistics from the training features only,
# then apply them to those same features.
# NOTE(review): X_train_scaled is not used by the cells visible here (the model
# below is fitted on the unscaled X_train) — confirm whether it is needed.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

In [24]:
# Fit scikit-learn's LinearRegression (not the TariqLinearRegression class
# defined above) on the unscaled training split, then predict the held-out rows.
t_lr = LinearRegression().fit(X_train, y_train)
y_pred = t_lr.predict(X_test)

In [25]:
# Coefficient of determination on the held-out rows, printed as a percentage.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R2: {round(r2*100,2)}%")

R2: 97.41%


In [26]:
# Mean absolute error on the held-out rows, in the same units as Salary.
# NOTE(review): `mea` holds the MAE; the name is kept in case later cells use it.
mea = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"MAE: {mea}")

MAE: 3737.417861878897
