In [1]:
import numpy as np
import pandas as pd

In [2]:
class MRR:

    def __init__(self, lamda=100):
        self.coef_ = None
        self.intercept_ = None
        self.lamda = lamda

    def fit(self, X_train, y_train):
        X_train = np.insert(X_train, 0, 1, axis=1)

        I = np.eye(X_train.shape[1])
        I[0, 0] = 0   # do NOT penalize intercept

        coefficients = np.linalg.inv(np.dot(X_train.T, X_train) + self.lamda * I).dot(X_train.T).dot(y_train)

        self.intercept_ = coefficients[0]
        self.coef_ = coefficients[1:]
        
    def predict(self, X_test):
        y_pred = np.dot(X_test, self.coef_) + self.intercept_
        return y_pred


In [3]:
df=pd.read_csv(r"C:\Users\91861\Desktop\Artifical Intelligence\Machine Leraning\MULTIPLE_LINEAR_REGRESSION\Salary_dataset.csv")

In [4]:
df.head()

Unnamed: 0,YearsExperience,Age,Education,JobType,Location,CompanySize,PerformanceRating,ProjectsCompleted,Certifications,Promoted,Salary
0,1.1,22,Bachelor,Engineering,Urban,Medium,3,2,0,No,45000
1,1.3,23,Bachelor,Marketing,Suburban,Small,3,3,1,No,42000
2,1.5,24,Bachelor,Sales,Urban,Medium,4,4,1,No,46000
3,1.8,25,Bachelor,HR,Rural,Small,3,2,0,No,38000
4,2.0,26,Bachelor,DataScience,Urban,Large,4,5,2,No,55000


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   YearsExperience    99 non-null     float64
 1   Age                99 non-null     int64  
 2   Education          99 non-null     object 
 3   JobType            99 non-null     object 
 4   Location           99 non-null     object 
 5   CompanySize        99 non-null     object 
 6   PerformanceRating  99 non-null     int64  
 7   ProjectsCompleted  99 non-null     int64  
 8   Certifications     99 non-null     int64  
 9   Promoted           99 non-null     object 
 10  Salary             99 non-null     int64  
dtypes: float64(1), int64(5), object(5)
memory usage: 8.6+ KB


In [6]:
from sklearn.preprocessing import OrdinalEncoder

# Define correct orders
education_order = ['Bachelor', 'Master', 'PhD']
jobtype_order = ['HR', 'Sales', 'Marketing', 'Engineering', 'DataScience', 'Management']
location_order = ['Rural', 'Suburban', 'Urban']
companysize_order = ['Small', 'Medium', 'Large']
promoted_order = ['No', 'Yes']

# Ordinal Encoder
encoder = OrdinalEncoder(categories=[
    education_order,
    jobtype_order,
    location_order,
    companysize_order,
    promoted_order
])

# Apply encoding
df[['Education', 'JobType', 'Location', 'CompanySize', 'Promoted']] = \
    encoder.fit_transform(
        df[['Education', 'JobType', 'Location', 'CompanySize', 'Promoted']]
    )

# Optional: convert to int for clarity
df[['Education', 'JobType', 'Location', 'CompanySize', 'Promoted']] = \
    df[['Education', 'JobType', 'Location', 'CompanySize', 'Promoted']].astype(int)

# Final check
print(df.dtypes)


YearsExperience      float64
Age                    int64
Education              int32
JobType                int32
Location               int32
CompanySize            int32
PerformanceRating      int64
ProjectsCompleted      int64
Certifications         int64
Promoted               int32
Salary                 int64
dtype: object


In [7]:
df.head()

Unnamed: 0,YearsExperience,Age,Education,JobType,Location,CompanySize,PerformanceRating,ProjectsCompleted,Certifications,Promoted,Salary
0,1.1,22,0,3,2,1,3,2,0,0,45000
1,1.3,23,0,2,1,0,3,3,1,0,42000
2,1.5,24,0,1,2,1,4,4,1,0,46000
3,1.8,25,0,0,0,0,3,2,0,0,38000
4,2.0,26,0,4,2,2,4,5,2,0,55000


In [8]:
x=df.iloc[:,1:-1]
y=df.iloc[:,-1:]

In [9]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=3)

In [10]:
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

(79, 9) (20, 9) (79, 1) (20, 1)


In [11]:
ridge = MRR()

In [12]:
ridge.fit(X_train,y_train)

In [13]:
y_pred = ridge.predict(X_test)

In [14]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
print("MAE",mean_absolute_error(y_test,y_pred))
print("MSE",mean_squared_error(y_test,y_pred))
print("R2 score",r2_score(y_test,y_pred))

MAE 4988.163711500965
MSE 46349784.223977484
R2 score 0.9315150113972591


In [15]:
ridge.coef_

array([[ 262.19784582],
       [1396.00269664],
       [ 770.46120873],
       [ 319.45250604],
       [ 566.12979134],
       [ 757.59775731],
       [3228.39088345],
       [1459.57773611],
       [ 563.94442577]])

In [16]:
ridge.intercept_

array([21285.50092798])