In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score
from sklearn.linear_model import LinearRegression as sk_lr
from sklearn.linear_model import LogisticRegression as sk_log_r

### 1) Linear Regression

##### 1.1 Creating custom dataset for testing model

In [2]:
from sklearn.datasets import make_regression

# Synthetic regression problem: 1000 samples with two features, both informative.
# The generator is seeded (random_state=42) so the same data comes back on every run.
# coef=True makes the call also return the coefficients used to generate the target,
# which are compared against the fitted weights later in the notebook.
X, y, coef = make_regression(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    noise=10.0,
    bias=1.0,
    coef=True,
    random_state=42,
)

In [3]:
# Noise-free target rebuilt from the generating coefficients and the bias of 1
# that was passed to make_regression. Despite the column name 'y_hat', this is
# not a model prediction.
true_signal = coef[0] * X[:, 0] + coef[1] * X[:, 1] + 1

# Put the generated data next to the parameters that produced it; the scalar
# entries (weights, bias) are broadcast to every row.
df = pd.DataFrame({
    'feature1': X[:, 0],
    'feature2': X[:, 1],
    'target (y)': y,
    'weight1': coef[0],
    'weight2': coef[1],
    'bias': 1,
    'y_hat': true_signal,
})
df.head()

Unnamed: 0,feature1,feature2,target (y),weight1,weight2,bias,y_hat
0,-0.167118,0.146714,-13.99695,40.710649,6.600984,1,-4.835031
1,-0.020902,0.117327,-11.678089,40.710649,6.600984,1,0.923559
2,0.150419,0.364961,18.775455,40.710649,6.600984,1,9.532753
3,0.555604,0.089581,7.661465,40.710649,6.600984,1,24.210339
4,0.058209,-1.14297,-13.19553,40.710649,6.600984,1,-4.175014


In [4]:
# Model inputs are only the two feature columns; the remaining columns of df
# (target, generating weights, bias, noise-free y_hat) must not leak into X.
X = df[['feature1', 'feature2']]
Y = df['target (y)']

# 80/20 train/test split. The seed is fixed so that Restart & Run All reproduces
# the same split (and therefore the same weights and scores) every time.
# NOTE: outputs recorded in this notebook from an earlier, unseeded run may
# differ slightly from what a fresh run prints.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

##### 1.2 Training the custom made model with the dataset

In [5]:
# Import our custom linear regression model under the alias custom_lr.
from my_lib.LinearRegression_LinAlg import LinearRegression as custom_lr
# Alternative implementation exposing the same class name (the module name
# suggests gradient descent — TODO confirm). Swap it with the import above to try it.
# from my_lib.LinearRegression_GradDesc import LinearRegression as custom_lr

In [6]:
# Build the custom regressor with its default settings and fit it on the training split.
custom_model=custom_lr()
custom_model.train(X_train,Y_train)
print("Model Trained")
# Print the learned parameters so they can be compared with the generating
# coefficients and with scikit-learn's fit.
print("Custom model weights=", custom_model.coefficient)
print("Custom model bias=:", custom_model.bias)

Model Trained
Custom model weights= [41.07953009  6.80445538]
Custom model bias=: 0.7404004752460575


In [7]:
# Score the custom model on the held-out split.
# NOTE: despite its name, `accuracy` holds the R^2 score here, not a
# classification accuracy.
Y_pred = custom_model.predict(X_test)
accuracy = r2_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.9406638568459064

##### 1.3 Custom model vs. scikit-learn's LinearRegression

In [8]:
# Reference fit with scikit-learn's LinearRegression on the same training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = sk_lr().fit(X_train, Y_train)
print("Model Trained")
print("Sklearn weights=", model.coef_)
print("Sklearn bias=:", model.intercept_)

Model Trained
Sklearn weights= [41.07953009  6.80445538]
Sklearn bias=: 0.7404004752460551


In [9]:
# Score the scikit-learn model on the same held-out split for a like-for-like
# comparison. As before, `accuracy` holds the R^2 score.
Y_pred = model.predict(X_test)
accuracy = r2_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.9406638568459063

### 2) Logistic Regression

##### 2.1 Creating custom dataset for testing model

In [10]:
from sklearn.datasets import make_classification

# Synthetic binary classification problem: 1000 samples and 5 features, of which
# 2 are informative and 1 is redundant. flip_y=0.1 injects label noise, and the
# generator is seeded so the same data comes back on every run.
X, Y = make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=2,
    n_redundant=1,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=1,
    flip_y=0.1,
    random_state=42,
)

# Wrap the feature matrix in a frame with 1-based column names (Feature_1 ... Feature_5)
# and append the class labels as the 'Target' column.
df = pd.DataFrame(
    X,
    columns=[f'Feature_{k}' for k in range(1, X.shape[1] + 1)],
).assign(Target=Y)
df


Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Target
0,0.317453,0.768328,-0.332860,-1.218005,1.440387,1
1,1.065172,0.337779,1.452617,-0.828461,0.979448,1
2,0.466671,1.567346,1.521006,0.818854,-0.965285,0
3,1.497432,0.224761,1.857702,-1.157030,1.367534,1
4,-1.366858,1.150180,0.592567,-0.934416,1.105847,1
...,...,...,...,...,...,...
995,-1.951102,-0.125265,0.143588,-1.158613,1.368888,1
996,-0.889241,1.772590,0.955301,0.508680,-0.598465,0
997,-1.153332,1.513910,-1.687344,-1.162699,1.376134,1
998,-1.070852,1.098807,1.911419,-0.807573,0.955887,1


In [11]:
# Every column except the label is a model input.
X = df.drop(columns=['Target'])
Y = df['Target']

# 80/20 train/test split. The seed is fixed so that Restart & Run All reproduces
# the same split (and therefore the same weights and accuracies) every time.
# NOTE: outputs recorded in this notebook from an earlier, unseeded run may
# differ slightly from what a fresh run prints.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

##### 2.2 Training our Model with the dataset

In [12]:
#Importing our Custom Model
from my_lib.LogisticRegression import LogisticRegression as custom_log_r

In [13]:
# Build the custom logistic-regression classifier with its default settings
# and fit it on the training split.
custom_classifier=custom_log_r()
custom_classifier.train(X_train,Y_train)
print("Model Trained")
# Print the learned parameters so they can be compared with scikit-learn's fit.
print("Custom classifier weights=", custom_classifier.coefficient)
print("Custom classifier bias=:", custom_classifier.bias)

Model Trained
Custom classifier weights= [ 0.08330155 -0.01968997 -0.10495092 -0.87912871  1.03879146]
Custom classifier bias=: 0.0700941041813466


In [14]:
# Fraction of held-out samples the custom classifier labels correctly.
Y_pred = custom_classifier.predict(X_test)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.845

##### 2.3 Custom model vs. scikit-learn's LogisticRegression

In [15]:
# Reference fit with scikit-learn's LogisticRegression (default settings) on the
# same training split. fit() returns the estimator itself, so construction and
# fitting can be chained.
classifier = sk_log_r().fit(X_train, Y_train)
print("Model Trained")
print("Sklearn Classifier weights=", classifier.coef_)
print("Sklearn Classifier bias=:", classifier.intercept_)

Model Trained
Sklearn Classifier weights= [[ 0.13480204 -0.22730905 -0.16038776 -1.07154791  1.26585688]]
Sklearn Classifier bias=: [0.5153322]


In [16]:
# Accuracy of the scikit-learn classifier on the same held-out split, for a
# like-for-like comparison with the custom classifier.
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.86

### 3) Encoders

##### 3.1 One Hot Encoder

In [17]:
# pandas is already imported in the first cell; the repeated import is harmless
# and lets this section be read on its own.
import pandas as pd
from my_lib.Encoders import OneHotEncoder
# Single encoder instance; after transform() it exposes the categories it saw
# through `feature_names`.
onehotencoder=OneHotEncoder()

In [18]:
# Load the small demo dataset. The path is relative, so BCS_data.csv must be in
# the notebook's working directory.
# NOTE: this rebinds `df`, replacing the classification frame from section 2.
df=pd.read_csv("BCS_data.csv")
df

Unnamed: 0,Age,Gender,Dept,Education,Type
0,26,Male,Police,Bachelor's,Cadre
1,31,Male,Tax,Pass,Non-Cadre
2,24,Female,Admin,Master's,Cadre
3,34,Male,Tax,Pass,Cadre
4,27,Male,Admin,Bachelor's,Cadre
5,28,Female,Admin,Bachelor's,Cadre
6,25,Female,Tax,Master's,Cadre
7,30,Male,Police,Pass,Non-Cadre
8,29,Female,Admin,Pass,Non-Cadre
9,33,Male,Tax,Bachelor's,Non-Cadre


In [19]:
# Nominal (unordered) columns to expand into 0/1 indicator columns.
nominal_columns = ['Gender', 'Dept']

# drop_first=True omits the indicator for the first category of each column.
# NOTE: `df` is overwritten with the encoded frame, so reload the CSV before
# re-running this cell.
df = onehotencoder.transform(df, nominal_columns, drop_first=True)
df

Unnamed: 0,Age,Education,Type,Gender_Female,Dept_Tax,Dept_Admin
0,26,Bachelor's,Cadre,0,0,0
1,31,Pass,Non-Cadre,0,1,0
2,24,Master's,Cadre,1,0,1
3,34,Pass,Cadre,0,1,0
4,27,Bachelor's,Cadre,0,0,1
5,28,Bachelor's,Cadre,1,0,1
6,25,Master's,Cadre,1,1,0
7,30,Pass,Non-Cadre,0,0,0
8,29,Pass,Non-Cadre,1,0,1
9,33,Bachelor's,Non-Cadre,0,1,0


In [20]:
# Categories the encoder recorded for each encoded column. With drop_first=True
# the first category listed for a column is the one that got no indicator column.
onehotencoder.feature_names

{'Gender': array(['Male', 'Female'], dtype=object),
 'Dept': array(['Police', 'Tax', 'Admin'], dtype=object)}

##### 3.2 Ordinal Encoder

In [21]:
from my_lib.Encoders import OrdinalEncoder
# Single encoder instance; after transform() it exposes the category-to-integer
# mappings it used through `feature_maps`.
ordinalencoder = OrdinalEncoder()

Here, the order of the 'Education' categories matters, so a parameter named `category` is passed to specify that order explicitly.

In [22]:
# Columns to encode; `categories` is paired with this list by position.
features = ['Education', 'Type']

# Explicit low-to-high order for 'Education'. None for 'Type' leaves the order
# to the encoder (presumably order of first appearance — confirm in my_lib.Encoders).
categories = [["Pass", "Bachelor's", "Master's"], None]

# Pass `features` itself rather than repeating the list literal, so the columns
# and their category orders cannot drift apart.
# NOTE: `df` is overwritten with the encoded frame.
df = ordinalencoder.transform(df, features, category=categories)
df

Unnamed: 0,Age,Education,Type,Gender_Female,Dept_Tax,Dept_Admin
0,26,1,0,0,0,0
1,31,0,1,0,1,0
2,24,2,0,1,0,1
3,34,0,0,0,1,0
4,27,1,0,0,0,1
5,28,1,0,1,0,1
6,25,2,0,1,1,0
7,30,0,1,0,0,0
8,29,0,1,1,0,1
9,33,1,1,0,1,0


In [23]:
# Category-to-integer mappings the encoder applied, one dict per encoded column,
# in the order the columns were passed to transform().
ordinalencoder.feature_maps

[{'Pass': 0, "Bachelor's": 1, "Master's": 2}, {'Cadre': 0, 'Non-Cadre': 1}]