In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score
from sklearn.linear_model import LinearRegression as sk_lr
from sklearn.linear_model import LogisticRegression as sk_log_r

### 1) Linear Regression

##### 1.1 Creating a custom dataset for testing the model

In [8]:
from sklearn.datasets import make_regression

# Generate a seeded synthetic regression problem: 1000 samples, two features
# (both informative), Gaussian noise and an intercept of 1.0. Because
# coef=True, the call returns a third value, unpacked here as `coef`.
X, y, coef = make_regression(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    noise=10.0,
    bias=1.0,
    coef=True,
    random_state=42,
)

In [9]:
# Tabulate the generated samples next to the parameters that produced them.
# `y_hat` is the noise-free value implied by the returned coefficients plus
# the bias of 1 (the same bias handed to make_regression), so it can be
# compared row by row against the noisy `target (y)` column.
df = pd.DataFrame(
    {
        'feature1': X[:, 0],
        'feature2': X[:, 1],
        'target (y)': y,
    }
).assign(
    weight1=coef[0],
    weight2=coef[1],
    bias=1,
    y_hat=coef[0] * X[:, 0] + coef[1] * X[:, 1] + 1,
)
df.head()

Unnamed: 0,feature1,feature2,target (y),weight1,weight2,bias,y_hat
0,-0.167118,0.146714,-13.99695,40.710649,6.600984,1,-4.835031
1,-0.020902,0.117327,-11.678089,40.710649,6.600984,1,0.923559
2,0.150419,0.364961,18.775455,40.710649,6.600984,1,9.532753
3,0.555604,0.089581,7.661465,40.710649,6.600984,1,24.210339
4,0.058209,-1.14297,-13.19553,40.710649,6.600984,1,-4.175014


In [10]:
# Separate the model inputs from the target. Only the two feature columns are
# kept: the weight/bias/y_hat columns describe how the data was generated and
# must not be fed to the model as inputs.
X = df.drop(columns=['target (y)', 'weight1', 'weight2', 'bias', 'y_hat'])
Y = df['target (y)']

# Hold out 20% of the rows for evaluation. The split is seeded so that a
# kernel restart followed by "Run All" reproduces the same partition, and
# therefore the same fitted weights and scores in the cells below.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

##### 1.2 Training the custom made model with the dataset

In [3]:
#Import our custom model
from my_lib.LinearRegression_LinAlg import LinearRegression as custom_lr

In [11]:
# Fit the project's own linear regression (imported as `custom_lr`) on the
# training split, then report the parameters it exposes through its
# `coefficient` and `bias` attributes.
custom_model = custom_lr()
custom_model.train(X_train, Y_train)

print("Model Trained")
print("Custom model weights=", custom_model.coefficient)
print("Custom model bias=:", custom_model.bias)

Model Trained
Custom model weights= [41.07236248  6.7490159 ]
Custom model bias=: 1.1799319753575315


In [12]:
# Predict on the held-out rows with the custom model and score the result.
# NOTE: despite its name, `accuracy` holds the R^2 coefficient of
# determination returned by r2_score, not a classification accuracy.
Y_pred = custom_model.predict(X_test)
accuracy = r2_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.9352226531580733

##### 1.3 Custom model vs. scikit-learn's regression

In [13]:
# Fit scikit-learn's LinearRegression (imported as `sk_lr`) on the same
# training split so its learned parameters can be compared with the custom
# model's output above.
model = sk_lr()
model.fit(X_train, Y_train)

print("Model Trained")
print("Sklearn weights=", model.coef_)
print("Sklearn bias=:", model.intercept_)

Model Trained
Sklearn weights= [41.07236248  6.7490159 ]
Sklearn bias=: 1.1799319753575317


In [14]:
# Score the scikit-learn model on the same held-out rows. This rebinds
# `Y_pred` and `accuracy`, replacing the custom model's values; as before,
# `accuracy` is an R^2 score.
Y_pred = model.predict(X_test)
accuracy = r2_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.9352226531580733

### 2) Logistic Regression

##### 2.1 Creating a custom dataset for testing the model

In [2]:
from sklearn.datasets import make_classification

# Generate a seeded synthetic binary classification problem: 1000 samples and
# five features (two informative, one redundant, none repeated), one cluster
# per class, with flip_y=0.1 adding label noise.
X, Y = make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=2,
    n_redundant=1,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=1,
    flip_y=0.1,
    random_state=42,
)

# Wrap the feature matrix in a frame with 1-based column names and append the
# labels as the `Target` column. This rebinds `df`, X and Y from section 1.
df = pd.DataFrame(
    X,
    columns=[f'Feature_{i+1}' for i in range(X.shape[1])],
).assign(Target=Y)
df


Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Target
0,0.317453,0.768328,-0.332860,-1.218005,1.440387,1
1,1.065172,0.337779,1.452617,-0.828461,0.979448,1
2,0.466671,1.567346,1.521006,0.818854,-0.965285,0
3,1.497432,0.224761,1.857702,-1.157030,1.367534,1
4,-1.366858,1.150180,0.592567,-0.934416,1.105847,1
...,...,...,...,...,...,...
995,-1.951102,-0.125265,0.143588,-1.158613,1.368888,1
996,-0.889241,1.772590,0.955301,0.508680,-0.598465,0
997,-1.153332,1.513910,-1.687344,-1.162699,1.376134,1
998,-1.070852,1.098807,1.911419,-0.807573,0.955887,1


In [3]:
# Separate the feature columns from the label column.
X = df.drop(columns=['Target'])
Y = df['Target']

# Hold out 20% of the rows for evaluation. The split is seeded so that a
# kernel restart followed by "Run All" reproduces the same partition, which
# keeps the custom-vs-sklearn accuracy comparison below stable between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

##### 2.2 Training our Model with the dataset

In [4]:
#Importing our Custom Model
from my_lib.LogisticRegression import LogisticRegression as custom_log_r

In [5]:
# Fit the project's own logistic regression (imported as `custom_log_r`) and
# report the parameters it exposes through `coefficient` and `bias`.
# This cell reads X_train / Y_train, so the train/test split cell must have
# been run first in the current kernel.
custom_classifier = custom_log_r()
custom_classifier.train(X_train, Y_train)

print("Model Trained")
print("Custom classifier weights=", custom_classifier.coefficient)
print("Custom classifier bias=:", custom_classifier.bias)

NameError: name 'X_train' is not defined

In [5]:
# Classify the held-out rows with the custom model and measure the fraction
# of labels it gets right.
Y_pred = custom_classifier.predict(X_test)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.905

##### 2.3 Custom model vs. scikit-learn's classifier

In [6]:
# Fit scikit-learn's LogisticRegression (imported as `sk_log_r`) with its
# default settings on the same training split, for comparison with the
# custom classifier.
classifier = sk_log_r()
classifier.fit(X_train, Y_train)

print("Model Trained")
print("Sklearn Classifier weights=", classifier.coef_)
print("Sklearn Classifier bias=:", classifier.intercept_)

Model Trained
Sklearn Classifier weights= [[ 0.06731352 -0.25255864 -0.15022302 -1.07231057  1.26672082]]
Sklearn Classifier bias=: [0.54321613]


In [7]:
# Score the scikit-learn classifier on the same held-out rows. This rebinds
# `Y_pred` and `accuracy`, replacing the custom classifier's values.
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.91

In [1]:
from my_lib.Encoders import OneHotEncoder

In [2]:
# Load the sample table used for the one-hot encoding demo. The path is
# relative, so the CSV must sit in the notebook's working directory.
df3 = pd.read_csv("BCS_data.csv")

In [3]:
# Categorical columns that will be expanded into indicator columns.
features = ["Gender", "Dept"]

# Reference only (kept disabled): a manual expansion that adds one 0/1 column
# per distinct value, named "<feature>_<value>". It appears to yield the same
# indicator columns as the encoder call below, but it writes them into df3
# itself instead of producing a separate frame.
#
# for feature in features:
#     for var in df3[feature].unique():
#         feature_name=f'{feature}_{var}'
#         df3[feature_name]=(df3[feature]==var).astype(int)
# df3

In [4]:
# Encode the categorical columns listed in `features` and bind the result to
# df4. `OneHotEncoder` here is the project class imported from
# my_lib.Encoders in an earlier cell.
# NOTE(review): a later cell imports sklearn.preprocessing.OneHotEncoder under
# the same name, so re-running this cell after that import would resolve to
# the sklearn class instead — confirm whether one of the imports should be
# aliased.
df4=OneHotEncoder.encode(df3,features)

In [5]:
# Display the source frame after the encode call; the recorded output shows
# only the original columns, i.e. no indicator columns were added to df3.
df3

Unnamed: 0,Age,Gender,Dept,Type
0,26,Male,Police,Cadre
1,31,Male,Tax,Non-Cadre
2,24,Female,Admin,Cadre
3,34,Male,Tax,Cadre
4,27,Male,Admin,Cadre
5,28,Female,Admin,Non-Cadre
6,25,Female,Tax,Non-Cadre
7,30,Male,Police,Cadre
8,29,Female,Admin,Cadre
9,33,Male,Tax,Non-Cadre


In [6]:
# Display the encoded frame: the original columns followed by one 0/1
# indicator column per distinct value of each encoded feature.
df4

Unnamed: 0,Age,Gender,Dept,Type,Gender_Male,Gender_Female,Dept_Police,Dept_Tax,Dept_Admin
0,26,Male,Police,Cadre,1,0,1,0,0
1,31,Male,Tax,Non-Cadre,1,0,0,1,0
2,24,Female,Admin,Cadre,0,1,0,0,1
3,34,Male,Tax,Cadre,1,0,0,1,0
4,27,Male,Admin,Cadre,1,0,0,0,1
5,28,Female,Admin,Non-Cadre,0,1,0,0,1
6,25,Female,Tax,Non-Cadre,0,1,0,1,0
7,30,Male,Police,Cadre,1,0,1,0,0
8,29,Female,Admin,Cadre,0,1,0,0,1
9,33,Male,Tax,Non-Cadre,1,0,0,1,0


In [7]:
from sklearn.preprocessing import OneHotEncoder