In [42]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model, metrics
from sklearn.compose import make_column_selector
from sklearn.compose import make_column_transformer
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score,accuracy_score
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import OneHotEncoder
warnings.filterwarnings('ignore')

## HR Dataset

#### Example of supervised learning

In [43]:
# Read the HR dataset; the path is relative to the current working directory.
hr = pd.read_csv(filepath_or_buffer='HR_comma_sep.csv')

In [44]:
# The 'left' column is the target; every remaining column is a feature.
y = hr['left']
X = hr.drop(columns='left')

In [45]:
# Hold out 30% of the rows for testing. The split is stratified on y and
# seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [46]:
# One-hot encoder for the object-dtype columns: dense output, first level of
# each column dropped, and results returned as a pandas DataFrame.
ohe = OneHotEncoder(drop='first', sparse_output=False)
ohe.set_output(transform='pandas')

# The two scalers are only instantiated here; nothing in this cell applies them.
scaler_std = StandardScaler()
scaler_mm = MinMaxScaler()

# Column transformer: non-object columns pass through unchanged, object
# columns are one-hot encoded; output column names carry no step prefix.
ct = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=object)),
    (ohe, make_column_selector(dtype_include=object)),
    verbose_feature_names_out=False,
).set_output(transform='pandas')



#### Linear Discriminant Analysis

In [47]:
# Linear discriminant analysis classifier on the encoded HR features.
# LinearDiscriminantAnalysis comes from the notebook's top import cell, so
# this cell no longer carries its own import.
lda = LinearDiscriminantAnalysis()

# 'SCL' is a scaling slot that is switched off: a step set to None is skipped.
pipe = Pipeline(steps=[('CT', ct), ('SCL', None), ('LDA', lda)])
pipe.fit(X_train, y_train)

# Accuracy on the held-out test rows.
y_pred = pipe.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.7695043342965103


#### Quadratic Discriminant Analysis

In [48]:
# Quadratic discriminant analysis classifier on the encoded HR features.
# QuadraticDiscriminantAnalysis comes from the notebook's top import cell, so
# this cell no longer carries its own import.
qda = QuadraticDiscriminantAnalysis()

# 'SCL' is a scaling slot that is switched off: a step set to None is skipped.
pipe = Pipeline(steps=[('CT', ct), ('SCL', None), ('DA', qda)])
pipe.fit(X_train, y_train)

# Accuracy on the held-out test rows.
y_pred = pipe.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.863080684596577


## Vehicle Silhouettes

#### Example of dimensionality reduction (dimensionality reduction is usually an unsupervised learning technique, but LDA is supervised: it needs the class labels `y` to be fitted)

In [75]:
# Read the vehicle dataset (path relative to the working directory) and
# separate the 'Class' label from the feature columns.
vehicle = pd.read_csv(filepath_or_buffer='Vehicle.csv')
y = vehicle['Class']
X = vehicle.drop(columns='Class')

In [76]:
# Stratified, seeded 70/30 split of the vehicle data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

# LDA used as a transformer: configured to return DataFrames, then fitted on
# the training features together with their class labels.
lda = LinearDiscriminantAnalysis()
lda.set_output(transform='pandas')
lda.fit(X_train, y_train)

In [77]:
# Project the training features onto the fitted discriminant axes; LDA keeps
# at most n_classes - 1 columns, so the feature count shrinks.
X_train_lda = lda.transform(X_train)

# Shape after the projection, then shape of the original features.
print(X_train_lda.shape, X_train.shape, sep='\n')

(592, 3)
(592, 18)


#### Logistic Regression along with Linear Discriminant Analysis

In [78]:
# Logistic regression trained on the LDA-reduced training features.
# LogisticRegression is already imported in the notebook's first cell, so the
# duplicate import that used to sit here has been dropped.
lr = LogisticRegression()
lr.fit(X_train_lda, y_train)

In [79]:
# Apply the already-fitted LDA projection to the test features, predict with
# the logistic regression, and report accuracy.
X_test_lda = lda.transform(X_test)
y_pred = lr.predict(X_test_lda)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.7992125984251969


#### Using pipeline

In [80]:
# Same LDA -> logistic regression chain, expressed as a single pipeline.
# NOTE(review): the pipeline is built from the existing `lda` and `lr`
# objects rather than from fresh estimators.
pipe = Pipeline(steps=[('DA', lda), ('LR', lr)])
y_pred = pipe.fit(X_train, y_train).predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.7992125984251969


#### Only Logistic Regression

In [81]:
# Comparison run without LDA: logistic regression on the original features.
# NOTE(review): the features are not scaled here and warnings are silenced in
# the first cell, so a solver convergence warning would go unnoticed; confirm
# the model converged before comparing this score with the others.
lr = LogisticRegression()
y_pred = lr.fit(X_train, y_train).predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.7559055118110236


#### Quadratic Discriminant Analysis (supervised classification only; not used for dimensionality reduction)

In [86]:
# QDA used directly as a classifier on the original vehicle features.
qda = QuadraticDiscriminantAnalysis()
y_pred = qda.fit(X_train, y_train).predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.8543307086614174
