In [163]:
!pip install imblearn



In [197]:
import copy
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from tqdm import tqdm

In [198]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
room_occupancy_estimation = fetch_ucirepo(id=864) 

In [268]:
# Baseline

class BaselineClassifier():
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def predict(self):
        return np.random.randint(0, len(np.unique(self.y)) , size=len(self.X))
    
    

In [313]:
# SVM Solver

class SVM:
    
    def __init__(self, learning_rate = 0.00001, lambda_ = 0.001, max_iterations = 100, normalize = True):
        self.normalize = normalize
        self.learning_rate = learning_rate
        self.lambda_ = lambda_
        self.max_iterations = max_iterations
     
    def add_X0(self, X):
        return np.column_stack((np.ones(X.shape[0]), X))
    
    def normalize_testdata(self, X):
        self.mean = np.mean(X, axis=0)
        self.std = np.std(X, axis=0)
        X = (X-self.mean) / self.std
        X = self.add_X0(X)
        return X
    
    def normalize_traindata(self, X):
        X = (X-self.mean) / self.std
        X = self.add_X0(X)
        return X
    
    def fit(self, X, y):
        #n = X.shape[0]
        #y = np.where(y <= 0, -1, 1)
        if self.normalize:
            X = self.normalize_testdata(X)
        self.w = np.zeros(X.shape[1])
        self.b = 0        
        for i in tqdm(range(0, self.max_iterations)):
            for idx, xi in enumerate(X):
                condition = (y[0][idx] * (np.dot(self.w, xi))) + self.b
                if condition >= 1:
                    self.w = self.w - self.learning_rate * (2 * self.lambda_ * np.array(self.w))
                else:
                    
                    self.w = self.w - self.learning_rate * ((2 * self.lambda_ * np.array(self.w)) 
                                                            - np.dot(xi, y[0][idx]))
                    self.b = self.b - (self.learning_rate * -y[0][idx])
                                 
                    
    def predict(self, X):
        if self.normalize:
            X = self.normalize_traindata(X)
        pred = np.dot(X, self.w) + self.b
        return pred

In [314]:
# MULTI-CLASS solver

class multiclassSolver:
    
    def __init__(self, X, y, model):
        self.X = X
        self.y = y
        self.model = model
    
    def fit(self):
        self.weights_array = []
        self.models_array = []
        for i in np.unique(self.y):
            print("Training for target Class", i)
            y_ = pd.DataFrame(np.where(self.y.copy() == i, 1, -1))
            self.models_array.append(copy.copy(self.model))
            weights = self.models_array[-1].fit(self.X, y_)
            #weights = [sub_arr[0] for sub_arr in weights]
            #self.weights_array.append(weights)
        print("Training completed")    
        return self.weights_array
    
    def predict(self, X):
        pred = []
        for i in range (0, len(np.unique(self.y))): 
            pred.append(self.models_array[i].predict(X))
        max_indices = np.argmax(pred, axis=0)
        return max_indices
        


Usage

In [315]:
# data (as pandas dataframes) 
X = room_occupancy_estimation.data.features 
y = room_occupancy_estimation.data.targets 

In [316]:
X = X.drop(columns = ['Date','Time'])

In [317]:
def oversample_minority_classes(X, y):
    smote = SMOTE(sampling_strategy='auto', random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    return X_resampled, y_resampled

X_resampled, y_resampled = oversample_minority_classes(X, y)

print(X_resampled.shape)

(32912, 16)


In [290]:
baseline = BaselineClassifier(X_resampled, y_resampled)

In [291]:
baseline_ = baseline.predict()

In [282]:
# Baseline model metrics

# Accuracy
accuracy = accuracy_score(y_resampled, baseline_)

# Precision
precision = precision_score(y_resampled, baseline_, average='weighted')

# Recall
recall = recall_score(y_resampled, baseline_, average='weighted')
#conf_matrix = confusion_matrix(y_resampled, baseline_)

#print("Confusion Matrix:")
#print(conf_matrix)
print("\nAccuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)


Accuracy: 0.24626276130286825
Precision: 0.24625997055719473
Recall: 0.24626276130286825


In [318]:
# Spit data
trainX, testX, trainY, testY = train_test_split(X_resampled, y_resampled, test_size = 0.20)

In [322]:
model = SVM()

In [323]:
a = multiclassSolver(trainX, trainY, model)

In [324]:
wt = a.fit()

Training for target Class 0


100%|█████████████████████████████████████████| 100/100 [00:30<00:00,  3.23it/s]


Training for target Class 1


100%|█████████████████████████████████████████| 100/100 [00:36<00:00,  2.73it/s]


Training for target Class 2


100%|█████████████████████████████████████████| 100/100 [00:39<00:00,  2.51it/s]


Training for target Class 3


100%|█████████████████████████████████████████| 100/100 [00:34<00:00,  2.87it/s]

Training completed





In [325]:
s = a.predict(testX)

In [332]:
# SVM accuracy

import numpy as np

def confusion_matrix(y_true, y_pred, num_classes):
    matrix = np.zeros((num_classes, num_classes), dtype=int)
    for true, pred in zip(y_true, y_pred):
        matrix[int(true), int(pred)] += 1
    return matrix

def precision_recall_accuracy(conf_matrix):
    num_classes = conf_matrix.shape[0]
    precision = np.zeros(num_classes)
    recall = np.zeros(num_classes)
    accuracy = np.trace(conf_matrix) / np.sum(conf_matrix)
    for i in range(num_classes):
        true_positives = conf_matrix[i, i]
        false_positives = np.sum(conf_matrix[:, i]) - true_positives
        false_negatives = np.sum(conf_matrix[i, :]) - true_positives
        precision[i] = true_positives / (true_positives + false_positives)
        recall[i] = true_positives / (true_positives + false_negatives)
    return precision, recall, accuracy


conf_matrix = confusion_matrix(np.array(testY), s, 4)
precision, recall, accuracy = precision_recall_accuracy(conf_matrix)

print("Confusion Matrix:")
print(conf_matrix)
print("Precision:", precision)
print("Recall:", recall)
print("Accuracy:", accuracy)


Confusion Matrix:
[[1633   10    0   23]
 [   0 1609    1    1]
 [   0  249  508  910]
 [  22    0   42 1575]]
Precision: [0.98670695 0.86134904 0.92196007 0.62774014]
Recall: [0.98019208 0.99875854 0.30473905 0.9609518 ]
Accuracy: 0.8089017165426098
