# BITS F312 - Neural Network and Fuzzy Logic



## Assignment 1

In [None]:
# importing libraries required
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive so that the dataset files stored there
# can be accessed from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# finding out current working directory
!pwd

/content


In [None]:
# changing directory to - 'drive/MyDrive/NNFL/Data_A1/'
%cd drive/MyDrive/NNFL/Data_A1/

/content/drive/MyDrive/NNFL/Data_A1


In [None]:
# Plotting defaults for the whole notebook: use the 'ggplot' style sheet and
# make (14, 14) the default figure size. The style is applied first and the
# figure size is set afterwards.
plt.style.use('ggplot')
plt.rcParams["figure.figsize"] = (14, 14)

# Q6
Implement multiclass logistic regression (LOR), multiclass LOR with L2-norm regularization, and
multiclass LOR with L1-norm regularization models using the BGD, SGD, and MBGD
algorithms. The multiclass extension of the LOR models must be done using the one-vs-one and one-vs-all coding algorithms. The dataset in data_q6_q7.txt contains 7 features and one output. The output is classified as class 1, class 2, or class 3. You must use hold-out cross-validation (CV), with 70% of the instances for training, 10% for validation and 20% for testing, and evaluate each model on the training, validation, and testing instances. Evaluate the performance of each model using individual (per-class) accuracy and overall accuracy measures.


## Implementing one vs. one

In [None]:
def sigmoid(X):
    """Return the logistic function ``1 / (1 + exp(-X))`` element-wise.

    Parameters
    ----------
    X : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or ndarray
        Values in the closed interval [0, 1], with the same shape as ``X``.

    Notes
    -----
    The argument is clamped to [-500, 500] before exponentiation so that
    ``np.exp`` cannot overflow float64. Inside that range the result is
    exactly what the unclamped formula gives; outside it the output
    saturates (about 7e-218 on the negative side, 1.0 on the positive side).
    """
    clamped = np.clip(X, -500.0, 500.0)
    return 1.0/(1+np.exp(-clamped))

def logisticRegressionLossFunction(Y,Y_Pred):
    """Return the per-sample binary cross-entropy loss.

    Computes ``-(Y*log(p) + (1-Y)*log(1-p))`` element-wise, where ``p`` is
    ``Y_Pred`` clipped to [1e-15, 1 - 1e-15].

    Parameters
    ----------
    Y : scalar or ndarray
        True binary targets (0 or 1).
    Y_Pred : scalar or ndarray
        Predicted probabilities of the positive class.

    Returns
    -------
    numpy scalar or ndarray
        One loss value per sample; the caller is responsible for averaging.
    """
    # A probability of exactly 0 or 1 would make log() return -inf and turn
    # the loss into inf/nan, so keep predictions strictly inside (0, 1).
    eps = 1e-15
    Y_Pred = np.clip(Y_Pred, eps, 1 - eps)
    loss = -(Y*np.log(Y_Pred))
    loss = loss - (1-Y)*np.log(1-Y_Pred)
    return loss

def assignLabel(a, b, label):
    """Map a class label onto the binary target of the ``a`` vs ``b`` classifier.

    Returns 1 when ``label`` equals ``b``, 0 when it equals ``a``, and -1 for
    any other label (i.e. the sample does not take part in this pairing).
    ``b`` is checked first, so it wins if ``a`` and ``b`` are equal.
    """
    if label == b:
        return 1
    return 0 if label == a else -1


def predict(W, X, threshold = 0.5):
    """Classify a single sample with a binary logistic-regression model.

    Parameters
    ----------
    W : ndarray
        Weight vector of the binary classifier.
    X : ndarray
        Feature vector of one sample, laid out like the rows used for
        training (same columns, bias column included).
    threshold : float, optional
        Probability at or above which the positive class is chosen.

    Returns
    -------
    int
        1 if ``sigmoid(X . W) >= threshold``, otherwise 0.
    """
    # Use the sample passed in as the argument X, not a module-level variable.
    Z = X.dot(W)
    Y_Pred = sigmoid(Z)
    if Y_Pred >= threshold:
        return 1
    return 0



## Output

In [None]:
# One-vs-one multiclass logistic regression trained with batch gradient
# descent (BGD) and an L1 penalty on the weights.
print("One v One BGD L1")
print('------------------------------------------------------------------------------------------------------------')

# Hyper-parameters: number of gradient updates per pairwise classifier,
# learning rate, and regularisation strength.
EPOCH = 500
ALPHA = 0.1
LAMBD = 0.001

# Tab-separated file without a header row; the last column is the class label
# and the columns before it are the features.
df = pd.read_csv('data_q6_q7.txt', sep="\t", header=None, engine='python')
data = np.array(df)

# Standardise every feature column to zero mean and unit standard deviation
# (statistics are taken over the whole dataset, before it is split).
centred = data[:, :-1] - np.mean(data[:, :-1], axis=0)
X = centred / np.std(centred, axis=0)

# Add a constant column of ones for the bias term; it is placed just before
# the last feature column.
n_samples = X.shape[0]
bias = np.ones((n_samples, 1))
X = np.hstack((X[:, :-1], bias, X[:, -1:]))

# Shift the labels down by one so that they can be used as list indices.
Y = data[:, -1] - 1
categories = 3

# Hold-out split on shuffled row indices: the first 70% for training, the
# next 20% for testing and whatever remains for validation.
idx = np.arange(n_samples)
train_val = math.floor(0.7 * n_samples)
test_val = math.floor(0.2 * n_samples)
np.random.shuffle(idx)
split_end = train_val + test_val
train_range = idx[:train_val]
test_range = idx[train_val:split_end]
val_range = idx[split_end:]
X_Train, Y_Train = X[train_range], Y[train_range]
X_Test, Y_Test = X[test_range], Y[test_range]
X_Val, Y_Val = X[val_range], Y[val_range]

# weights[i][j] is the weight vector of the classifier that separates class i
# (target 0) from class j (target 1); only the entries with i < j are trained.
weights = [
    [np.random.randn(X.shape[1]) for _col in range(categories)]
    for _row in range(categories)
]
losses = []       # one list of per-update losses for each trained pair
plot_labels = []  # "i vs j" captions, in the same order as `losses`

for i in range(categories):
    for j in range(i + 1, categories):
        plot_labels.append(f"{i} vs {j}")

        # Keep only the training rows of class i or j; assignLabel marks
        # every other row with -1.
        pair_codes = [assignLabel(i, j, label) for label in Y_Train]
        temp = [code >= 0 for code in pair_codes]
        temp_Y = np.array(pair_codes)[temp]
        temp_X = X_Train[temp]

        w = weights[i][j]
        row = []
        for e in range(EPOCH):
            z = temp_X.dot(w)
            Y_Pred = sigmoid(z)
            loss = logisticRegressionLossFunction(temp_Y, Y_Pred)
            # Recorded loss is the mean cross-entropy only (no penalty term).
            epoch_loss = np.sum(loss) / temp_Y.shape[0]
            row.append(epoch_loss)

            # Full-batch gradient of the mean cross-entropy w.r.t. w.
            diff = Y_Pred - temp_Y
            loss_der = diff.T.dot(temp_X) / temp_X.shape[0]
            # Gradient step plus the L1 sub-gradient step (sign of w).
            w = w - ALPHA*loss_der - ALPHA*LAMBD*np.sign(w)
            weights[i][j] = w
        losses.append(row)

# Evaluate the trained one-vs-one classifiers on the testing, validation and
# training sets (in that order), printing per-class and overall accuracy.
for split_name, X_split, Y_split in (
    ("testing", X_Test, Y_Test),
    ("validation", X_Val, Y_Val),
    ("training", X_Train, Y_Train),
):
    print('------------------------------------------------------------------------------------------------------------')
    print("For {0} set".format(split_name))
    print('------------------------------------------------------------------------------------------------------------')

    correct = 0
    Y_category = list(Y_split)
    # Count samples per class by class index, so that position c always
    # refers to class c even when a class is missing from this split.
    Y_Dist = [Y_category.count(c) for c in range(categories)]
    YDistCorrect = [0]*categories

    for k in range(len(X_split)):
        # `x` is deliberately kept as a module-level name, since code outside
        # this loop may refer to it.
        x = X_split[k]
        # Every pairwise classifier casts one vote: output 1 votes for class j,
        # output 0 votes for class i.
        votes = [0]*categories
        for i in range(categories):
            for j in range(i+1, categories):
                if predict(weights[i][j], x) == 1:
                    votes[j] += 1
                else:
                    votes[i] += 1
        # Ties are resolved in favour of the lowest class index.
        Y_pred = votes.index(max(votes))
        if Y_pred == Y_split[k]:
            YDistCorrect[Y_pred] += 1
            correct += 1

    for i in range(categories):
        if Y_Dist[i] == 0:
            # No sample of this class in the split: accuracy is undefined.
            print("Class accuracy of {0} : n/a (no samples in this set)".format(i+1))
            continue
        class_accuracy = YDistCorrect[i]*100/Y_Dist[i]
        print("Class accuracy of {0} : {1}%".format(i+1, round(class_accuracy)))

    print("Out of {0}, {1} were predicted correctly, so the overall accuracy of the model is {2}%".format(len(X_split), correct, correct*100/len(X_split)))


One v One BGD L1
------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------
For testing set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 93%
Class accuracy of 2 : 100%
Class accuracy of 3 : 95%
Out of 42, 40 were predicted correctly, so the overall accuracy of the model is 95.23809523809524%
------------------------------------------------------------------------------------------------------------
For validation set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 88%
Class accuracy of 2 : 100%
Class accuracy of 3 : 100%
Out of 21, 20 were predicted correctly, so the overall accuracy of the model is 95.23809523809524%
----------------------------------------------

In [None]:
# One-vs-one multiclass logistic regression trained with batch gradient
# descent (BGD) and an L2 penalty on the weights.
print("One v One BGD L2")
print('------------------------------------------------------------------------------------------------------------')

# Hyper-parameters: number of gradient updates per pairwise classifier,
# learning rate, and regularisation strength.
EPOCH = 500
ALPHA = 0.1
LAMBD = 0.001

# Tab-separated file without a header row; the last column is the class label
# and the columns before it are the features.
df = pd.read_csv('data_q6_q7.txt', sep="\t", header=None, engine='python')
data = np.array(df)

# Standardise every feature column to zero mean and unit standard deviation
# (statistics are taken over the whole dataset, before it is split).
centred = data[:, :-1] - np.mean(data[:, :-1], axis=0)
X = centred / np.std(centred, axis=0)

# Add a constant column of ones for the bias term; it is placed just before
# the last feature column.
n_samples = X.shape[0]
bias = np.ones((n_samples, 1))
X = np.hstack((X[:, :-1], bias, X[:, -1:]))

# Shift the labels down by one so that they can be used as list indices.
Y = data[:, -1] - 1
categories = 3

# Hold-out split on shuffled row indices: the first 70% for training, the
# next 20% for testing and whatever remains for validation.
idx = np.arange(n_samples)
train_val = math.floor(0.7 * n_samples)
test_val = math.floor(0.2 * n_samples)
np.random.shuffle(idx)
split_end = train_val + test_val
train_range = idx[:train_val]
test_range = idx[train_val:split_end]
val_range = idx[split_end:]
X_Train, Y_Train = X[train_range], Y[train_range]
X_Test, Y_Test = X[test_range], Y[test_range]
X_Val, Y_Val = X[val_range], Y[val_range]

# weights[i][j] is the weight vector of the classifier that separates class i
# (target 0) from class j (target 1); only the entries with i < j are trained.
weights = [
    [np.random.randn(X.shape[1]) for _col in range(categories)]
    for _row in range(categories)
]
losses = []       # one list of per-update losses for each trained pair
plot_labels = []  # "i vs j" captions, in the same order as `losses`

for i in range(categories):
    for j in range(i + 1, categories):
        plot_labels.append(f"{i} vs {j}")

        # Keep only the training rows of class i or j; assignLabel marks
        # every other row with -1.
        pair_codes = [assignLabel(i, j, label) for label in Y_Train]
        temp = [code >= 0 for code in pair_codes]
        temp_Y = np.array(pair_codes)[temp]
        temp_X = X_Train[temp]

        w = weights[i][j]
        row = []
        for e in range(EPOCH):
            z = temp_X.dot(w)
            Y_Pred = sigmoid(z)
            loss = logisticRegressionLossFunction(temp_Y, Y_Pred)
            # Recorded loss is the mean cross-entropy only (no penalty term).
            epoch_loss = np.sum(loss) / temp_Y.shape[0]
            row.append(epoch_loss)

            # Full-batch gradient of the mean cross-entropy w.r.t. w.
            diff = Y_Pred - temp_Y
            loss_der = diff.T.dot(temp_X) / temp_X.shape[0]
            # Gradient step plus the L2 (weight-decay) step.
            w = w - ALPHA*loss_der - ALPHA*LAMBD*w
            weights[i][j] = w
        losses.append(row)

# Evaluate the trained one-vs-one classifiers on the testing, validation and
# training sets (in that order), printing per-class and overall accuracy.
for split_name, X_split, Y_split in (
    ("testing", X_Test, Y_Test),
    ("validation", X_Val, Y_Val),
    ("training", X_Train, Y_Train),
):
    print('------------------------------------------------------------------------------------------------------------')
    print("For {0} set".format(split_name))
    print('------------------------------------------------------------------------------------------------------------')

    correct = 0
    Y_category = list(Y_split)
    # Count samples per class by class index, so that position c always
    # refers to class c even when a class is missing from this split.
    Y_Dist = [Y_category.count(c) for c in range(categories)]
    YDistCorrect = [0]*categories

    for k in range(len(X_split)):
        # `x` is deliberately kept as a module-level name, since code outside
        # this loop may refer to it.
        x = X_split[k]
        # Every pairwise classifier casts one vote: output 1 votes for class j,
        # output 0 votes for class i.
        votes = [0]*categories
        for i in range(categories):
            for j in range(i+1, categories):
                if predict(weights[i][j], x) == 1:
                    votes[j] += 1
                else:
                    votes[i] += 1
        # Ties are resolved in favour of the lowest class index.
        Y_pred = votes.index(max(votes))
        if Y_pred == Y_split[k]:
            YDistCorrect[Y_pred] += 1
            correct += 1

    for i in range(categories):
        if Y_Dist[i] == 0:
            # No sample of this class in the split: accuracy is undefined.
            print("Class accuracy of {0} : n/a (no samples in this set)".format(i+1))
            continue
        class_accuracy = YDistCorrect[i]*100/Y_Dist[i]
        print("Class accuracy of {0} : {1}%".format(i+1, round(class_accuracy)))

    print("Out of {0}, {1} were predicted correctly, so the overall accuracy of the model is {2}%".format(len(X_split), correct, correct*100/len(X_split)))


One v One BGD L2
------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------
For testing set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 83%
Class accuracy of 2 : 93%
Class accuracy of 3 : 100%
Out of 42, 38 were predicted correctly, so the overall accuracy of the model is 90.47619047619048%
------------------------------------------------------------------------------------------------------------
For validation set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 100%
Class accuracy of 2 : 88%
Class accuracy of 3 : 100%
Out of 21, 20 were predicted correctly, so the overall accuracy of the model is 95.23809523809524%
----------------------------------------------

In [None]:
# One-vs-one multiclass logistic regression trained with stochastic gradient
# descent (SGD, one random sample per update) and an L2 penalty on the weights.
print("One v One SGD L2")
print('------------------------------------------------------------------------------------------------------------')

# Hyper-parameters: number of single-sample updates per pairwise classifier,
# learning rate, and regularisation strength.
EPOCH = 500
ALPHA = 0.1
LAMBD = 0.001

# Tab-separated file without a header row; the last column is the class label
# and the columns before it are the features.
df = pd.read_csv('data_q6_q7.txt', sep="\t", header=None, engine='python')
data = np.array(df)

# Standardise every feature column to zero mean and unit standard deviation
# (statistics are taken over the whole dataset, before it is split).
centred = data[:, :-1] - np.mean(data[:, :-1], axis=0)
X = centred / np.std(centred, axis=0)

# Add a constant column of ones for the bias term; it is placed just before
# the last feature column.
n_samples = X.shape[0]
bias = np.ones((n_samples, 1))
X = np.hstack((X[:, :-1], bias, X[:, -1:]))

# Shift the labels down by one so that they can be used as list indices.
Y = data[:, -1] - 1
categories = 3

# Hold-out split on shuffled row indices: the first 70% for training, the
# next 20% for testing and whatever remains for validation.
idx = np.arange(n_samples)
train_val = math.floor(0.7 * n_samples)
test_val = math.floor(0.2 * n_samples)
np.random.shuffle(idx)
split_end = train_val + test_val
train_range = idx[:train_val]
test_range = idx[train_val:split_end]
val_range = idx[split_end:]
X_Train, Y_Train = X[train_range], Y[train_range]
X_Test, Y_Test = X[test_range], Y[test_range]
X_Val, Y_Val = X[val_range], Y[val_range]

# weights[i][j] is the weight vector of the classifier that separates class i
# (target 0) from class j (target 1); only the entries with i < j are trained.
weights = [
    [np.random.randn(X.shape[1]) for _col in range(categories)]
    for _row in range(categories)
]
losses = []       # one list of per-update losses for each trained pair
plot_labels = []  # "i vs j" captions, in the same order as `losses`

for i in range(categories):
    for j in range(i + 1, categories):
        plot_labels.append(f"{i} vs {j}")

        # Keep only the training rows of class i or j; assignLabel marks
        # every other row with -1.
        pair_codes = [assignLabel(i, j, label) for label in Y_Train]
        temp = [code >= 0 for code in pair_codes]
        temp_Y = np.array(pair_codes)[temp]
        temp_X = X_Train[temp]

        w = weights[i][j]
        row = []
        for e in range(EPOCH):
            # Draw one row uniformly from ALL rows of this pair's training
            # subset; the upper bound has to be the row count of temp_X.
            idx = np.random.randint(0, temp_X.shape[0])
            sample_X, sample_Y = temp_X[idx], temp_Y[idx]

            z = sample_X.dot(w)
            Y_Pred = sigmoid(z)
            loss = logisticRegressionLossFunction(sample_Y, Y_Pred)
            # Single-sample cross-entropy (no penalty term included).
            epoch_loss = np.sum(loss)
            row.append(epoch_loss)

            # Gradient of the single-sample cross-entropy w.r.t. w.
            diff = Y_Pred - sample_Y
            loss_der = diff * sample_X
            # Gradient step plus the L2 (weight-decay) step.
            w = w - ALPHA*loss_der - ALPHA*LAMBD*w
            weights[i][j] = w
        losses.append(row)

# Evaluate the trained one-vs-one classifiers on the testing, validation and
# training sets (in that order), printing per-class and overall accuracy.
for split_name, X_split, Y_split in (
    ("testing", X_Test, Y_Test),
    ("validation", X_Val, Y_Val),
    ("training", X_Train, Y_Train),
):
    print('------------------------------------------------------------------------------------------------------------')
    print("For {0} set".format(split_name))
    print('------------------------------------------------------------------------------------------------------------')

    correct = 0
    Y_category = list(Y_split)
    # Count samples per class by class index, so that position c always
    # refers to class c even when a class is missing from this split.
    Y_Dist = [Y_category.count(c) for c in range(categories)]
    YDistCorrect = [0]*categories

    for k in range(len(X_split)):
        # `x` is deliberately kept as a module-level name, since code outside
        # this loop may refer to it.
        x = X_split[k]
        # Every pairwise classifier casts one vote: output 1 votes for class j,
        # output 0 votes for class i.
        votes = [0]*categories
        for i in range(categories):
            for j in range(i+1, categories):
                if predict(weights[i][j], x) == 1:
                    votes[j] += 1
                else:
                    votes[i] += 1
        # Ties are resolved in favour of the lowest class index.
        Y_pred = votes.index(max(votes))
        if Y_pred == Y_split[k]:
            YDistCorrect[Y_pred] += 1
            correct += 1

    for i in range(categories):
        if Y_Dist[i] == 0:
            # No sample of this class in the split: accuracy is undefined.
            print("Class accuracy of {0} : n/a (no samples in this set)".format(i+1))
            continue
        class_accuracy = YDistCorrect[i]*100/Y_Dist[i]
        print("Class accuracy of {0} : {1}%".format(i+1, round(class_accuracy)))

    print("Out of {0}, {1} were predicted correctly, so the overall accuracy of the model is {2}%".format(len(X_split), correct, correct*100/len(X_split)))


One v One SGD L2
------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------
For testing set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 90%
Class accuracy of 2 : 95%
Class accuracy of 3 : 100%
Out of 42, 40 were predicted correctly, so the overall accuracy of the model is 95.23809523809524%
------------------------------------------------------------------------------------------------------------
For validation set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 100%
Class accuracy of 2 : 100%
Class accuracy of 3 : 83%
Out of 21, 20 were predicted correctly, so the overall accuracy of the model is 95.23809523809524%
----------------------------------------------

In [None]:
# One-vs-one multiclass logistic regression trained with stochastic gradient
# descent (SGD, one random sample per update) and an L1 penalty on the weights.
print("One v One SGD L1")
print('------------------------------------------------------------------------------------------------------------')

# Hyper-parameters: number of single-sample updates per pairwise classifier,
# learning rate, and regularisation strength.
EPOCH = 500
ALPHA = 0.1
LAMBD = 0.001

# Tab-separated file without a header row; the last column is the class label
# and the columns before it are the features.
df = pd.read_csv('data_q6_q7.txt', sep="\t", header=None, engine='python')
data = np.array(df)

# Standardise every feature column to zero mean and unit standard deviation
# (statistics are taken over the whole dataset, before it is split).
centred = data[:, :-1] - np.mean(data[:, :-1], axis=0)
X = centred / np.std(centred, axis=0)

# Add a constant column of ones for the bias term; it is placed just before
# the last feature column.
n_samples = X.shape[0]
bias = np.ones((n_samples, 1))
X = np.hstack((X[:, :-1], bias, X[:, -1:]))

# Shift the labels down by one so that they can be used as list indices.
Y = data[:, -1] - 1
categories = 3

# Hold-out split on shuffled row indices: the first 70% for training, the
# next 20% for testing and whatever remains for validation.
idx = np.arange(n_samples)
train_val = math.floor(0.7 * n_samples)
test_val = math.floor(0.2 * n_samples)
np.random.shuffle(idx)
split_end = train_val + test_val
train_range = idx[:train_val]
test_range = idx[train_val:split_end]
val_range = idx[split_end:]
X_Train, Y_Train = X[train_range], Y[train_range]
X_Test, Y_Test = X[test_range], Y[test_range]
X_Val, Y_Val = X[val_range], Y[val_range]

# weights[i][j] is the weight vector of the classifier that separates class i
# (target 0) from class j (target 1); only the entries with i < j are trained.
weights = [
    [np.random.randn(X.shape[1]) for _col in range(categories)]
    for _row in range(categories)
]
losses = []       # one list of per-update losses for each trained pair
plot_labels = []  # "i vs j" captions, in the same order as `losses`

for i in range(categories):
    for j in range(i + 1, categories):
        plot_labels.append(f"{i} vs {j}")

        # Keep only the training rows of class i or j; assignLabel marks
        # every other row with -1.
        pair_codes = [assignLabel(i, j, label) for label in Y_Train]
        temp = [code >= 0 for code in pair_codes]
        temp_Y = np.array(pair_codes)[temp]
        temp_X = X_Train[temp]

        w = weights[i][j]
        row = []
        for e in range(EPOCH):
            # Draw one row uniformly from ALL rows of this pair's training
            # subset; the upper bound has to be the row count of temp_X.
            idx = np.random.randint(0, temp_X.shape[0])
            sample_X, sample_Y = temp_X[idx], temp_Y[idx]

            z = sample_X.dot(w)
            Y_Pred = sigmoid(z)
            loss = logisticRegressionLossFunction(sample_Y, Y_Pred)
            # Single-sample cross-entropy (no penalty term included).
            epoch_loss = np.sum(loss)
            row.append(epoch_loss)

            # Gradient of the single-sample cross-entropy w.r.t. w.
            diff = Y_Pred - sample_Y
            loss_der = diff * sample_X
            # Gradient step plus the L1 sub-gradient step (sign of w).
            w = w - ALPHA*loss_der - ALPHA*LAMBD*np.sign(w)
            weights[i][j] = w
        losses.append(row)

# Evaluate the trained one-vs-one classifiers on the testing, validation and
# training sets (in that order), printing per-class and overall accuracy.
for split_name, X_split, Y_split in (
    ("testing", X_Test, Y_Test),
    ("validation", X_Val, Y_Val),
    ("training", X_Train, Y_Train),
):
    print('------------------------------------------------------------------------------------------------------------')
    print("For {0} set".format(split_name))
    print('------------------------------------------------------------------------------------------------------------')

    correct = 0
    Y_category = list(Y_split)
    # Count samples per class by class index, so that position c always
    # refers to class c even when a class is missing from this split.
    Y_Dist = [Y_category.count(c) for c in range(categories)]
    YDistCorrect = [0]*categories

    for k in range(len(X_split)):
        # `x` is deliberately kept as a module-level name, since code outside
        # this loop may refer to it.
        x = X_split[k]
        # Every pairwise classifier casts one vote: output 1 votes for class j,
        # output 0 votes for class i.
        votes = [0]*categories
        for i in range(categories):
            for j in range(i+1, categories):
                if predict(weights[i][j], x) == 1:
                    votes[j] += 1
                else:
                    votes[i] += 1
        # Ties are resolved in favour of the lowest class index.
        Y_pred = votes.index(max(votes))
        if Y_pred == Y_split[k]:
            YDistCorrect[Y_pred] += 1
            correct += 1

    for i in range(categories):
        if Y_Dist[i] == 0:
            # No sample of this class in the split: accuracy is undefined.
            print("Class accuracy of {0} : n/a (no samples in this set)".format(i+1))
            continue
        class_accuracy = YDistCorrect[i]*100/Y_Dist[i]
        print("Class accuracy of {0} : {1}%".format(i+1, round(class_accuracy)))

    print("Out of {0}, {1} were predicted correctly, so the overall accuracy of the model is {2}%".format(len(X_split), correct, correct*100/len(X_split)))


One v One SGD L1
------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------
For testing set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 88%
Class accuracy of 2 : 90%
Class accuracy of 3 : 100%
Out of 42, 39 were predicted correctly, so the overall accuracy of the model is 92.85714285714286%
------------------------------------------------------------------------------------------------------------
For validation set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 83%
Class accuracy of 2 : 100%
Class accuracy of 3 : 100%
Out of 21, 20 were predicted correctly, so the overall accuracy of the model is 95.23809523809524%
----------------------------------------------

In [None]:
# One-vs-one multiclass logistic regression trained with mini-batch gradient
# descent (MBGD, 32 random rows per update) and an L2 penalty on the weights.
print("One v One MBGD L2")
print('------------------------------------------------------------------------------------------------------------')

# Hyper-parameters: number of mini-batch updates per pairwise classifier,
# learning rate, and regularisation strength.
EPOCH = 500
ALPHA = 0.1
LAMBD = 0.001

# Tab-separated file without a header row; the last column is the class label
# and the columns before it are the features.
df = pd.read_csv('data_q6_q7.txt', sep="\t", header=None, engine='python')
data = np.array(df)

# Standardise every feature column to zero mean and unit standard deviation
# (statistics are taken over the whole dataset, before it is split).
centred = data[:, :-1] - np.mean(data[:, :-1], axis=0)
X = centred / np.std(centred, axis=0)

# Add a constant column of ones for the bias term; it is placed just before
# the last feature column.
n_samples = X.shape[0]
bias = np.ones((n_samples, 1))
X = np.hstack((X[:, :-1], bias, X[:, -1:]))

# Shift the labels down by one so that they can be used as list indices.
Y = data[:, -1] - 1
categories = 3

# Hold-out split on shuffled row indices: the first 70% for training, the
# next 20% for testing and whatever remains for validation.
idx = np.arange(n_samples)
train_val = math.floor(0.7 * n_samples)
test_val = math.floor(0.2 * n_samples)
np.random.shuffle(idx)
split_end = train_val + test_val
train_range = idx[:train_val]
test_range = idx[train_val:split_end]
val_range = idx[split_end:]
X_Train, Y_Train = X[train_range], Y[train_range]
X_Test, Y_Test = X[test_range], Y[test_range]
X_Val, Y_Val = X[val_range], Y[val_range]

# weights[i][j] is the weight vector of the classifier that separates class i
# (target 0) from class j (target 1); only the entries with i < j are trained.
weights = [
    [np.random.randn(X.shape[1]) for _col in range(categories)]
    for _row in range(categories)
]
losses = []       # one list of per-update losses for each trained pair
plot_labels = []  # "i vs j" captions, in the same order as `losses`

for i in range(categories):
    for j in range(i + 1, categories):
        plot_labels.append(f"{i} vs {j}")

        # Keep only the training rows of class i or j; assignLabel marks
        # every other row with -1.
        pair_codes = [assignLabel(i, j, label) for label in Y_Train]
        temp = [code >= 0 for code in pair_codes]
        temp_Y = np.array(pair_codes)[temp]
        temp_X = X_Train[temp]

        w = weights[i][j]
        row = []
        for e in range(EPOCH):
            # Mini-batch of 32 row indices drawn with replacement.
            idx = np.random.randint(len(temp_X), size=32)
            batch_X, batch_Y = temp_X[idx], temp_Y[idx]

            z = batch_X.dot(w)
            Y_Pred = sigmoid(z)
            loss = logisticRegressionLossFunction(batch_Y, Y_Pred)
            # Recorded loss is the mean cross-entropy of the batch only
            # (no penalty term).
            epoch_loss = np.sum(loss) / batch_Y.shape[0]
            row.append(epoch_loss)

            # Mini-batch gradient of the mean cross-entropy w.r.t. w.
            diff = Y_Pred - batch_Y
            loss_der = diff.T.dot(batch_X) / batch_X.shape[0]
            # Gradient step plus the L2 (weight-decay) step.
            w = w - ALPHA*loss_der - ALPHA*LAMBD*w
            weights[i][j] = w
        losses.append(row)

# Evaluate the trained one-vs-one classifiers on the testing, validation and
# training sets (in that order), printing per-class and overall accuracy.
for split_name, X_split, Y_split in (
    ("testing", X_Test, Y_Test),
    ("validation", X_Val, Y_Val),
    ("training", X_Train, Y_Train),
):
    print('------------------------------------------------------------------------------------------------------------')
    print("For {0} set".format(split_name))
    print('------------------------------------------------------------------------------------------------------------')

    correct = 0
    Y_category = list(Y_split)
    # Count samples per class by class index, so that position c always
    # refers to class c even when a class is missing from this split.
    Y_Dist = [Y_category.count(c) for c in range(categories)]
    YDistCorrect = [0]*categories

    for k in range(len(X_split)):
        # `x` is deliberately kept as a module-level name, since code outside
        # this loop may refer to it.
        x = X_split[k]
        # Every pairwise classifier casts one vote: output 1 votes for class j,
        # output 0 votes for class i.
        votes = [0]*categories
        for i in range(categories):
            for j in range(i+1, categories):
                if predict(weights[i][j], x) == 1:
                    votes[j] += 1
                else:
                    votes[i] += 1
        # Ties are resolved in favour of the lowest class index.
        Y_pred = votes.index(max(votes))
        if Y_pred == Y_split[k]:
            YDistCorrect[Y_pred] += 1
            correct += 1

    for i in range(categories):
        if Y_Dist[i] == 0:
            # No sample of this class in the split: accuracy is undefined.
            print("Class accuracy of {0} : n/a (no samples in this set)".format(i+1))
            continue
        class_accuracy = YDistCorrect[i]*100/Y_Dist[i]
        print("Class accuracy of {0} : {1}%".format(i+1, round(class_accuracy)))

    print("Out of {0}, {1} were predicted correctly, so the overall accuracy of the model is {2}%".format(len(X_split), correct, correct*100/len(X_split)))


One v One MBGD L2
------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------
For testing set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 92%
Class accuracy of 2 : 93%
Class accuracy of 3 : 93%
Out of 42, 39 were predicted correctly, so the overall accuracy of the model is 92.85714285714286%
------------------------------------------------------------------------------------------------------------
For validation set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 100%
Class accuracy of 2 : 100%
Class accuracy of 3 : 100%
Out of 21, 21 were predicted correctly, so the overall accuracy of the model is 100.0%
---------------------------------------------------------

In [None]:
# One-vs-one multiclass logistic regression trained with mini-batch gradient
# descent (MBGD, 32 random rows per update) and an L1 penalty on the weights.
print("One v One MBGD L1")
print('------------------------------------------------------------------------------------------------------------')

# Hyper-parameters: number of mini-batch updates per pairwise classifier,
# learning rate, and regularisation strength.
EPOCH = 500
ALPHA = 0.1
LAMBD = 0.001

# Tab-separated file without a header row; the last column is the class label
# and the columns before it are the features.
df = pd.read_csv('data_q6_q7.txt', sep="\t", header=None, engine='python')
data = np.array(df)

# Standardise every feature column to zero mean and unit standard deviation
# (statistics are taken over the whole dataset, before it is split).
centred = data[:, :-1] - np.mean(data[:, :-1], axis=0)
X = centred / np.std(centred, axis=0)

# Add a constant column of ones for the bias term; it is placed just before
# the last feature column.
n_samples = X.shape[0]
bias = np.ones((n_samples, 1))
X = np.hstack((X[:, :-1], bias, X[:, -1:]))

# Shift the labels down by one so that they can be used as list indices.
Y = data[:, -1] - 1
categories = 3

# Hold-out split on shuffled row indices: the first 70% for training, the
# next 20% for testing and whatever remains for validation.
idx = np.arange(n_samples)
train_val = math.floor(0.7 * n_samples)
test_val = math.floor(0.2 * n_samples)
np.random.shuffle(idx)
split_end = train_val + test_val
train_range = idx[:train_val]
test_range = idx[train_val:split_end]
val_range = idx[split_end:]
X_Train, Y_Train = X[train_range], Y[train_range]
X_Test, Y_Test = X[test_range], Y[test_range]
X_Val, Y_Val = X[val_range], Y[val_range]

# weights[i][j] is the weight vector of the classifier that separates class i
# (target 0) from class j (target 1); only the entries with i < j are trained.
weights = [
    [np.random.randn(X.shape[1]) for _col in range(categories)]
    for _row in range(categories)
]
losses = []       # one list of per-update losses for each trained pair
plot_labels = []  # "i vs j" captions, in the same order as `losses`

for i in range(categories):
    for j in range(i + 1, categories):
        plot_labels.append(f"{i} vs {j}")

        # Keep only the training rows of class i or j; assignLabel marks
        # every other row with -1.
        pair_codes = [assignLabel(i, j, label) for label in Y_Train]
        temp = [code >= 0 for code in pair_codes]
        temp_Y = np.array(pair_codes)[temp]
        temp_X = X_Train[temp]

        w = weights[i][j]
        row = []
        for e in range(EPOCH):
            # Mini-batch of 32 row indices drawn with replacement.
            idx = np.random.randint(len(temp_X), size=32)
            batch_X, batch_Y = temp_X[idx], temp_Y[idx]

            z = batch_X.dot(w)
            Y_Pred = sigmoid(z)
            loss = logisticRegressionLossFunction(batch_Y, Y_Pred)
            # Recorded loss is the mean cross-entropy of the batch only
            # (no penalty term).
            epoch_loss = np.sum(loss) / batch_Y.shape[0]
            row.append(epoch_loss)

            # Mini-batch gradient of the mean cross-entropy w.r.t. w.
            diff = Y_Pred - batch_Y
            loss_der = diff.T.dot(batch_X) / batch_X.shape[0]
            # Gradient step plus the L1 sub-gradient step (sign of w).
            w = w - ALPHA*loss_der - ALPHA*LAMBD*np.sign(w)
            weights[i][j] = w
        losses.append(row)

# Evaluate the trained one-vs-one classifiers on the testing, validation and
# training sets (in that order), printing per-class and overall accuracy.
for split_name, X_split, Y_split in (
    ("testing", X_Test, Y_Test),
    ("validation", X_Val, Y_Val),
    ("training", X_Train, Y_Train),
):
    print('------------------------------------------------------------------------------------------------------------')
    print("For {0} set".format(split_name))
    print('------------------------------------------------------------------------------------------------------------')

    correct = 0
    Y_category = list(Y_split)
    # Count samples per class by class index, so that position c always
    # refers to class c even when a class is missing from this split.
    Y_Dist = [Y_category.count(c) for c in range(categories)]
    YDistCorrect = [0]*categories

    for k in range(len(X_split)):
        # `x` is deliberately kept as a module-level name, since code outside
        # this loop may refer to it.
        x = X_split[k]
        # Every pairwise classifier casts one vote: output 1 votes for class j,
        # output 0 votes for class i.
        votes = [0]*categories
        for i in range(categories):
            for j in range(i+1, categories):
                if predict(weights[i][j], x) == 1:
                    votes[j] += 1
                else:
                    votes[i] += 1
        # Ties are resolved in favour of the lowest class index.
        Y_pred = votes.index(max(votes))
        if Y_pred == Y_split[k]:
            YDistCorrect[Y_pred] += 1
            correct += 1

    for i in range(categories):
        if Y_Dist[i] == 0:
            # No sample of this class in the split: accuracy is undefined.
            print("Class accuracy of {0} : n/a (no samples in this set)".format(i+1))
            continue
        class_accuracy = YDistCorrect[i]*100/Y_Dist[i]
        print("Class accuracy of {0} : {1}%".format(i+1, round(class_accuracy)))

    print("Out of {0}, {1} were predicted correctly, so the overall accuracy of the model is {2}%".format(len(X_split), correct, correct*100/len(X_split)))


One v One MBGD L1
------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------
For testing set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 100%
Class accuracy of 2 : 100%
Class accuracy of 3 : 100%
Out of 42, 42 were predicted correctly, so the overall accuracy of the model is 100.0%
------------------------------------------------------------------------------------------------------------
For validation set
------------------------------------------------------------------------------------------------------------
Class accuracy of 1 : 86%
Class accuracy of 2 : 100%
Class accuracy of 3 : 100%
Out of 21, 20 were predicted correctly, so the overall accuracy of the model is 95.23809523809524%
-------------------------------------------------------