In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [None]:
# Making datasets
# Synthetic, imbalanced binary problem: weights = [0.7] puts roughly 70% of the samples in class 0.
X, y = make_classification(n_samples = 5000, n_features = 5, n_redundant = 2, n_classes = 2, weights = [0.7], class_sep = 0.7, random_state = 15)
# Stratified 80/20 split into (train + cv) and test, then 80/20 again into train and cv.
# random_state is pinned on both splits so that every number printed further down
# is reproducible after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, stratify = y, random_state = 15)
X_train, X_cv, y_train, y_cv = train_test_split(X_train, y_train, test_size = 0.20, stratify = y_train, random_state = 15)

# Task E

In [None]:
# Support vector classifier; the kernel is left at its default (RBF, as the echoed repr shows)
clf = SVC(C = 100, gamma = 0.001)
clf.fit(X_train, y_train) # Train on the training split only

SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [None]:
import math
from numpy import linalg as la

In [None]:
def decision_function(X_cv_data, gamma, classifier = None):
    '''
    Compute RBF-kernel SVM decision values by hand from a fitted model.

    For every row x of X_cv_data this evaluates
        sum_i dual_coef_[0][i] * exp(-gamma * ||sv_i - x||^2) + intercept_[0]
    where sv_i are the model's support vectors.

    Parameters
    ----------
    X_cv_data : 2-D array-like, one data point per row
    gamma : float, RBF kernel coefficient (should be the value the model was fitted with)
    classifier : optional fitted binary classifier exposing support_vectors_,
        dual_coef_ and intercept_; defaults to the notebook-level `clf`

    Returns
    -------
    1-D numpy array holding one decision value per row (empty for empty input)
    '''
    model = clf if classifier is None else classifier

    # Pull the fitted attributes out once instead of on every inner iteration
    support_vectors = np.asarray(model.support_vectors_, dtype = float)
    dual_coef = np.asarray(model.dual_coef_, dtype = float)[0]
    intercept = float(np.ravel(model.intercept_)[0])

    points = np.asarray(X_cv_data, dtype = float)

    decision_values = []
    for point in points: # One pass per data point, vectorised over all support vectors
        # Squared Euclidean distance to every support vector (no sqrt-then-square round trip)
        squared_distances = np.sum((support_vectors - point) ** 2, axis = 1)
        kernel_values = np.exp(-gamma * squared_distances)
        decision_values.append(float(np.dot(dual_coef, kernel_values)) + intercept)
    return np.array(decision_values, dtype = float)

In [None]:
# Read gamma from the fitted classifier rather than repeating the literal, so the
# hand-written kernel cannot silently drift from the value the SVC was trained with
fcv = decision_function(X_cv, clf.gamma)

In [None]:
print(fcv[:5]) # First five decision values from the hand-written decision_function, to compare against sklearn's

[-0.37355744 -2.7294127  -2.07524724 -0.55667951 -2.68346752]


In [None]:
print(clf.decision_function(X_cv[:5])) # Printing decision values from sklearn implemented decision function
# Check every cross-validation point, not just the five printed ones, and fail loudly on a mismatch
np.testing.assert_allclose(fcv, clf.decision_function(X_cv))

[-0.37355744 -2.7294127  -2.07524724 -0.55667951 -2.68346752]


# Task F

In [None]:
# Class balance of the training labels: entries equal to 1 are positives, everything else counts as negative
train_positive = int(np.count_nonzero(y_train == 1))
train_negative = len(y_train) - train_positive

In [None]:
# Report the class balance of the training split
print('Number of positive data points', train_positive)
print('Number of negative data points', train_negative)

Number of positive data points 969
Number of negative data points 2231


In [None]:
# Getting converted class label so that we can apply it in Platt Scaling
# The hard labels 1 / 0 are replaced by smoothed targets built from the training counts:
#   positive target = (N+ + 1) / (N+ + 2)
#   negative target = 1 / (N- + 2)
# where N+ = train_positive and N- = train_negative.
y_positive = (train_positive + 1) / (train_positive + 2)
y_negative = 1 / (train_negative + 2)

In [None]:
# Show the smoothed targets that stand in for the labels 1 and 0
print('y_positive', y_positive)
print('y_negative', y_negative)

y_positive 0.9989701338825953
y_negative 0.0004478280340349306


In [None]:
# Smoothed training targets, position-for-position with y_train:
# label 1 -> y_positive, any other label -> y_negative
Y_train = np.where(y_train == 1, y_positive, y_negative)

In [None]:
# Smoothed test targets, position-for-position with y_test:
# label 1 -> y_positive, any other label -> y_negative (same targets as for the training labels)
Y_test = np.where(y_test == 1, y_positive, y_negative)

In [None]:
# Gradient descent starts from the origin: one zero weight per feature column and a zero intercept
w = np.zeros(X_train.shape[1], dtype = X_train.dtype)
b = 0

In [None]:
N = len(X_train) # number of training points; divides the regularization term in the weight update
learning_rate = 0.0001  # learning rate
reg_constant = 0.0001   # regularization constant
tol = 1e-3              # stopping criterion: training ends once the train loss improves by no more than this between epochs

In [None]:
def log_loss(X_data, y_data, y, weight, intercept):
    '''
    Mean cross-entropy (base-10 logs) between soft targets and the logistic model.

    For each row x_i with target t_i = y_data[i] and p_i = sigmoid(weight . x_i + intercept):
        loss = -(1 / n) * sum_i [ t_i * log10(p_i) + (1 - t_i) * log10(1 - p_i) ]

    Both terms are evaluated for every point. Branching on the hard label and
    weighting log10(1 - p) by t_i instead of (1 - t_i) is not the cross-entropy.

    Parameters
    ----------
    X_data : 2-D array-like, one data point per row
    y_data : 1-D array-like of targets in [0, 1], aligned with X_data
    y : hard labels; kept so existing calls stay valid, not used in the computation
    weight : 1-D weight vector
    intercept : scalar intercept

    Returns
    -------
    float, the mean loss

    Raises
    ------
    ValueError if X_data is empty
    '''
    features = np.asarray(X_data, dtype = float)
    targets = np.asarray(y_data, dtype = float)
    if len(features) == 0:
        raise ValueError('log_loss needs at least one data point')

    scores = features @ np.asarray(weight, dtype = float) + intercept
    # log(sigmoid(z)) = -log(1 + e^-z) and log(1 - sigmoid(z)) = -log(1 + e^z);
    # logaddexp evaluates both without overflow and without ever taking log(0)
    log_p = -np.logaddexp(0.0, -scores)
    log_one_minus_p = -np.logaddexp(0.0, scores)

    cross_entropy = -(targets * log_p + (1.0 - targets) * log_one_minus_p)
    # Natural logs -> base 10, keeping the scale the stopping tolerance was tuned against
    return float(np.mean(cross_entropy) / np.log(10.0))

In [None]:
def sigmoid(weight, data_point, intercept):
    '''
    Logistic function of the linear score: 1 / (1 + exp(-(weight . data_point + intercept))).

    Parameters
    ----------
    weight : 1-D weight vector
    data_point : 1-D feature vector of the same length
    intercept : scalar intercept

    Returns
    -------
    Value in [0, 1]
    '''
    score = np.dot(weight, data_point) + intercept
    # 1 / (1 + e^-z) == exp(-log(1 + e^-z)); logaddexp computes log(e^0 + e^-z)
    # without overflowing, so very negative scores no longer trigger a RuntimeWarning
    return np.exp(-np.logaddexp(0.0, -score))

In [None]:
import random
def update_using_gradient_descent(X_train, y_train, Y_train, old_weight, old_intercept):
    '''
    Fit logistic-regression parameters with per-sample gradient updates.

    Every epoch sweeps the training points in order and applies, for each point i,
        error  = Y_train[i] - sigmoid(weight, X_train[i], intercept)
        weight = (1 - learning_rate * reg_constant / n) * weight + learning_rate * X_train[i] * error
        intercept = intercept + learning_rate * error
    with n = len(X_train). After each epoch the loss on the training data and on the
    notebook-level test data (X_test, Y_test, y_test) is recorded. At least two
    epochs are run; training then continues for as long as the train loss keeps
    dropping by more than `tol` from one epoch to the next.

    Relies on notebook-level names: learning_rate, reg_constant, tol, sigmoid,
    log_loss, X_test, Y_test, y_test.

    Parameters
    ----------
    X_train : 2-D array, one training point per row
    y_train : hard training labels (forwarded to log_loss)
    Y_train : smoothed training targets, aligned with X_train
    old_weight : initial weight vector
    old_intercept : initial intercept

    Returns
    -------
    train_loss, test_loss : lists with one loss value per epoch
    weight, intercept : learned parameters
    epoch_count : number of epochs that were run
    '''
    train_loss = []
    test_loss = []
    epoch_count = 0

    weight = old_weight
    intercept = old_intercept

    n_samples = len(X_train)
    # The L2 shrink factor is identical for every step, so compute it once
    shrink = 1 - (learning_rate * reg_constant) / n_samples

    prev_loss = None
    while True:
        for data_point, target in zip(X_train, Y_train):
            # Weight and intercept are both updated from the same pre-update prediction
            error = target - sigmoid(weight, data_point, intercept)
            # The step size is learning_rate; multiplying the gradient by reg_constant
            # only worked while the two constants happened to be equal
            weight = shrink * weight + learning_rate * data_point * error
            intercept = intercept + learning_rate * error

        # Losses are evaluated once per epoch, after the full sweep
        curr_loss = log_loss(X_train, Y_train, y_train, weight, intercept)
        train_loss.append(curr_loss)
        test_loss.append(log_loss(X_test, Y_test, y_test, weight, intercept))
        epoch_count += 1

        # Two epochs are needed before there is a previous loss to compare against.
        # Written as `not (... > tol)` so a NaN loss also ends training.
        if epoch_count >= 2 and not (prev_loss - curr_loss > tol):
            break
        prev_loss = curr_loss

    return train_loss, test_loss, weight, intercept, epoch_count

In [None]:
# Train from the zero-initialised w and b on the raw training features with the smoothed targets
train_loss, test_loss, new_weight, new_intercept, epoch = update_using_gradient_descent(X_train, y_train, Y_train, w, b)

In [None]:
# Per-epoch losses, then the learned parameters and the number of epochs run
print(train_loss)
print(test_loss)
print(new_weight)
print(new_intercept)
print(epoch)

[0.08907955158557511, 0.08713018609851, 0.08529174624614042, 0.08355231588720972, 0.08190295498350453, 0.08033679298991948, 0.07884833806186146, 0.07743299565947767, 0.07608675235970862, 0.07480597724514532, 0.07358730190874273, 0.07242755075407527, 0.07132370229908636, 0.07027286887824599, 0.0692722867612919, 0.06831931177509168]
[0.08908283721850968, 0.08708576984417661, 0.08520472846826824, 0.08342735683990396, 0.08174421488087102, 0.08014793778068967, 0.07863257848598634, 0.07719313919747649, 0.07582525683681356, 0.07452500066163686, 0.07328874637335926, 0.07211310011689868, 0.07099485388570026, 0.06993096004061458, 0.06891851702710988, 0.06795476133009393]
[ 0.0088962  -0.06818712 -0.36157362 -0.45637939  1.05468689]
-0.4441695528512706
16


For a given data point from $X_{test}$, $P(Y=1|X) = \frac{1}{1+\exp(-(W \cdot f_{test} + b))}$, where $f_{test}$ = `decision_function(`$X_{test}$`)`, and $W$ and $b$ are learned as mentioned in the step above.

In [None]:
def positive_class_probability(data_point, weight = None, intercept = None):
    '''
    Return the probability of a point belonging to the positive class.

    Computes 1 / (1 + exp(-(weight . data_point + intercept))).

    NOTE(review): the weights are applied to the raw feature vector, matching how
    they were trained in this notebook; the accompanying markdown describes the
    input as the SVM decision value instead - confirm which is intended.

    Parameters
    ----------
    data_point : 1-D feature vector
    weight : optional weight vector; defaults to the notebook-level `new_weight`
    intercept : optional intercept; defaults to the notebook-level `new_intercept`

    Returns
    -------
    float in [0, 1]
    '''
    w = new_weight if weight is None else weight
    b = new_intercept if intercept is None else intercept
    score = float(np.dot(w, data_point) + b)

    # math.exp raises OverflowError for large positive arguments, so always
    # exponentiate a non-positive number
    if score >= 0:
        return 1 / (1 + math.exp(-score))
    exp_score = math.exp(score)
    return exp_score / (1 + exp_score)

In [None]:
# Estimated positive-class probability for the first test point
print(positive_class_probability(X_test[0]))

0.720033588138568


In [None]:
# Estimated positive-class probability for the test point at index 10
print(positive_class_probability(X_test[10]))

0.10934162322037293
