In [167]:
#packages
import numpy as np
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.linalg import qr
from scipy.sparse import csr_matrix
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from autograd import grad, hessian
import autograd.numpy as anp
from sklearn.metrics import precision_score, recall_score, f1_score

In [168]:
def sketch_matrix(m, n_columns, non_zero_entries):
    """Build a sparse random-sign sketching matrix of shape (m, n_columns).

    Each column gets exactly ``non_zero_entries`` non-zero values, placed on
    distinct rows drawn without replacement. Every non-zero value is +1 or -1
    multiplied by 1/sqrt(non_zero_entries).

    Randomness is drawn from NumPy's global ``np.random`` state.
    """
    sketch = np.zeros((m, n_columns))
    magnitude = 1 / np.sqrt(non_zero_entries)
    sign_choices = [1, -1]

    for j in range(n_columns):
        # Rows are distinct (replace=False), so one fancy-index assignment
        # writes every non-zero of this column at once.
        rows = np.random.choice(m, non_zero_entries, replace=False)
        signs = np.random.choice(sign_choices, non_zero_entries)
        sketch[rows, j] = signs * magnitude

    return sketch

# f: objective function to minimise, called as f(x, A, b)
def unconstrained_newton_sketch(f, x0, m, A, b, non_zero_entries,tolerance = 1e-6, a = 0.1, b_factor = 0.5 , max_iter=1000, regularization_strength=1e-5):
    """Minimise ``f(x, A, b)`` over ``x`` with a sketched Newton iteration.

    Every iteration draws a fresh sketch ``S`` of shape (m, n) with
    ``sketch_matrix``, evaluates the autograd gradient ``g`` and Hessian ``H``
    of ``f`` at the current iterate, solves the regularised sketched system
    ``(S H S^T + regularization_strength * I) z = S g`` and uses ``S^T z`` as
    the step direction. The step length is chosen by backtracking line search.

    Parameters
    ----------
    f : callable
        Objective, called as ``f(x, A, b)``; it is differentiated with
        autograd's ``grad`` and ``hessian``.
    x0 : array-like
        Starting point; its length ``n`` fixes the number of sketch columns.
    m : int
        Number of rows of the sketch (the linear system solved is m x m).
    A, b :
        Passed through unchanged to ``f`` and its derivatives.
    non_zero_entries : int
        Non-zero entries per sketch column (forwarded to ``sketch_matrix``).
    tolerance : float
        Stop when ``lambda_t**2 / 2 <= tolerance``, where ``lambda_t`` is the
        dot product of the gradient with the step direction.
    a : float
        Sufficient-decrease constant of the line search.
    b_factor : float
        Factor the step size is multiplied by on each rejected trial.
    max_iter : int
        Maximum number of outer iterations.
    regularization_strength : float
        Diagonal shift added to the sketched Hessian before solving.

    Returns
    -------
    tuple
        ``(x, abs(lambda_t))`` for the last iterate. If ``max_iter`` is 0 no
        step is computed and the second element is ``inf``.
    """
    grad_f = grad(f)  # Gradient of f
    hess_f = hessian(f)  # Hessian of f

    # Float copy: the returned iterate is never an alias of the caller's x0,
    # even when the loop stops before the first update.
    xt = np.array(x0, dtype=float)
    n = len(x0)
    # Defined up front so the return statement is valid when max_iter == 0.
    lambda_t = np.inf
    for _ in range(max_iter):
        # Generate the sketching matrix at each iteration
        St = sketch_matrix(m, n, non_zero_entries)

        # Compute the gradient and Hessian at the current point using autograd
        grad_value = grad_f(xt, A, b)
        hess_value = hess_f(xt, A, b)

        # Shift the diagonal in place so the m x m system is solvable, without
        # allocating a dense m x m identity on every iteration.
        sketched_hessian = St @ hess_value @ St.T
        sketched_hessian[np.diag_indices_from(sketched_hessian)] += regularization_strength
        sketched_grad = St @ grad_value
        delta_xt_sketched = np.linalg.solve(sketched_hessian, sketched_grad)
        delta_xt = St.T @ delta_xt_sketched

        # Compute the approximate Newton decrement
        lambda_t = np.dot(grad_value, delta_xt)

        # Check stopping condition
        if lambda_t**2 / 2 <= tolerance:
            break

        # Backtracking line search; f at the current point does not change
        # between trials, so it is evaluated once.
        step_size = 1.0
        f_current = f(xt, A, b)
        while f(xt - step_size * delta_xt, A, b) > f_current - a * step_size * lambda_t:
            step_size *= b_factor

        # Update
        xt = xt - step_size * delta_xt

    return xt, np.abs(lambda_t)

# Objective function passed to the solver
def least_squares_loss(x, A, b):
    """Sum of squared residuals of the linear model: f(x) = ||A x - b||_2^2.

    The reduction uses autograd's numpy wrapper (``anp``) so the function can
    be differentiated by autograd.
    """
    residual = A @ x - b
    squared_residual = residual ** 2
    return anp.sum(squared_residual)

In [169]:
# Load the employability dataset from an Excel workbook.
# The path is relative, so it resolves against the current working directory.
df = pd.read_excel("../Dataset/employability.xlsx")

In [170]:
# Display the DataFrame as loaded from the Excel file
df

Unnamed: 0,Name of Student,GENERAL APPEARANCE,MANNER OF SPEAKING,PHYSICAL CONDITION,MENTAL ALERTNESS,SELF-CONFIDENCE,ABILITY TO PRESENT IDEAS,COMMUNICATION SKILLS,Student Performance Rating,CLASS
0,Student 1,4,5,4,5,5,5,5,5,Employable
1,Student 2,4,4,4,4,4,4,3,5,Employable
2,Student 3,4,3,3,3,3,3,2,5,LessEmployable
3,Student 4,3,3,3,2,3,3,3,5,LessEmployable
4,Student 5,4,4,3,3,4,4,3,5,Employable
...,...,...,...,...,...,...,...,...,...,...
2977,Student 2996,4,3,3,3,3,3,2,5,Employable
2978,Student 2997,3,4,4,4,4,4,4,5,Employable
2979,Student 2998,4,5,4,5,4,4,4,5,Employable
2980,Student 2999,4,4,4,3,4,4,3,5,LessEmployable


In [171]:
# Drop non-numeric columns (like student names).
# errors="ignore" keeps this cell re-runnable once the column is already gone.
df = df.drop(columns=["Name of Student"], errors="ignore")

# Encode the target as 1/0. Only map while CLASS still holds the raw string
# labels: mapping an already-encoded column would turn every value into NaN.
if not pd.api.types.is_numeric_dtype(df["CLASS"]):
    df["CLASS"] = df["CLASS"].map({"Employable": 1, "LessEmployable": 0})
# Series.map yields NaN for any label missing from the dict; fail here instead
# of letting NaN flow into the loss.
assert df["CLASS"].notna().all(), "CLASS contains labels other than 'Employable'/'LessEmployable'"

# Separate features (A) and target variable (b) as float NumPy arrays
A = df.drop(columns=["CLASS"]).to_numpy(dtype=float)
b = df["CLASS"].to_numpy(dtype=float)

In [172]:
# Display the feature matrix (every remaining column of df except CLASS)
A

array([[4., 5., 4., ..., 5., 5., 5.],
       [4., 4., 4., ..., 4., 3., 5.],
       [4., 3., 3., ..., 3., 2., 5.],
       ...,
       [4., 5., 4., ..., 4., 4., 5.],
       [4., 4., 4., ..., 4., 3., 5.],
       [4., 4., 4., ..., 4., 4., 5.]])

In [173]:
# Display the target vector (CLASS encoded as 1.0 = Employable, 0.0 = LessEmployable)
b

array([1., 1., 0., ..., 1., 0., 1.])

In [174]:
# Seed NumPy's global RNG: the starting point below and the sketches drawn by
# sketch_matrix both use np.random, so without a seed the fit differs on every run.
np.random.seed(42)
x0 = np.random.randn(A.shape[1]) #columns = number of features
m = 2000  # Number of rows in sketch matrix
non_zero_entries = 10  # Non-zero entries per sketch column

In [175]:
# Fit the weights with the sketched Newton solver; keyword arguments make the
# role of each input explicit.
optimized_x, final_lambda = unconstrained_newton_sketch(
    f=least_squares_loss,
    x0=x0,
    m=m,
    A=A,
    b=b,
    non_zero_entries=non_zero_entries,
)

In [176]:
# Linear scores for every row, then hard 0/1 labels by thresholding at 0.5
decision_scores = A @ optimized_x
y_pred = (decision_scores > 0.5).astype(int)

In [177]:
# Classification metrics. Note: these are computed on the same rows (A, b) that
# were used to fit optimized_x, not on held-out data.
accuracy = (y_pred == b).mean()
precision = precision_score(b, y_pred)
recall = recall_score(b, y_pred)
f1 = f1_score(b, y_pred)

print(f"Model Accuracy: {accuracy * 100:.2f}%")
for metric_label, metric_value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(metric_label, metric_value)

Model Accuracy: 60.26%
Precision: 0.6279397930385701
Recall: 0.7721226142278774
F1 Score: 0.6926070038910507
