In [None]:
import numpy as np


In [None]:
#Defining The Covariance Matrix
def Covariance_matrix(Sn_bar_features,Sn_bar_labels):
  """Build the matrix ``(1/m) * sum_i x_i x_i^T - (1/m**3) * s s^T``.

  ``x_i`` is row ``i`` of ``Sn_bar_features``, ``m`` is the number of rows
  (samples) and ``s = sum_i y_i * x_i`` with ``y_i = Sn_bar_labels[i]``.

  Parameters
  ----------
  Sn_bar_features : array-like, shape (m, n)
      One sample per row, one feature per column.
  Sn_bar_labels : array-like, shape (m,)
      One numeric label per sample.

  Returns
  -------
  numpy.ndarray, shape (n, n)

  Raises
  ------
  ValueError
      If there are no samples (the averages would divide by zero).
  """
  features=np.asarray(Sn_bar_features,dtype=float)
  labels=np.asarray(Sn_bar_labels,dtype=float).reshape(-1)

  # The sums run over samples (rows); the feature count (columns) only fixes
  # the size of the result. Using the column count as the loop bound would
  # silently ignore rows or index past the end of the data.
  m=features.shape[0]
  if m==0:
    raise ValueError("Covariance_matrix needs at least one sample")

  # (1/m) * sum_i x_i x_i^T
  second_moment=(features.T@features)/m

  # s = sum_i y_i * x_i
  weighted_sum=features.T@labels

  # NOTE(review): the cubic normalisation is kept from the original formula;
  # a textbook covariance would divide by m**2 -- confirm against the paper.
  correction=np.outer(weighted_sum,weighted_sum)/(m**3)

  return second_moment-correction



In [None]:
# Objective function (its gradient is defined in the next cell)
import numpy as np

def objective_function(Sn, k,indices, w, eta, lambda_val, p,mu):
    """Evaluate the regularised hinge objective on a mini-batch.

    Parameters
    ----------
    Sn : 2-D array; the last column holds the labels, the others the features.
    k : batch rows with index ``< k`` are weighted by ``1/n``, rows with
        index ``>= k`` by ``1/(2n)``, where ``n = len(Sn)``.
    indices : iterable of row indices forming the mini-batch.
    w : weight vector at which the objective is evaluated.
    eta, p : enter only through the scaling factor ``1 - 2*p*eta``.
    lambda_val : coefficient of the squared L2 norm of ``w``.
    mu : vector used in the linear term
        ``-(n-k)/(2n) * <w, mu> / (1 - 2*p*eta)``.

    Returns
    -------
    Scalar objective value.
    """
    n_rows = len(Sn)
    label_col = Sn.shape[1] - 1
    labels = Sn[:, label_col]
    feats = Sn[:, 0:label_col]
    mu_coeff = -(n_rows - k) / (2 * n_rows)

    # One pass over the batch; each row feeds exactly one of the two sums.
    clean_hinge = 0
    noisy_hinge = 0
    for idx in indices:
        hinge = max(0, 1 - labels[idx] * np.dot(feats[idx], w))
        if idx < k:
            clean_hinge += hinge
        else:
            noisy_hinge += hinge

    # Squared-L2 penalty on the weights
    l2_penalty = lambda_val * np.linalg.norm(w)**2

    # Linear term in <w, mu>, rescaled by 1 - 2*p*eta, plus the penalty
    scale = (1 - 2 * p * eta )
    linear_plus_penalty = mu_coeff * np.dot(w, mu)/scale + l2_penalty

    return (1/n_rows) * (clean_hinge) + (1/(2*n_rows)) * (noisy_hinge) + linear_plus_penalty


In [None]:
import numpy as np

def objective_function_gradient(Sn, k,indices, w, eta, lambda_val, p,mu):
    """Return the (sub)gradient of ``objective_function`` with respect to ``w``.

    Parameters mirror ``objective_function``: ``Sn`` stores the labels in its
    last column, ``indices`` selects the mini-batch, rows with index ``< k``
    are weighted by ``1/n`` and the remaining rows by ``1/(2n)``.

    Returns
    -------
    Array with the same shape as ``w``.
    """
    n_rows = len(Sn)
    label_col = Sn.shape[1] - 1
    labels = Sn[:, label_col]
    feats = Sn[:, 0:label_col]
    mu_coeff = -(n_rows - k) / (2 * n_rows)

    # Accumulators for the two hinge-loss groups (forced to float)
    clean_grad = np.zeros_like(w).astype(float)
    noisy_grad = np.zeros_like(w).astype(float)

    for idx in indices:
        # Only rows whose hinge term is strictly positive contribute.
        if not (1 - labels[idx] * np.dot(feats[idx], w) > 0):
            continue
        if idx < k:
            clean_grad -= labels[idx] * feats[idx]
        else:
            noisy_grad -= labels[idx] * feats[idx]

    # Gradient of the squared-L2 penalty
    l2_grad = 2 * lambda_val * w

    # Gradient of the linear term in <w, mu>
    scale = (1 - 2 * p * eta )
    linear_grad = (mu_coeff /scale)*mu

    return (1 / n_rows) * clean_grad + (1 / (2 * n_rows)) * noisy_grad + linear_grad + l2_grad






In [None]:
def exact_line_search(f, gradient,indices,x,alpha, beta,Sn,k,eta,lambda_val,p,mu, tol=1e-3, max_iter=100):
    """Find the step ``t`` in ``[0, 1]`` that minimises ``f`` along ``-gradient``.

    Minimises ``phi(t) = f(..., x - t * g, ...)`` where ``g`` is the gradient
    at ``x``, using ternary search. This is valid when ``phi`` is unimodal on
    ``[0, 1]`` (e.g. when ``f`` is convex in the weight vector).

    Parameters
    ----------
    f, gradient : callables with signature
        ``(Sn, k, indices, w, eta, lambda_val, p, mu)``.
    indices : mini-batch row indices forwarded to ``f`` and ``gradient``.
    x : current point.
    alpha, beta : unused; kept so existing callers keep working.
    Sn, k, eta, lambda_val, p, mu : forwarded unchanged to ``f``/``gradient``.
    tol : stop once the bracketing interval is shorter than this.
    max_iter : upper bound on the number of interval reductions.

    Returns
    -------
    float
        Midpoint of the final bracketing interval.
    """
    # The search direction does not change during the search: compute it once.
    gr = gradient(Sn, k, indices, x, eta, lambda_val, p, mu)

    def phi(step):
        return f(Sn, k, indices, x - step * gr, eta, lambda_val, p, mu)

    a = 0.0
    b = 1.0
    for _ in range(max_iter):
        if b - a < tol:
            break

        # Compare two interior points: for a unimodal phi, the minimiser
        # cannot lie beyond the worse of the two, so that third is dropped.
        third = (b - a) / 3
        left = a + third
        right = b - third
        if phi(left) < phi(right):
            b = right
        else:
            a = left

    return (a + b) / 2





In [None]:
def median_of_mean(Sn_bar,g):
  """Pick the block mean that is most central among ``g`` block means.

  The first ``g * (len(Sn_bar) // g)`` rows are cut into ``g`` consecutive
  blocks of equal size (leftover rows are ignored). Each block is scored by
  the median, over the other blocks, of the median absolute coordinate
  difference between the two block means. The mean of the first block with
  the lowest score is returned.
  """
  block_len=len(Sn_bar)//g

  # Column-wise mean of every block
  block_means=[np.mean(Sn_bar[b*block_len:(b+1)*block_len,:],axis=0) for b in range(g)]

  # Score each block against all the others
  scores=[]
  for a,mean_a in enumerate(block_means):
    pair_medians=[np.median(np.abs(mean_a-mean_b))
                  for b,mean_b in enumerate(block_means) if a!=b]
    scores.append(np.median(pair_medians))

  # First block attaining the minimum score; if no score compares equal to
  # the minimum (e.g. NaN scores) the loop falls through to the last block.
  lowest=min(scores)
  chosen=0
  for chosen,score in enumerate(scores):
    if score==lowest:
      break
  return block_means[chosen]





In [None]:
def stochastic_gradient_descent_armijo(func, grad, x0, Sn, k, eta, lambda_val, mu, p, beta, tol, batch_size=50, alpha=0.5,  max_iter=1000):
    """Mini-batch gradient descent with a line-searched step size.

    Despite the name, the step size comes from ``exact_line_search``;
    ``alpha`` and ``beta`` are only forwarded to it.

    Parameters
    ----------
    func, grad : callables with signature
        ``(Sn, k, indices, w, eta, lambda_val, p, mu)``.
    x0 : starting point.
    Sn, k, eta, lambda_val, mu, p : forwarded to ``func``/``grad``.
    beta, alpha : forwarded to ``exact_line_search``.
    tol : stop once the update has Euclidean norm below this.
    batch_size : rows drawn without replacement per iteration; capped at
        ``len(Sn)`` so small datasets do not make the sampler raise.
    max_iter : maximum number of iterations.

    Returns
    -------
    The last iterate computed, including the final update that met ``tol``.
    """
    x = x0

    # Sampling without replacement cannot draw more rows than exist.
    batch_size = min(batch_size, len(Sn))

    for _ in range(max_iter):
        # Randomly select a mini-batch
        indices = np.random.choice(len(Sn), batch_size, replace=False)

        # Gradient of the mini-batch objective at the current point
        grad_x = grad(Sn, k, indices, x, eta, lambda_val, p, mu)

        # Step size along the negative gradient
        t = exact_line_search(func, grad, indices, x, alpha, beta, Sn, k, eta, lambda_val, p, mu, tol=1e-3, max_iter=100)

        x_new = x - t * grad_x
        step_norm = np.linalg.norm(x_new - x)

        # Keep the new iterate before testing convergence so the final
        # step is not thrown away.
        x = x_new
        if step_norm < tol:
            break

    return x


In [None]:
# Loss Decomposition And Centroid Estimation Algorithm
def LDCE_algorithm(Sn_bar,eta,lambda_val,beta,k):
  """Run the loss-decomposition / centroid-estimation pipeline.

  ``Sn_bar`` is a 2-D array whose last column holds the labels. Rows from
  index ``k`` onwards are used to estimate a centroid (``median_of_mean``
  with 5 groups) and a covariance matrix; the weights are then fitted on all
  of ``Sn_bar`` with ``stochastic_gradient_descent_armijo`` starting from
  the zero vector.

  Returns the fitted weight vector.
  """
  n_samples=len(Sn_bar)
  n_features=Sn_bar.shape[1]-1

  # Rows k.. : features and labels used for the centroid / covariance estimates
  tail_features=Sn_bar[k:,0:n_features]
  tail_labels=Sn_bar[k:,n_features]

  centroid=median_of_mean(tail_features,5)
  cov_matrix=Covariance_matrix(tail_features,tail_labels)

  # Fall back to the pseudo-inverse when the matrix is reported singular
  try:
    cov_inverse=np.linalg.inv(cov_matrix)
  except np.linalg.LinAlgError:
    cov_inverse=np.linalg.pinv(cov_matrix)

  # The correction below is evaluated at the zero start vector, so for a
  # finite cov_inverse it adds zero to the centroid.
  w0=np.zeros(n_features)
  w_col=w0.reshape(n_features,1)
  quad_form=w_col.T@cov_inverse@w_col
  scale=np.sqrt(beta/np.maximum(quad_form,1e-5))
  mu=np.squeeze(centroid+cov_inverse@w0*scale)

  p=k/(n_samples*(1-eta))
  return stochastic_gradient_descent_armijo(
      objective_function,objective_function_gradient,w0,Sn_bar,k,eta,
      lambda_val,mu,p,beta,1e-1,batch_size=50,alpha=0.5,max_iter=1000)



In [None]:
from sklearn.datasets import fetch_openml

# Load USPS handwritten digit dataset
# (presumably fetched from OpenML over the network on first use -- confirm)
usps = fetch_openml('usps', version=2)

# Extract features and target labels
# Both are accessed through `.values` in a later cell, so they are expected
# to be pandas objects.
X = usps.data
y = usps.target

# Sanity check of the loaded shapes
print("Shape of features:", X.shape)
print("Shape of target labels:", y.shape)


  warn(


Shape of features: (9298, 256)
Shape of target labels: (9298,)


In [None]:
import numpy as np
# Convert features and targets to arrays and append the target as the last
# column, so every row of arr_label is [features..., label].
ar1=X.values
ar2=y.values
arr_label = np.concatenate((ar1, ar2.reshape(-1,1)), axis=1)


In [None]:
# Split the rows by the label stored in column 256: rows labelled '1' are
# the positive class, every other row is negative.
Positive1=[arr_label[i] for i in range(len(X)) if arr_label[i,256]=='1']
negative1=[arr_label[i] for i in range(len(X)) if not arr_label[i,256]=='1']

In [None]:
# For eta = 0.2, k is 80% of 1553, truncated to an integer.
# NOTE(review): 1553 is presumably len(Positive1) -- confirm, and consider
# deriving it instead of hard-coding it.
k_ups=int(1553-0.2*1553)
# Stack positives first, then negatives, and drop the last (label) column
final_ups_array=np.vstack(Positive1+negative1)
final_ups_features=final_ups_array[:,0:final_ups_array.shape[1]-1]

In [None]:
# Assign labels by position: the first k_ups rows get -1, the rest get +1.
# NOTE(review): the saved output of the next cell shows the label column
# starting with 1 and ending with -1, and the balance-scale cells label the
# leading block +1 -- confirm which sign is intended here.
corr_label_list1=[-1]*k_ups +[1]*(len(X)-k_ups)
corr_label_array1=np.array(corr_label_list1).reshape(-1,1)
# Append the labels as the last column
corr_data_ups=np.hstack((final_ups_features,corr_label_array1))
# NOTE(review): casting to int truncates any fractional feature values --
# confirm the features are integer-valued before relying on this.
corr_data_ups = corr_data_ups.astype(int)


In [None]:
# Display the label column (last column) of the USPS array
corr_data_ups[:,corr_data_ups.shape[1]-1]

array([ 1,  1,  1, ..., -1, -1, -1])

In [None]:
# Rebuilds the label list with the same expression used in the earlier cell
corr_label_list1=[-1]*k_ups +[1]*(len(X)-k_ups)

In [None]:
# Reference labels used for scoring.
# NOTE(review): these are the position-assigned labels, not the original
# digit targets -- confirm this is the intended reference.
Y_true_label=np.array(corr_label_list1)

In [None]:
# Hyper-parameters for the USPS run: eta, the L2 coefficient (lambda) and beta
ups_corr=corr_data_ups
eta1=0.2
lambda_vall=0.5
beta1=0.5

# Fit the weight vector; the result varies between runs because the
# mini-batches are drawn from NumPy's unseeded global random state.
weight_ups=LDCE_algorithm(ups_corr,eta1,lambda_vall,beta1,k_ups)

In [None]:
def classify_data_points(X, w):
    """Return the sign of ``X . w`` for every row of ``X``.

    Entries are 1, -1, or 0 when the score is exactly zero.
    """
    scores = np.dot(X, w)
    return np.sign(scores)

def compute_f1_score(y_true, y_pred):
    """F1 score of the positive class (label 1) against the negative class (-1).

    Precision, recall and the final score each fall back to 0 when their
    denominator is not positive. Predictions that are neither 1 nor -1 are
    not counted in any of the three tallies.
    """
    actual_pos = (y_true == 1)
    predicted_pos = (y_pred == 1)

    # Confusion-matrix counts
    true_pos = np.sum(actual_pos & predicted_pos)
    false_pos = np.sum((y_true == -1) & predicted_pos)
    false_neg = np.sum(actual_pos & (y_pred == -1))

    precision = 0
    if (true_pos + false_pos) > 0:
        precision = true_pos / (true_pos + false_pos)

    recall = 0
    if (true_pos + false_neg) > 0:
        recall = true_pos / (true_pos + false_neg)

    # Harmonic mean of precision and recall
    if (precision + recall) > 0:
        return 2 * (precision * recall) / (precision + recall)
    return 0

# Evaluate the USPS weight vector
if __name__ == "__main__":
    # Fitted weights
    w = weight_ups

    # Feature columns only (everything except the last column).
    # NOTE(review): this rebinds the notebook-level name X, which earlier
    # cells use for the loaded dataset.
    X = corr_data_ups[:, :-1]

    # Reference labels
    y_true = Y_true_label

    # Predict and score
    y_pred = classify_data_points(X, w)
    f1_score = compute_f1_score(y_true, y_pred)
    print("F1 Score:", f1_score)


F1 Score: 0.9390825688073395


In [None]:
import pandas as pd

# Load the balance-scale dataset from the UCI repository (needs network access).
# The file is read with explicit column names; the first column is the class.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data"
column_names = ["Class", "Left-Weight", "Left-Distance", "Right-Weight", "Right-Distance"]
data = pd.read_csv(url, names=column_names)

# Print the frame (pandas abbreviates long frames in the output)
print(data)


    Class  Left-Weight  Left-Distance  Right-Weight  Right-Distance
0       B            1              1             1               1
1       R            1              1             1               2
2       R            1              1             1               3
3       R            1              1             1               4
4       R            1              1             1               5
..    ...          ...            ...           ...             ...
620     L            5              5             5               1
621     L            5              5             5               2
622     L            5              5             5               3
623     L            5              5             5               4
624     B            5              5             5               5

[625 rows x 5 columns]


In [None]:
# Underlying array of the frame (class column included)
balance_data=data.values

In [None]:
# Keep every column except column 0 (the class), i.e. the four features
balance_data_features=balance_data[:,1:data.shape[1]]

In [None]:
from sklearn.preprocessing import MinMaxScaler



# Create a MinMaxScaler object (default settings)
scaler = MinMaxScaler()

# Fit the scaler on the feature columns and transform them in one step
normalized_data = scaler.fit_transform(balance_data_features)
# Display the scaled features
normalized_data

array([[0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.25],
       [0.  , 0.  , 0.  , 0.5 ],
       ...,
       [1.  , 1.  , 1.  , 0.5 ],
       [1.  , 1.  , 1.  , 0.75],
       [1.  , 1.  , 1.  , 1.  ]])

In [None]:
# Inspect the second row of the scaled features
normalized_data[1]

array([0.  , 0.  , 0.  , 0.25])

In [None]:
# Class column as a plain Python list
l1=data['Class'].tolist()

In [None]:
# Empty containers for the two classes.
# NOTE(review): redundant -- the next cell starts with the same two assignments.
Positive2=[]
Negative2=[]


In [None]:
# Split the scaled rows by class: "R" rows are positive, all others negative.
Positive2=[normalized_data[i] for i in range(len(l1)) if l1[i]=="R"]
Negative2=[normalized_data[i] for i in range(len(l1)) if not l1[i]=="R"]


In [None]:
# Noise rate, and k = 80% of 288 truncated to an integer.
# NOTE(review): 288 is presumably len(Positive2) -- confirm, and consider
# deriving it instead of hard-coding it.
eta=0.2
k_balance=int(288-0.2*288)


#Creating the corrupted version of the dataset

In [None]:
# Assign labels by position: +1 for the first k_balance rows, -1 for the rest
corr_label_bal=[1]*k_balance + [-1]*(len(data)-k_balance)
# Features stacked with positives first, then negatives
corr_bal=np.vstack(Positive2 + Negative2)
# Labels as a column vector so they can be appended to the features
corr_label_bal1=np.array(corr_label_bal).reshape(-1,1)



In [None]:
# Append the label column to the features: each row is [features..., label]
corr_data_balance=np.hstack((corr_bal,corr_label_bal1))

In [None]:
# Hyper-parameters for the balance-scale run (same values as the USPS run)
bal_corr=corr_data_balance
eta1=0.2
lambda_vall=0.5
beta1=0.5

# Fit the weight vector; the result varies between runs because the
# mini-batches are drawn from NumPy's unseeded global random state.
weight_balance=LDCE_algorithm(bal_corr,eta1,lambda_vall,beta1,k_balance)

In [None]:

def accuracy_data(data,weigh,n_positive=288):
  """Fraction of rows that fall on the expected side of the hyperplane ``weigh``.

  The first ``n_positive`` rows are expected to score ``>= 0`` and the
  remaining rows ``<= 0``; a score of exactly zero counts as correct for
  either group.

  Parameters
  ----------
  data : sequence of feature rows, expected-positive rows first.
  weigh : weight vector.
  n_positive : number of leading rows expected to be positive. Defaults to
      288, the value previously hard-coded; it is clamped to ``len(data)``
      so shorter inputs do not index out of range.

  Returns
  -------
  float in [0, 1].

  Raises
  ------
  ValueError
      If ``data`` is empty.
  """
  n=len(data)
  if n==0:
    raise ValueError("accuracy_data needs at least one row")

  split=max(0,min(n_positive,n))
  hits_positive=sum(1 for row in data[:split] if np.dot(weigh,row)>=0)
  hits_negative=sum(1 for row in data[split:] if np.dot(weigh,row)<=0)
  return (hits_positive+hits_negative)/n

In [None]:
if __name__ == "__main__":
    # Fitted weights for the balance-scale data
    w1 = weight_balance

    # Feature columns only (everything except the last column)
    X1 = corr_data_balance[:, :-1]

    # Reference labels: the last column of the same array.
    # NOTE(review): these are the position-assigned labels -- confirm they
    # are the intended reference for scoring.
    y_true1 = corr_data_balance[:, -1]

    # Predict and score
    y_pred1 = classify_data_points(X1, w1)
    f1_score1 = compute_f1_score(y_true1, y_pred1)
    print("F1 Score for balance dataset:", f1_score1)

F1 Score for balance dataset: 0.5386416861826698
