In [0]:
import numpy as np
from sklearn.linear_model import LogisticRegression as logreg
from sklearn.model_selection import train_test_split
import sklearn as sk
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import time
import statistics

# Unbiased Estimator

### MNIST Dataset

In [0]:
# np.load on a .npz archive returns a lazy NpzFile that keeps the underlying
# file open until it is closed. Copy every stored array into a plain dict
# inside a `with` block so the handle is released as soon as the cell finishes;
# the dict is still indexed by the same keys (dataset['Xtr'], ...).
with np.load('mnist_dataset.npz') as npz_file:
    dataset = {key: npz_file[key] for key in npz_file.files}

In [0]:
def loss_func_modif(S_train, rh0 = 0.2, rh1= 0.4):
  """Build per-sample weights for a label-dependent weighted loss.

  The mixing coefficient is ``alpha = (1 - rh1 + rh0) / 2``. Samples whose
  label equals 0 receive weight ``alpha``; every other sample receives
  ``1 - alpha``.

  Parameters
  ----------
  S_train : array-like
      Training labels of any shape. Lists and tuples are accepted as well as
      NumPy arrays.
  rh0, rh1 : float, optional
      Presumably the label-flip rates of class 0 and class 1 respectively
      (TODO confirm against the noise model used to build the dataset).

  Returns
  -------
  numpy.ndarray
      1-D float array with one weight per element of ``S_train``.
  """
  alpha = float(1 - rh1 + rh0) / 2
  # Convert first: comparing a plain Python list with 0 yields a single False
  # instead of an element-wise mask, which would silently leave every weight
  # at 1 - alpha.
  labels = np.asarray(S_train)
  sample_weight = np.where(labels == 0, alpha, 1 - alpha)
  return np.ravel(sample_weight)

In [0]:
def Prepare_datset(dataset):
    """Extract the train/test arrays from a loaded dataset mapping.

    Reads the entries ``'Xtr'``, ``'Str'``, ``'Xts'`` and ``'Yts'`` from
    ``dataset``. The two feature arrays are divided by 255 (presumably to
    rescale 8-bit pixel intensities -- confirm against the data files) and
    the two label arrays are flattened to 1-D.

    Returns
    -------
    tuple
        ``(X_train, X_test, S_train, Y_test)``.
    """
    # Look the keys up in a fixed order: train features, train labels,
    # test features, test labels.
    raw_train_x, raw_train_s, raw_test_x, raw_test_y = (
        dataset[key] for key in ('Xtr', 'Str', 'Xts', 'Yts')
    )

    X_train = np.asarray(raw_train_x / 255)
    X_test = np.asarray(raw_test_x / 255)

    # Labels are returned as flat vectors regardless of how they were stored.
    S_train = np.ravel(np.asarray(raw_train_s))
    Y_test = np.ravel(np.asarray(raw_test_y))

    return X_train, X_test, S_train, Y_test

In [0]:
def kfold_Model_fit(X_train, S_train, X_test, Y_test, clf, sample_weight_indic = False, kfold = 10, test_size = 0.2, random_state = None):
  """Repeatedly fit ``clf`` on random train/validation splits and score it.

  Despite the name, this does not partition the data into K disjoint folds:
  each of the ``kfold`` repetitions draws an independent random hold-out
  split with ``train_test_split``.

  Parameters
  ----------
  X_train, S_train : array-like
      Training features and training labels; ``S_train`` is flattened to 1-D.
  X_test, Y_test : array-like
      Test features and test labels, scored after every fit.
  clf : estimator
      Object exposing ``fit(X, y, sample_weight=...)`` and ``predict(X)``.
      The same instance is refitted on every repetition.
  sample_weight_indic : bool, optional
      When truthy, fit with the weights returned by ``loss_func_modif`` for
      the training part of the split; otherwise fit unweighted.
  kfold : int, optional
      Number of random splits to evaluate.
  test_size : float or int, optional
      Size of the validation part, forwarded to ``train_test_split``.
      Defaults to 0.2.
  random_state : None, int or numpy.random.RandomState, optional
      Seed for the split generator. ``None`` (the default) uses NumPy's
      global random state.

  Returns
  -------
  (numpy.ndarray, numpy.ndarray)
      ``accuracy_val`` (accuracy on the held-out part, measured against the
      held-out ``S_train`` labels) and ``accuracy_test`` (accuracy on
      ``X_test`` against ``Y_test``), each of length ``kfold``.

  Prints the total wall-clock running time in whole seconds.
  """
  start_time = time.time()
  accuracy_val = np.zeros(kfold)
  accuracy_test = np.zeros(kfold)
  S_train = np.ravel(S_train)

  # A single generator is shared by all repetitions so that each one draws a
  # different split while the whole run stays reproducible for a given seed.
  if random_state is None or isinstance(random_state, np.random.RandomState):
    rng = random_state
  else:
    rng = np.random.RandomState(random_state)

  for i in range(kfold):
    X_t, X_v, S_t, S_v = train_test_split(X_train, S_train, test_size = test_size, random_state = rng)
    # Weights must be rebuilt per split because they follow the labels of
    # the samples that ended up in the training part.
    sample_weight = loss_func_modif(S_t) if sample_weight_indic else None
    clf.fit(X_t, S_t, sample_weight = sample_weight)
    accuracy_val[i] = accuracy_score(S_v, clf.predict(X_v))
    accuracy_test[i] = accuracy_score(Y_test, clf.predict(X_test))

  print("Running time: "+ str(int(time.time()-start_time))+" seconds")
  return accuracy_val, accuracy_test

### Baseline

In [0]:
# Baseline on the currently loaded `dataset`: logistic regression fitted
# without per-sample weights, evaluated over 10 random splits.
clf = logreg()
X_train, X_test, S_train, Y_test = Prepare_datset(dataset)
accuracy_val, accuracy_test = kfold_Model_fit(
    X_train, S_train, X_test, Y_test, clf,
    sample_weight_indic=False, kfold=10,
)
# Mean validation accuracy, mean test accuracy, sample std-dev of test accuracy.
print(accuracy_val.mean(), accuracy_test.mean(), statistics.stdev(accuracy_test))

Running time: 59 seconds
0.6521 0.7918000000000001 0.005287301349039572


### Using weighted loss function

In [0]:
# Same experiment on the currently loaded `dataset`, but each fit uses the
# per-sample weights produced by loss_func_modif.
clf = logreg()
X_train, X_test, S_train, Y_test = Prepare_datset(dataset)
accuracy_val, accuracy_test = kfold_Model_fit(
    X_train, S_train, X_test, Y_test, clf,
    sample_weight_indic=True, kfold=10,
)
# Mean validation accuracy, mean test accuracy, sample std-dev of test accuracy.
print(accuracy_val.mean(), accuracy_test.mean(), statistics.stdev(accuracy_test))

Running time: 51 seconds
0.6607999999999999 0.8741 0.0057242175593408854


### CIFAR Dataset

In [0]:
# np.load on a .npz archive returns a lazy NpzFile that keeps the underlying
# file open until it is closed. Copy every stored array into a plain dict
# inside a `with` block so the handle is released as soon as the cell finishes;
# the dict is still indexed by the same keys (dataset['Xtr'], ...).
with np.load('cifar_dataset.npz') as npz_file:
    dataset = {key: npz_file[key] for key in npz_file.files}

### Baseline

In [0]:
# Baseline on the currently loaded `dataset`: logistic regression fitted
# without per-sample weights, evaluated over 10 random splits.
clf = logreg()
X_train, X_test, S_train, Y_test = Prepare_datset(dataset)
accuracy_val, accuracy_test = kfold_Model_fit(
    X_train, S_train, X_test, Y_test, clf,
    sample_weight_indic=False, kfold=10,
)
# Mean validation accuracy, mean test accuracy, sample std-dev of test accuracy.
print(accuracy_val.mean(), accuracy_test.mean(), statistics.stdev(accuracy_test))

Running time: 892 seconds
0.5974999999999999 0.6664999999999999 0.006123724356957949


### Using weighted loss function

In [0]:
# Same experiment on the currently loaded `dataset`, but each fit uses the
# per-sample weights produced by loss_func_modif.
clf = logreg()
X_train, X_test, S_train, Y_test = Prepare_datset(dataset)
accuracy_val, accuracy_test = kfold_Model_fit(
    X_train, S_train, X_test, Y_test, clf,
    sample_weight_indic=True, kfold=10,
)
# Mean validation accuracy, mean test accuracy, sample std-dev of test accuracy.
print(accuracy_val.mean(), accuracy_test.mean(), statistics.stdev(accuracy_test))

Running time: 698 seconds
0.5936999999999999 0.7048 0.005991660871578087
