In [170]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import LeaveOneOut
import sklearn
from sklearn.linear_model import LinearRegression

In [171]:
file = 'glcm_with_clusters.csv'
# Seed the pseudo randomization
data = np.genfromtxt(file, delimiter=',')[1:]

# Seed the pseudo randomization
np.random.seed(7)

# Shuffle the dataset
#np.random.shuffle(data)

X = data[:,:-3]

y=data[:,-3]
clusters = data[:,-1]
data_label = data[:,-2]

In [172]:
X

array([[4.16311518e-01, 1.67665368e-01, 6.19384374e+00, 4.71693242e-01,
        9.51033941e+01, 3.86721568e-01, 3.69006307e-01, 4.25327936e+00,
        4.72745427e-01, 4.77439351e+01, 3.30560122e-02, 7.77017057e-01,
        7.72750926e+00, 1.81124620e-01, 1.44614576e+02],
       [3.91213879e-01, 1.11009408e-01, 6.67269411e+00, 4.42667972e-01,
        1.07659862e+02, 4.40535943e-01, 3.31949054e-01, 3.49684133e+00,
        5.22690378e-01, 3.53852754e+01, 3.58731938e-02, 7.93548694e-01,
        7.33986784e+00, 1.82822774e-01, 1.20951331e+02],
       [4.20729278e-01, 1.17172741e-01, 5.86424468e+00, 4.74182859e-01,
        8.89660992e+01, 4.53521837e-01, 2.96440524e-01, 3.09517605e+00,
        5.42205314e-01, 2.86626146e+01, 3.94059213e-02, 8.57673004e-01,
        5.54578107e+00, 2.18410660e-01, 6.94188670e+01],
       [4.00602129e-01, 1.56332802e-01, 4.74884529e+00, 4.69574559e-01,
        5.46779289e+01, 4.51209378e-01, 3.74894607e-01, 3.16005761e+00,
        5.39136518e-01, 2.90918606e+0

In [173]:
y

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [174]:
# Separate train and test
sep_idx = round(2/3* X.shape[0])

X_train = X[:sep_idx]
X_test = X[sep_idx:]

y_train = y[:sep_idx]
y_test = y[sep_idx:]

#print(X_train, X_test, y_train, y_test)#

# Standardize train and test data using training data
X_train_std = np.std(X_train, ddof = 0, axis=0)  # Mean and std on axis=0
X_train_mean = np.mean(X_train, axis=0)
X_train = (X_train - X_train_mean) / X_train_std
X_test = (X_test - X_train_mean) / X_train_std

### Logistic Regression on GLCM data after standardization

#### 0.66/0.33 split training/testing data

In [175]:
clf = LogisticRegression(random_state=0).fit(X_train, y_train)

In [176]:
y_pred = clf.predict(X_test)

In [177]:
print(y_pred)
print(y_test)
accuracy_score(y_test, y_pred)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


0.0

In [178]:
f1_score(y_test, y_pred, average='binary')

0.0

#### LOOCV

In [179]:
# Standardizing X
X_std = np.std(X, ddof = 0, axis=0)  # Mean and std on axis=0
X_mean = np.mean(X, axis=0)
X = (X - X_mean) / X_std
# LOOCV
loo = LeaveOneOut()
loo.get_n_splits(X)
i=1
cum_acc = 0
for train_index, test_index in loo.split(X):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    #print(X_train, X_test, y_train, y_test)
    clf = LogisticRegression(random_state=0).fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    cum_acc += accuracy_score(y_test, y_pred)
    i+=1

print("LOOCV accuracy: %f"%(cum_acc/i))

LOOCV accuracy: 0.530612


### Find weights of features per instance using Weighted Linear Regression (weights are calculated using cosine similarity)

In [183]:
weight_arr = []
for i in range(0,X.shape[0]):
    print(X[i])
    print(clusters[i])
    X_sample = X[clusters==clusters[i]]
    y_sample = y[clusters==clusters[i]]
    kernel_width = 0.25
    distances = sklearn.metrics.pairwise_distances(np.expand_dims(X[i], axis=0), X_sample, metric='cosine')
    weights = np.sqrt(np.exp(-(distances**2)/kernel_width**2)) #Kernel function
    reg = LinearRegression().fit(X_sample, y_sample, weights.squeeze())
    weight_arr.append(reg.coef_)

[ 0.17673336 -0.11349888  0.46448997  0.04699404  0.37358548 -0.45297995
  0.03206158  1.49408406 -0.78232311  1.53811503 -1.13059712 -0.35002421
  1.35859076 -1.0426937   1.41805565]
0.0
[-0.6108289  -1.03138079  0.92042826 -0.83850954  0.80703122  0.98162774
 -0.57779529 -0.10688187  0.67101761  0.03172517 -0.47988393 -0.03945067
  0.81640327 -0.94915792  0.43275803]
2.0
[ 0.3153624  -0.93152878  0.15066161  0.12294733  0.16172779  1.3278115
 -1.1621646  -0.95698779  1.23887983 -0.78769606  0.33610622  1.16522822
 -1.69295549  1.01105356 -1.71296685]
1.0
[-0.31622622 -0.29709751 -0.91136804 -0.01764279 -1.0218902   1.26616494
  0.12896623 -0.81966898  1.14958138 -0.73537552  0.38624138  0.09702667
 -0.869869    0.9662778  -0.6130848 ]
1.0
[-1.32185007 -0.36863425  1.45158304 -1.45118641  1.40345216 -1.16754572
 -0.37836437  2.05624566 -1.44142137  2.08970929 -1.35303084 -0.1456083
  1.58557169 -1.832383    1.25170568]
0.0
[-0.54780773  0.37385104 -0.3332001  -0.34607082 -0.46549121 -

In [184]:
np.array(weight_arr)

array([[ 7.09963496e-02, -6.52207114e-01, -1.69463561e-01,
        -5.78595060e-02,  6.01185264e-02, -3.26233892e-02,
         1.29912014e-01, -1.21947164e-01, -2.52868866e-01,
        -2.52496839e-01,  1.11810951e-01, -4.11361983e-03,
        -4.82965551e-01, -2.75579514e-01,  3.15260689e-01],
       [-1.60531249e-01,  6.92871674e-01, -1.52004382e-01,
         2.24123832e-01, -2.02939128e-01,  1.31488593e-01,
        -1.03078678e-01,  1.67368997e-01,  6.75959285e-01,
         6.79748522e-01, -1.05523899e+00, -5.48813055e-01,
        -1.15994443e+00,  7.83665835e-01, -2.24271700e-01],
       [ 9.91693078e-01, -1.54840643e-01, -1.25103997e+01,
        -1.95012557e+00,  1.37814142e+01,  7.64497338e+00,
         9.66744073e-01, -2.41758689e+00, -1.05250176e+01,
        -2.43071978e+00, -1.26924407e+00, -1.13502968e+00,
        -7.39387354e-01, -2.20706537e-01,  1.51362099e+00],
       [ 1.38963669e+00,  1.48739361e-01, -9.16119091e+00,
        -1.98378804e+00,  1.04444459e+01,  4.60017006

##### Saving weights for each instance into a csv file

In [185]:
np.savetxt("GLCM_local_weights.csv", weight_arr, delimiter=",")

##### Validating the weights

In [148]:
i=9
print(X[i])

print(clusters[i])
X_sample = X[clusters==clusters[i]]
y_sample = y[clusters==clusters[i]]

print(X_sample)

kernel_width = 0.25
distances = sklearn.metrics.pairwise_distances(np.expand_dims(X[i], axis=0), X_sample, metric='cosine')
weights = np.sqrt(np.exp(-(distances**2)/kernel_width**2)) #Kernel function
reg = LinearRegression().fit(X_sample, y_sample, weights.squeeze())
print(weights)
print(reg.score(X_sample, y_sample))
reg.coef_

[-0.92994591 -1.23217027  2.30898118 -1.27339401  2.73607566 -0.24027392
 -1.85859217  0.54755703 -0.4634835   0.46950683 -0.232552   -1.28971847
  0.27825715 -0.98340731 -0.25285191]
2.0
[[-0.6108289  -1.03138079  0.92042826 -0.83850954  0.80703122  0.98162774
  -0.57779529 -0.10688187  0.67101761  0.03172517 -0.47988393 -0.03945067
   0.81640327 -0.94915792  0.43275803]
 [-1.32185007 -0.36863425  1.45158304 -1.45118641  1.40345216 -1.16754572
  -0.37836437  2.05624566 -1.44142137  2.08970929 -1.35303084 -0.1456083
   1.58557169 -1.832383    1.25170568]
 [-2.72067607 -1.1207703   2.02184031 -2.79507813  1.59398902 -1.47392627
  -0.70210258  1.01121448 -1.59099998  0.55674763  0.8465811  -1.43143746
  -0.540352   -0.28774145 -1.02824385]
 [-0.93087507 -1.12582797  1.39418353 -1.21967866  1.30442612  0.1341445
  -0.96260566  0.68984842 -0.16875356  0.8078971  -0.84317701 -0.30749986
   0.81868423 -1.31755778  0.38017199]
 [-0.92994591 -1.23217027  2.30898118 -1.27339401  2.73607566 -0.2

array([-0.88906045,  0.57704621, -0.76863266,  0.92081257,  0.28006913,
       -0.37571594,  0.06487817,  0.07049596,  0.76191599,  0.33869209,
       -1.25279376, -0.64824823, -1.12833154,  0.68085599,  0.04507452])

In [149]:
y_sample

array([0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1.])

In [150]:
reg = LinearRegression().fit(X_sample, y_sample, weights.squeeze())

In [151]:
reg.coef_

array([-0.88906045,  0.57704621, -0.76863266,  0.92081257,  0.28006913,
       -0.37571594,  0.06487817,  0.07049596,  0.76191599,  0.33869209,
       -1.25279376, -0.64824823, -1.12833154,  0.68085599,  0.04507452])