In [211]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import LeaveOneOut
import sklearn
from sklearn.linear_model import LinearRegression

In [213]:
file = 'glcm_texture_feature_aug2_cluster8.csv'
# Seed the pseudo randomization
data = np.genfromtxt(file, delimiter=',')[1:]

# Seed the pseudo randomization
np.random.seed(7)

# Shuffle the dataset
#np.random.shuffle(data)

X = data[:,:-3]

y=data[:,-3]
clusters = data[:,-1]
data_label = data[:,-2]

In [214]:
X

array([[4.16311518e-01, 1.67665368e-01, 6.19384374e+00, 4.71693242e-01,
        9.51033941e+01, 3.86721568e-01, 3.69006307e-01, 4.25327936e+00,
        4.72745427e-01, 4.77439351e+01, 3.30560122e-02, 7.77017057e-01,
        7.72750926e+00, 1.81124620e-01, 1.44614576e+02],
       [3.91213879e-01, 1.11009408e-01, 6.67269411e+00, 4.42667972e-01,
        1.07659862e+02, 4.40535943e-01, 3.31949054e-01, 3.49684133e+00,
        5.22690378e-01, 3.53852754e+01, 3.58731938e-02, 7.93548694e-01,
        7.33986784e+00, 1.82822774e-01, 1.20951331e+02],
       [4.20729278e-01, 1.17172741e-01, 5.86424468e+00, 4.74182859e-01,
        8.89660992e+01, 4.53521837e-01, 2.96440524e-01, 3.09517605e+00,
        5.42205314e-01, 2.86626146e+01, 3.94059213e-02, 8.57673004e-01,
        5.54578107e+00, 2.18410660e-01, 6.94188670e+01],
       [4.00602129e-01, 1.56332802e-01, 4.74884529e+00, 4.69574559e-01,
        5.46779289e+01, 4.51209378e-01, 3.74894607e-01, 3.16005761e+00,
        5.39136518e-01, 2.90918606e+0

In [215]:
y

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.])

In [216]:
# Separate train and test
sep_idx = round(2/3* X.shape[0])

X_train = X[:sep_idx]
X_test = X[sep_idx:]

y_train = y[:sep_idx]
y_test = y[sep_idx:]

#print(X_train, X_test, y_train, y_test)#

# Standardize train and test data using training data
X_train_std = np.std(X_train, ddof = 0, axis=0)  # Mean and std on axis=0
X_train_mean = np.mean(X_train, axis=0)
X_train = (X_train - X_train_mean) / X_train_std
X_test = (X_test - X_train_mean) / X_train_std

### Logistic Regression on GLCM data after standardization

#### 0.66/0.33 split training/testing data

In [217]:
clf = LogisticRegression(random_state=0).fit(X_train, y_train)

In [218]:
y_pred = clf.predict(X_test)

In [219]:
print(y_pred)
print(y_test)
accuracy_score(y_test, y_pred)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0.]


0.23529411764705882

In [220]:
f1_score(y_test, y_pred, average='binary')

0.23529411764705882

#### LOOCV

In [221]:
# Standardizing X
X_std = np.std(X, ddof = 0, axis=0)  # Mean and std on axis=0
X_mean = np.mean(X, axis=0)
X = (X - X_mean) / X_std
# LOOCV
loo = LeaveOneOut()
loo.get_n_splits(X)
i=1
cum_acc = 0
for train_index, test_index in loo.split(X):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    #print(X_train, X_test, y_train, y_test)
    clf = LogisticRegression(random_state=0).fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    cum_acc += accuracy_score(y_test, y_pred)
    i+=1

print("LOOCV accuracy: %f"%(cum_acc/i))

LOOCV accuracy: 0.549020


### Find weights of features per instance using Weighted Linear Regression (weights are calculated using cosine similarity)

In [222]:
weight_arr = []
for i in range(0,X.shape[0]):
    print(X[i])
    print(clusters[i])
    X_sample = X[clusters==clusters[i]]
    y_sample = y[clusters==clusters[i]]
    kernel_width = 0.25
    distances = sklearn.metrics.pairwise_distances(np.expand_dims(X[i], axis=0), X_sample, metric='cosine')
    weights = np.sqrt(np.exp(-(distances**2)/kernel_width**2)) #Kernel function
    reg = LinearRegression().fit(X_sample, y_sample, weights.squeeze())
    weight_arr.append(reg.coef_)

[ 0.22709379 -0.09169056  0.40060902  0.1060311   0.32924506 -0.4201625
  0.04364137  1.45547691 -0.73801334  1.51601517 -1.16922074 -0.30008345
  1.3528297  -1.04469376  1.41587901]
3.0
[-5.21364861e-01 -1.01671532e+00  8.45895456e-01 -7.32119927e-01
  7.59648815e-01  1.01244857e+00 -5.75502551e-01 -1.46915243e-01
  7.06519111e-01 -2.88432194e-04 -5.10917223e-01  7.96082980e-03
  8.08228771e-01 -9.49544387e-01  4.31937240e-01]
1.0
[ 0.35883969 -0.91608627  0.09411247  0.17792276  0.11887421  1.35815053
 -1.16877078 -0.99777847  1.27093968 -0.82510237  0.31459116  1.20282904
 -1.71229993  1.04448285 -1.71083496]
7.0
[-0.24138963 -0.27671794 -0.94310552  0.04485074 -1.05643699  1.29658976
  0.14202171 -0.86033734  1.18218245 -0.7724375   0.36531114  0.14332673
 -0.88554962  0.99893465 -0.61246644]
2.0
[-1.19708281 -0.34881136  1.36464114 -1.31203372  1.351884   -1.13373378
 -0.37303465  2.01813931 -1.39311699  2.07123953 -1.39424908 -0.09733227
  1.580821   -1.84800606  1.24975795]
3.0


In [223]:
np.array(weight_arr)

array([[-0.05313536, -0.43473392, -0.20840572, -0.00743268, -0.02833411,
        -0.12406313, -0.00860591, -0.17027008, -0.0935675 , -0.19157228,
         0.04905788, -0.08791866, -0.57917438, -0.35802109,  0.27714083],
       [-0.05189695,  0.379825  ,  0.18965523,  0.42577486,  0.41696761,
        -0.03129675, -0.09639582,  0.12075948,  0.33405441,  0.40798327,
        -0.2180343 ,  0.07434648, -0.49487909,  0.97065872,  0.29612557],
       [ 0.20115621, -0.04922264,  0.03267155,  0.20221861, -0.11225222,
        -0.30553163,  0.39652156, -0.12250731, -0.14509387, -0.11866817,
        -0.41899678,  0.39728612, -0.05431928,  0.11136886,  0.05982118],
       [-0.37774392,  0.60481729,  0.03202853,  0.29210187,  0.16162067,
         0.77432104, -0.38216402,  0.73939534, -0.2741613 , -0.02095029,
         0.38143118,  0.7154947 , -1.80333851, -1.3744227 , -0.05965038],
       [-0.10405114, -0.45903298, -0.25775222,  0.04478505,  0.01704786,
        -0.17529442, -0.03965955, -0.16721444, 

##### Saving weights for each instance into a csv file

In [225]:
np.savetxt("GLCM_local_weights.csv", weight_arr, delimiter=",")

##### Validating the weights

In [148]:
i=9
print(X[i])

print(clusters[i])
X_sample = X[clusters==clusters[i]]
y_sample = y[clusters==clusters[i]]

print(X_sample)

kernel_width = 0.25
distances = sklearn.metrics.pairwise_distances(np.expand_dims(X[i], axis=0), X_sample, metric='cosine')
weights = np.sqrt(np.exp(-(distances**2)/kernel_width**2)) #Kernel function
reg = LinearRegression().fit(X_sample, y_sample, weights.squeeze())
print(weights)
print(reg.score(X_sample, y_sample))
reg.coef_

[-0.92994591 -1.23217027  2.30898118 -1.27339401  2.73607566 -0.24027392
 -1.85859217  0.54755703 -0.4634835   0.46950683 -0.232552   -1.28971847
  0.27825715 -0.98340731 -0.25285191]
2.0
[[-0.6108289  -1.03138079  0.92042826 -0.83850954  0.80703122  0.98162774
  -0.57779529 -0.10688187  0.67101761  0.03172517 -0.47988393 -0.03945067
   0.81640327 -0.94915792  0.43275803]
 [-1.32185007 -0.36863425  1.45158304 -1.45118641  1.40345216 -1.16754572
  -0.37836437  2.05624566 -1.44142137  2.08970929 -1.35303084 -0.1456083
   1.58557169 -1.832383    1.25170568]
 [-2.72067607 -1.1207703   2.02184031 -2.79507813  1.59398902 -1.47392627
  -0.70210258  1.01121448 -1.59099998  0.55674763  0.8465811  -1.43143746
  -0.540352   -0.28774145 -1.02824385]
 [-0.93087507 -1.12582797  1.39418353 -1.21967866  1.30442612  0.1341445
  -0.96260566  0.68984842 -0.16875356  0.8078971  -0.84317701 -0.30749986
   0.81868423 -1.31755778  0.38017199]
 [-0.92994591 -1.23217027  2.30898118 -1.27339401  2.73607566 -0.2

array([-0.88906045,  0.57704621, -0.76863266,  0.92081257,  0.28006913,
       -0.37571594,  0.06487817,  0.07049596,  0.76191599,  0.33869209,
       -1.25279376, -0.64824823, -1.12833154,  0.68085599,  0.04507452])

In [149]:
y_sample

array([0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1.])

In [150]:
reg = LinearRegression().fit(X_sample, y_sample, weights.squeeze())

In [151]:
reg.coef_

array([-0.88906045,  0.57704621, -0.76863266,  0.92081257,  0.28006913,
       -0.37571594,  0.06487817,  0.07049596,  0.76191599,  0.33869209,
       -1.25279376, -0.64824823, -1.12833154,  0.68085599,  0.04507452])