In [98]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sys
import os
import glob
import random
from __future__ import division
import pandas

import fnmatch

from scipy.io import loadmat

import sklearn
import h5py

## Read Retina Data

In [92]:
dirFeatures = 'SLICFeatures/*.mat'

fileFeatures = sorted(glob.glob(os.path.expanduser(dirFeatures)))

N = 83*2


fileFeaturesDis = fileFeatures[:83]
fileFeaturesHeal = fileFeatures[83:]


# 83 Diseased Samples
# 223 Healthy Samples
# -> We sample a subset of randomly chosen 83 healthy samples

random.seed(1)
fileFeatures = np.concatenate([random.sample(fileFeaturesHeal, 83), fileFeaturesDis])
random.shuffle(fileFeatures)


labels = np.zeros((N), dtype=np.int8)

# label 1 for diabetes positiv sample
# label 0 for diabetes negative sample
for f, file in enumerate(fileFeatures):
    if (fnmatch.fnmatch(file, '*disease*')):
        labels[f] = 1
        
        
        

(166,)

In [70]:
l = [1,2,3]
test = random.shuffle(l)

print l

[3, 2, 1]


In [202]:
data = np.array(pandas.read_csv('clean1.data'))


features = data[:, 3:-1]
label = data[:, -1]

print features.shape

features /= np.max(features, axis=0)


bags = []
bagsLabels = []

tempList = [features[0]]

for i in range(data.shape[0]-1):
    if (data[i,0] == data[i+1,0]):
        tempList.append(features[i+1])
    else:
        bags.append(np.array(tempList).reshape((len(tempList), features.shape[1])))
        bagsLabels.append(label[i])
        tempList = []
        tempList.append(features[i+1])
        
bags.append(np.array(tempList).reshape((len(tempList), features.shape[1])))        
bagsLabels.append(label[data.shape[0]-1])




(475, 165)


## Build Kernel

In [224]:
def gaussianKernel(x1, x2, sigma=1.):
    diff = x1-x2
    return np.exp(-np.dot(diff, diff)/(2.*sigma**2))

def gaussianDist(x1, x2, sigma=1.):
    return 1-gaussianKernel(x1,x2,sigma)

# thresholded affinity matrix within the bag
def calculateActiveEdgeMatrix(bag, sigma, delta):
    '''
    Funktion to calculate all the inner bag edges.
    
    :param bag: feature matrix, dimension n x D 
    :param delta: thresholding distances to determine edges
    
    :return: 
    
    '''
    
    n = bag.shape[0]
    
    distMat = np.zeros((n, n), dtype=np.float32)
    
    for i in range(n):
        for j in range(i, n):
            dist = gaussianDist(bag[i], bag[j], sigma=sigma)
            distMat[i, j] = dist
    distMat = distMat + distMat.transpose()
        
    distMat = (distMat < delta).astype(np.int8)
    
    return 1./(np.sum(distMat, axis=1)-1)
   
    
calculateActiveEdgeMatrix(bags[1], 10., 0.25)


array([ 0.33333333,  0.33333333,  0.33333333,  0.33333333])

In [228]:
def calculate_miKernel(bags, sigma, delta):
    
    kernel = np.zeros((len(bags), len(bags)))
    
    for i in range(len(bags)):
        for j in range(i, len(bags)):
            
            bag1 = bags[i]
            bag2 = bags[j]
            
            n1 = bag1.shape[0]
            n2 = bag2.shape[0]    
            
            activeEdges1 = calculateActiveEdgeMatrix(bag1, sigma, delta)
            activeEdges2 = calculateActiveEdgeMatrix(bag2, sigma, delta)
            
            result = 0
            
            for a in range(n1):
                for b in range(n2):
                    
                    result += activeEdges1[a]*activeEdges2[b] * gaussianKernel(bag1[a, :], bag2[b, :])
                    
            result /= np.sum(activeEdges1)
            result /= np.sum(activeEdges2)
            
            print kernel
            
            kernel[i, j] = result
            
    return kernel

calculate_miKernel(bags, 10., 0.25)

[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
[[ 0.33341153  0.          0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 ..., 
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]]
[[ 0.33341153  0.00892781  0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 ..., 
 [ 0.          0.          0.         ...,  0.          0.       

array([[  3.33411531e-01,   8.92780812e-03,   4.52712805e-07, ...,
          1.70099954e-28,   3.11467378e-31,              nan],
       [  0.00000000e+00,   2.50074426e-01,   1.03494084e-04, ...,
          8.05861058e-27,   5.54071868e-32,              nan],
       [  0.00000000e+00,   0.00000000e+00,   5.00000001e-01, ...,
          3.02073141e-19,   3.02891496e-30,              nan],
       ..., 
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
          2.50799753e-01,   3.76189422e-45,              nan],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
          0.00000000e+00,   3.33719516e-01,              nan],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
          0.00000000e+00,   0.00000000e+00,              nan]])

In [201]:
A = np.array([[1,2], [3,4]], dtype=np.float32)


A /= np.max(A, axis=0)

A

array([[ 0.33333334,  0.5       ],
       [ 1.        ,  1.        ]], dtype=float32)