In [1]:
import numpy as np
import scipy.signal as signal
from sklearn.cluster import AffinityPropagation
from sklearn import metrics

In [2]:
def fix(array):
    n = np.array([])
    for x in array:
        n = np.append(n, np.zeros((x-1)/100.0))
        n = np.append(n, [1])
    return n

def printClusterMetrics(clusterModel, observations):
    clusters = clusterModel.fit(observations)
    centers = clusters.cluster_centers_indices_
    labels = clusters.labels_
    if centers is None or len(centers) < 1:
        print("Clustering algorithm didn't converge.")
    else:        
        silhouetteMetric = metrics.silhouette_score(observations, labels, metric='euclidean')
        print(\
            'There are {} obervations and {} clusters, with a confidence of {}.\nThe labels were: {}'.format(observations.shape[0], len(centers), silhouetteMetric, labels))

In [6]:
h_peak

array([  6,  13,  40,  46,  53,  69,  75,  78,  81,  90, 104, 112, 119, 122])

In [17]:
samples = None
files = ['data/alice1.txt', 'data/eve1.txt', 'data/bob1.txt', 'data/bob2.txt', 'data/alice2.txt']

for index, filename in enumerate(files):
    intervals = np.loadtxt(filename)
    intervals = intervals[intervals < 2000][0:1000] 
    frequencySpectrum, amplitude = signal.welch(fix(intervals), 5, scaling='spectrum')
    if samples is None:
        samples = amplitude[np.newaxis]
    else:
        samples = np.concatenate((samples, amplitude[np.newaxis]), axis=0)
    print (\
        '{}) {}. Both the spectrum: {} and amplitude: {} should had have the same length. The currnet # of samples is: {}.\n Amplitude (1st five): {}'.\
        format(str(index + 1), filename, len(frequencySpectrum), len(amplitude), samples.shape[0], amplitude[0:5]))

printClusterMetrics(AffinityPropagation(max_iter=5000), samples)






1) data/alice1.txt. Both the spectrum: 129 and amplitude: 129 should had have the same length. The currnet # of samples is: 1.
 Amplitude (1st five): [  8.31841922e-06   4.62329127e-05   4.05553263e-05   2.75373388e-05
   1.42692978e-05]
2) data/eve1.txt. Both the spectrum: 129 and amplitude: 129 should had have the same length. The currnet # of samples is: 2.
 Amplitude (1st five): [  1.35029040e-05   5.44252765e-05   1.19388351e-04   5.80933853e-05
   2.16519506e-05]
3) data/bob1.txt. Both the spectrum: 129 and amplitude: 129 should had have the same length. The currnet # of samples is: 3.
 Amplitude (1st five): [  9.33411383e-06   4.05932607e-05   3.82830492e-05   1.59216068e-05
   8.75379624e-06]
4) data/bob2.txt. Both the spectrum: 129 and amplitude: 129 should had have the same length. The currnet # of samples is: 4.
 Amplitude (1st five): [  7.15253543e-06   3.24039377e-05   3.63528577e-05   2.25078209e-05
   1.16273779e-05]
5) data/alice2.txt. Both the spectrum: 129 and amplitu



### Experimenting with clustering on heart beat intervals

FFT really help. When we try to cluster on just the raw beat intervals the cluster cannot detect there are three individuals. It's good that it at least clusters the common ones.

With FFT we cluster the like onces and can discrimiate the odd person out.

In [16]:
samples = None
files = ['data/alice1.txt', 'data/eve1.txt', 'data/bob1.txt', 'data/bob2.txt', 'data/alice2.txt']
message = "{}) # features (expect 1000): {}. Current # of samples is: {}.\n Intervals (1st five): "\
        ""
            
for index, filename in enumerate(files):
    intervals = np.loadtxt(filename)
    intervals = intervals[intervals < 2000][0:1000] 
    #frequencySpectrum, amplitude = signal.welch(fix(intervals), 5, scaling='spectrum')
    if samples is None:
        samples = intervals[np.newaxis]
    else:
        samples = np.concatenate((samples, intervals[np.newaxis]), axis=0)
    print (message.format(str(index), intervals.size, samples.shape[0], intervals[0:5],\
            np.average(intervals), np.max(intervals), np.min(intervals), np.std(intervals)))

printClusterMetrics(AffinityPropagation(max_iter=5000), samples)

0) # features (expect 1000): 1000. Current # of samples is: 1.
 Intervals (1st five): 
1) # features (expect 1000): 1000. Current # of samples is: 2.
 Intervals (1st five): 
2) # features (expect 1000): 1000. Current # of samples is: 3.
 Intervals (1st five): 
3) # features (expect 1000): 1000. Current # of samples is: 4.
 Intervals (1st five): 
4) # features (expect 1000): 1000. Current # of samples is: 5.
 Intervals (1st five): 
There are 5 obervations and 2 clusters, with a confidence of 0.237648018209.
The labels were: [1 1 0 0 1]


In [15]:
intervals = np.loadtxt('data/alice1.txt')[0:1000]
print('The type is {} and the size is {}.'.format(type(intervals), intervals.size))
print(intervals[0:5])


The type is <type 'numpy.ndarray'> and the size is 1000.
[ 784.  794.  838.  794.  778.]
