In [1]:
import numpy as np
import os
from sklearn.cluster import KMeans, MiniBatchKMeans
import csv
import time

In [2]:
# Channel labels. The visible cells only use how many there are
# (via number_of_channels); per-channel files are addressed by index.
# NOTE(review): the list goes from "F8-T8" straight to "P8-O2" - confirm
# that no "T8-P8" entry is intended between them.
channels = [
    "FP1-F7", "F7-T7", "T7-P7", "P7-O1",
    "FP1-F3", "F3-C3", "C3-P3", "P3-O1",
    "FP2-F4", "F4-C4", "C4-P4", "P4-O2",
    "FP2-F8", "F8-T8", "P8-O2",
    "FZ-CZ", "CZ-PZ",
]

# Upper bound of every per-channel loop in the notebook.
number_of_channels = len(channels)

In [3]:
# Cluster counts (k) to evaluate; later cells slice into this list.
number_of_clusters = [
    2, 3, 4, 5, 6, 7,
    10, 15, 30, 50, 100,
]

# classes_duration // 60 is embedded in every "Interval-300-classes-length-..." path.
classes_duration = 60 * 60
# Used by the Phase 2 cell when it builds its features path.
length = 60
interval = 300 * 60

# How many whole class windows of classes_duration fit into interval.
number_of_classes = interval // classes_duration

In [4]:
# Class labels are kept as strings because they are interpolated into
# directory names such as "class-1".
classes = ['1', '5']

# Each entry is one classification task: the list of class labels it separates.
classes_to_classify = [
    ['1', '5'],
    [str(label) for label in range(1, 6)],
]

In [5]:
def clustring(B, cluster):
    """Fit mini-batch k-means on ``B`` and return the fitted cluster centres.

    Parameters
    ----------
    B
        Samples, passed unchanged to ``MiniBatchKMeans.fit``.
    cluster : int
        Number of clusters (forwarded as ``n_clusters``).

    Returns
    -------
    The fitted estimator's ``cluster_centers_`` attribute.

    ``random_state`` is pinned to 0 so repeated runs use the same seed.
    """
    model = MiniBatchKMeans(n_clusters=cluster, random_state=0)
    model.fit(B)
    return model.cluster_centers_

In [6]:
def distance(a, b):
    """Return the Euclidean distance between ``a`` and ``b`` in the plane.

    Only the first two components of each argument (index 0 and index 1)
    are read; any further components are ignored.

    Parameters
    ----------
    a, b
        Indexable objects with at least two components each.

    Returns
    -------
    ``np.sqrt`` of the summed squared differences of the two components.
    """
    dx = b[0] - a[0]
    dy = b[1] - a[1]
    squared_norm = dx**2 + dy**2
    return np.sqrt(squared_norm)

## Make Directories

In [7]:
# Build the output tree
#   <root>/Features/classification-<labels>/<k>-clusters/class-<label>
# for every classification task, cluster count and class label.
# exist_ok=True makes the cell idempotent: re-running it (e.g. after a kernel
# restart) no longer raises FileExistsError when the folders already exist.
features_root = f"Data/Interval-300-classes-length-{classes_duration//60}/Features"
for ctc in classes_to_classify:
    classification_dir = f"{features_root}/classification-{'-'.join(ctc)}"
    os.makedirs(classification_dir, exist_ok=True)
    for cluster in number_of_clusters:
        cluster_dir = f"{classification_dir}/{cluster}-clusters"
        os.makedirs(cluster_dir, exist_ok=True)
        for i in ctc:
            os.makedirs(f"{cluster_dir}/class-{i}", exist_ok=True)

## Needed Paths

In [7]:
# All three locations live under the same per-interval data folder, whose
# name embeds classes_duration // 60.
_interval_root = f"Data/Interval-300-classes-length-{classes_duration//60}"

# Files listed and loaded one by one in Phase 2.
datapoints_after_pca_path = f"{_interval_root}/data-points-after-pca"
# Per-class, per-channel arrays loaded for clustering in Phase 1.
combined_channels_data_path = f"{_interval_root}/combined-channels-data"
# Where Phase 1 saves centroids and Phase 2 loads them from.
cluster_centroids_path = f"{_interval_root}/cluster_centroids"

# Clustering And Feature Extraction

## Phase 1 (Performing clustering on each of the classes):

In [8]:
# Scratch check: display the cluster counts from index 7 onward, i.e. the
# same slice the Phase 1 loop iterates over.
number_of_clusters[7:]

[15, 30, 50, 100]

In [9]:
# a = np.load(f"{combined_channels_data_path}/class-{1}/channel_{0}.npy")

In [10]:
# for i in range(number_of_channels):
#     channel_data = np.load(f"{combined_channels_data_path}/class-{1}/channel_{i}.npy")
#     d = clustring(channel_data, 50)
#     del channel_data
#     print('yes')

In [9]:
# Rebind `classes` to all five labels, replacing the two-label list assigned earlier.
classes = [str(label) for label in range(1, 6)]

In [10]:
# Phase 1: for each (cluster count, class) pair, cluster every channel's
# combined data separately and save the per-channel centroids in one .npy file.
# NOTE(review): the [7:] and [1:-1] slices restrict the run to a subset of
# cluster counts / classes - this looks like a manual resume point; confirm
# before a full re-run.
for cluster in number_of_clusters[7:]:
    for _class in classes[1:-1]:
        # Create the destination folder *before* the slow clustering work so a
        # missing directory cannot discard minutes of computation at np.save.
        class_centroids_dir = f"{cluster_centroids_path}/class-{_class}"
        os.makedirs(class_centroids_dir, exist_ok=True)

        centeroids = []
        t1 = time.time()

        for i in range(number_of_channels):
            channel_data = np.load(f"{combined_channels_data_path}/class-{_class}/channel_{i}.npy")

            centeroids.append(clustring(channel_data, cluster))
            # Release the channel array before loading the next one.
            del channel_data
            print('yes')

        # One file per (class, cluster count); entry i holds channel i's centroids.
        np.save(f"{class_centroids_dir}/{cluster}_clusters_centeroids.npy", centeroids)
        t2 = time.time()
        # Elapsed wall-clock seconds for this (cluster count, class) pair.
        print(t2 - t1)

yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
647.7032568454742
yes
yes
yes
yes
yes
yes
yes
yes
yes


KeyboardInterrupt: 

## Phase 2 (Counting the number of datapoints in each cluster):

In [9]:
# Rebind the task list to the single two-class task; cells run after this one
# that loop over classes_to_classify see only this entry.
classes_to_classify = [['1', '5']]
for ctc in classes_to_classify:
    # Scratch check: a one-element slice holding the task's first label.
    print(ctc[:1])


['1']


In [10]:
# Scratch check: slicing from index 1 drops the first element of a list.
a = [1, 3]
a[1:]

[3]

In [None]:
# Phase 2: for every data file of a class, count - per channel - how many
# datapoints lie nearest to each centroid of each class in the task, and save
# those counts as the file's feature matrix.
for ctc in classes_to_classify:
    # Use classes_duration // 60, the same expression every other path in the
    # notebook (including the directory-creation cell) is built from, instead
    # of the separate `length` constant that merely happens to share its value.
    features_path = f"Data/Interval-300-classes-length-{classes_duration//60}/Features/classification-{'-'.join(ctc)}"
    print(features_path)

    # NOTE(review): [10:11] selects only the last cluster count; confirm this
    # restriction is intended for a full run.
    for cluster in number_of_clusters[10:11]:
        # Load every class's centroids for this cluster count once, up front,
        # so the per-file loop below does no extra disk reads.
        centroids = [
            np.load(f"{cluster_centroids_path}/class-{_class}/{cluster}_clusters_centeroids.npy")
            for _class in ctc
        ]

        # NOTE(review): ctc[1:] skips the first class of the task and [:11000]
        # caps the file count on an unsorted os.listdir result; both look like
        # manual resume/limit settings - confirm.
        for class1 in ctc[1:]:
            t1 = time.time()
            current_class_path = f"{datapoints_after_pca_path}/class-{class1}"
            current_class_list = os.listdir(current_class_path)

            for file in current_class_list[:11000]:
                current_class_data = np.load(current_class_path + '/' + file)

                # Row i belongs to channel i. Columns are len(ctc) consecutive
                # groups of `cluster` counters, one group per class in ctc order.
                features = np.zeros((number_of_channels, cluster * len(ctc)), dtype=int)

                for i in range(number_of_channels):
                    # Coordinate-first layout (coord, n_points, 1): distance()
                    # reads index 0 and 1 as x and y, so it broadcasts over
                    # every point/centroid pair in a single call.
                    points = np.asarray(current_class_data[i]).T[:, :, np.newaxis]

                    for count, class_centroids in enumerate(centroids):
                        # (coord, 1, n_centroids) to broadcast against points.
                        centres = np.asarray(class_centroids[i]).T[:, np.newaxis, :]

                        # (n_points, n_centroids) distances; argmin picks the
                        # lowest index on ties, as list.index(min(...)) does.
                        nearest = distance(points, centres).argmin(axis=1)

                        start = count * cluster
                        features[i, start:start + cluster] = np.bincount(nearest, minlength=cluster)

                # Saving features
                np.save(f"{features_path}/{cluster}-clusters/class-{class1}/{file}", features)

            t2 = time.time()
            # Elapsed wall-clock seconds for this class.
            print(t2 - t1)

Data/Interval-300-classes-length-60/Features/classification-1-5


In [None]:
# WARNING: asks the operating system to shut the machine down (Windows-style
# `shutdown` arguments). Under "Run All" this executes as soon as the cells
# above it finish - skip or remove this cell unless a power-off is intended.
os.system("shutdown /s /t 1")

In [None]:
# WARNING: asks systemd to power the machine off. Under "Run All" this executes
# as soon as the cells above it finish - skip or remove this cell unless a
# power-off is intended.
os.system('systemctl poweroff') 