# Import libraries

In [1]:
import matplotlib.pyplot as plt 
from random import randint

# Helper classes

In [128]:
# ========================================== input format type  ==========================================
#
#
#
# ========================================== output format type ==========================================
#
#
#
# ======================================= class fields description =======================================
# format type:
# class_names        =    [        name1            ,          name2            ,          name3         ]
#
# class_instances    =    [  [  [f0 ... f55],            [  [f0 ... f55],            [  [f0 ... f55],
#                               [f0 ... f55],               [f0 ... f55],               [f0 ... f55],
#                               [f0 ... f55],               [f0 ... f55],               [f0 ... f55],
#                                   ...,                      ...,                        ...,
#                               [f0 ... f55]  ]     ,       [f0 ... f55]  ]     ,       [f0 ... f55]  ]  ]
#
# class_means        =    [  [  mf0 ... mf55  ]     ,    [  mf0 ... mf55  ]     ,    [  mf0 ... mf55  ]  ]
#
# class_stddev       =    [  [  sf0 ... sf55  ]     ,    [  sf0 ... sf55  ]     ,    [  sf0 ... sf55  ]  ]

class Statistics:
    """Per-class mean and standard deviation of feature vectors read from text files.

    Each non-blank line of an input file is one feature vector: ``feature_count``
    numeric feature values followed by a numeric class label, separated by ``;``
    (a trailing ``;`` at the end of the line is allowed and ignored).

    Fields (all lists are indexed by class number, not by class name):
        class_names      - class label of each class; index 0 is always label 0
        class_instances  - for each class, the list of parsed rows (features + label)
        class_means      - for each class, the mean of every feature
        class_stddev     - for each class, the population standard deviation
                           (divides by N, not N - 1) of every feature

    Class 0 is always present, even when no row carries label 0; a class without
    instances keeps 0 for all of its means and standard deviations.
    """

    def __init__(self, addresses, feature_count=56):
        """Read every file in ``addresses`` and group its rows by class label.

        addresses      - iterable of paths to the feature files
        feature_count  - number of feature values preceding the class label

        Raises ValueError if a field is not numeric and IndexError if a row
        has fewer than ``feature_count + 1`` values.
        """
        self.feature_count = feature_count

        # class 0 is seeded up front so that it always occupies index 0
        self.class_names = [0]
        self.class_means = [[0 for _ in range(feature_count)]]
        self.class_stddev = [[0 for _ in range(feature_count)]]
        self.class_instances = [[]]

        for address in addresses:
            with open(address, 'r') as f:
                for raw_line in f:
                    line = raw_line.rstrip("\n")
                    # blank lines (e.g. extra newlines at the end of the file) carry no data
                    if not line.strip():
                        continue
                    self._add_instance(self._parse_line(line))

    @staticmethod
    def _parse_line(line):
        """Convert one ``;``-separated line into a list of floats."""
        fields = line.split(";")
        # a trailing ";" produces one empty field at the end - drop only that one
        if not fields[-1].strip():
            fields.pop()
        return [float(value) for value in fields]

    def _add_instance(self, row):
        """Append a parsed row to its class, registering the class if it is new."""
        label = row[self.feature_count]
        try:
            index = self.class_names.index(label)
        except ValueError:
            # first row of a class not seen before
            self.class_names.append(label)
            self.class_means.append([0 for _ in range(self.feature_count)])
            self.class_stddev.append([0 for _ in range(self.feature_count)])
            self.class_instances.append([])
            index = len(self.class_names) - 1
        self.class_instances[index].append(row)

    # gather statistics
    def process_statistics(self):
        """Compute all means, then all standard deviations (which depend on the means)."""
        self.calculate_all_means()
        self.calculate_all_stddevs()

    # process all classes and produce means
    def calculate_all_means(self):
        """Fill ``class_means`` for every class."""
        for class_number in range(len(self.class_means)):
            self.calculate_class_means(class_number)

    # calculate means for all features in a given class
    def calculate_class_means(self, input_class_number):
        """Fill ``class_means`` for every feature of one class."""
        for feature_index in range(len(self.class_means[input_class_number])):
            self.calculate_feature_mean(input_class_number, feature_index)

    # allows to calculate mean for given feature for given class number (not class name!)
    def calculate_feature_mean(self, input_class_number, input_feature_index):
        """Store the mean of one feature over all instances of one class.

        A class without instances gets 0 instead of raising ZeroDivisionError.
        """
        instances = self.class_instances[input_class_number]
        if not instances:
            self.class_means[input_class_number][input_feature_index] = 0
            return
        total = sum(row[input_feature_index] for row in instances)
        self.class_means[input_class_number][input_feature_index] = total / len(instances)

    # process all classes and produce stddevs
    def calculate_all_stddevs(self):
        """Fill ``class_stddev`` for every class; the means must be computed first."""
        for class_number in range(len(self.class_stddev)):
            self.calculate_class_stddevs(class_number)

    # calculate standard deviations for given class
    def calculate_class_stddevs(self, input_class_number):
        """Fill ``class_stddev`` for every feature of one class."""
        for feature_index in range(len(self.class_stddev[input_class_number])):
            self.calculate_feature_stddev(input_class_number, feature_index)

    # allows to calculate standard deviation for given feature for given class number (not class name!)
    def calculate_feature_stddev(self, input_class_number, input_feature_index):
        """Store the population standard deviation of one feature of one class.

        Uses the value currently held in ``class_means``, so the matching mean
        has to be calculated beforehand. A class without instances gets 0
        instead of raising ZeroDivisionError.
        """
        instances = self.class_instances[input_class_number]
        if not instances:
            self.class_stddev[input_class_number][input_feature_index] = 0
            return
        mean = self.class_means[input_class_number][input_feature_index]
        squared_error = sum((row[input_feature_index] - mean) ** 2 for row in instances)
        self.class_stddev[input_class_number][input_feature_index] = \
            (squared_error / len(instances)) ** 0.5

    def _write_table(self, f, header, table):
        """Write one section: header line, class-name line, one line per feature."""
        f.write(header)
        f.write("class_name:\t\t")
        f.write("".join(str(name) + "\t\t\t" for name in self.class_names))
        f.write("\n")

        # the first class defines how many feature rows are printed
        for feature_index in range(len(table[0])):
            f.write("f: " + str(feature_index) + "\t")
            f.write("".join("\t" + str(column[feature_index]) for column in table))
            f.write("\n")

    # print statistic to file
    def print_statisics(self, address):
        """Write the mean and standard deviation tables to the file ``address``.

        The file is overwritten. Each table has one column per class and one
        row per feature, separated by tabs.
        """
        with open(address, "w") as f:
            self._write_table(
                f,
                "================================= Mean Values =================================\n",
                self.class_means)
            self._write_table(
                f,
                "============================= Standard Deviations =============================\n",
                self.class_stddev)

    # correctly spelled alias; the original (misspelled) name is kept for existing callers
    print_statistics = print_statisics

    # allows to check all feature vectors corresponding to a class in a shortened form
    def print_class(self, input_class_name):
        """Print every feature vector of a class, showing only the first and last 2 features.

        Prints a notice and nothing else when ``input_class_name`` is unknown.
        """
        print("class =", input_class_name)

        # find index corresponding to class name
        if input_class_name not in self.class_names:
            print("class", input_class_name, "not found")
            return
        index = self.class_names.index(input_class_name)

        # rows also hold the class label after the features, so index by feature count
        last = len(self.class_means[index]) - 1
        for i, row in enumerate(self.class_instances[index]):
            print("feature", i, "\t-\t[", row[0], ",",
                                          row[1], ",",
                                          "...", ",",
                                          row[last - 1], ",",
                                          row[last], "]")

# Addresses

In [2]:
# Maps a short data-set name to its location on disk: the raw noise reference
# file, and one folder of extracted features per gesture.
directory = {"noise": "data/raw_data/noise/noise_reference 1.txt"}

# features data: finger taps ("<finger>_finger_features1")
directory.update(
    (finger + "_finger_features1", "data/features1/taps/" + finger + "_finger/")
    for finger in ("index", "middle", "little")
)

# features data: hand movements ("hand_<direction>_features1")
directory.update(
    ("hand_" + direction + "_features1", "data/features1/hand_movement/" + direction + "/")
    for direction in ("up", "down", "left", "right")
)

# Workspace

In [130]:
# Build the paths of the ten "hand up" feature sets (set1 ... set10); every
# file name is the set number followed by the same descriptive suffix.
set_suffix = (", 2 gestures, 5 repeats, features, classified, clean, "
              "hand placed parallel to floor and moves up.txt")

input_file_list = []
for i in range(1, 11):
    input_file = "{}set{}{}".format(directory["hand_up_features1"], i, set_suffix)
    input_file_list.append(input_file)

output_file = "data/statistics.txt"

# Load all sets, compute the per-class means and standard deviations,
# and write both tables to the output file.
mstatistics = Statistics(input_file_list)
mstatistics.process_statistics()
mstatistics.print_statisics(output_file)