-
Notifications
You must be signed in to change notification settings - Fork 0
/
datasummarizer.py
58 lines (45 loc) · 2.56 KB
/
datasummarizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 22 19:32:19 2021
@author: rodrigosandon
"""
import numpy as np
import pickle
# Module-level handle on "results4.txt", opened at import time in "a+" mode
# (append + read; the file is created if it does not exist yet).
# NOTE(review): this handle is never closed in the visible code - confirm
# where it is written to and whether it should be managed with a `with` block.
f = open("results4.txt", "a+")
class DataSummarizer:
    """Build per-class weighted mean / standard-deviation matrices.

    Attributes:
        summaries: maps a region label to ``[mean_matrix, std_matrix]``.
        class_count: maps a region label to its number of matching records.
        imp_features: object returned by ``load_obj("features_mean")``.

    NOTE(review): ``self.data`` (a table with "Class" and "Data" columns) and
    ``self.regions`` (iterable of labels) are read by the methods below but
    are not assigned anywhere in this class; they must be set on the instance
    before ``dataSummary`` / ``generateDataSummary`` are called.
    """

    def __init__(self, path, to_search):
        """Initialise empty result containers and load the feature list.

        NOTE(review): ``path`` and ``to_search`` are accepted for interface
        compatibility but are not used by the visible code - confirm what
        they were meant to populate.
        """
        self.summaries = {}
        self.class_count = {}
        self.imp_features = self.load_obj("features_mean")

    def load_obj(self, name):
        """Unpickle and return the object stored under ``name``.

        NOTE(review): the original helper definition was lost (only its
        ``return pickle.load(f)`` line survived); reading ``name + ".pkl"``
        in binary mode is an assumption - confirm the real file location.
        """
        # SECURITY: pickle can execute arbitrary code while loading; only
        # use this on files produced by a trusted source.
        with open(name + ".pkl", "rb") as handle:
            return pickle.load(handle)

    def dataSummary(self, label, features, shape=(32, 32)):
        """Return weighted mean and sample std matrices for one class.

        Args:
            label: value compared against the "Class" column of ``self.data``.
            features: iterable of indexable items ``(flat_index, a, b)``;
                ``flat_index`` is a row-major cell index into a matrix of
                ``shape`` and the cell weight is ``(1 - a) * (1 - b)``.
            shape: ``(rows, cols)`` of the matrices (default 32 x 32).

        Returns:
            ``(mean_matrix, std_matrix, label_count)``. Cells not named in
            ``features`` stay 0. ``mean_matrix`` is all-NaN when no record
            matches; ``std_matrix`` is all-NaN when fewer than two records
            match, because the (n - 1) sample deviation is undefined there.
        """
        n_cols = shape[1]

        # Select the matrices of the requested class once, instead of
        # re-scanning the whole table for each pass.
        matching = self.data.loc[self.data["Class"] == label, "Data"]
        label_count = len(matching)

        # Resolve every feature to (row, col, weight) up front; int() keeps
        # the coordinates usable as indices even if the flat index is a float.
        cells = []
        for feature in features:
            r, c = divmod(int(feature[0]), n_cols)
            cells.append((r, c, (1 - feature[1]) * (1 - feature[2])))

        if label_count == 0:
            # Nothing to average: report "undefined" without a 0/0 warning.
            return np.full(shape, np.nan), np.full(shape, np.nan), 0

        # First pass: weighted sum per cell, then divide by the record count.
        mean_matrix = np.zeros(shape)
        for matrix in matching:
            for r, c, weight in cells:
                mean_matrix[r][c] += weight * matrix[r][c]
        mean_matrix = mean_matrix / label_count

        if label_count < 2:
            return mean_matrix, np.full(shape, np.nan), label_count

        # Second pass: squared deviations of the weighted values from the mean.
        std_matrix = np.zeros(shape)
        for matrix in matching:
            for r, c, weight in cells:
                std_matrix[r][c] += (weight * matrix[r][c] - mean_matrix[r][c]) ** 2
        std_matrix = np.sqrt(std_matrix / (label_count - 1))

        return mean_matrix, std_matrix, label_count

    def generateDataSummary(self):
        """Fill ``summaries`` and ``class_count`` for every label in ``self.regions``."""
        for region in self.regions:
            means, stdevs, class_count = self.dataSummary(region, self.imp_features)
            self.summaries[region] = [means, stdevs]
            self.class_count[region] = class_count