In [1]:
#To generate statistics for the paper WD2017
#Fernando Wario
#May 2017

%matplotlib notebook

import os
import csv
import cv2
import json
import sys
import glob
import math
import datetime
import pysolar
import pytz
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.cm as cm
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from matplotlib import pyplot as plt
from joblib import Parallel, delayed
from sklearn import preprocessing
from sklearn.linear_model import TheilSenRegressor
from skimage.transform import radon, rescale
from scipy.stats import multivariate_normal
from scipy import stats

sys.path.append('../functions/')
sys.path.append('../utils/')

from DoG import DoG
from file_manager import File_manager
from wdd_decoder_functions import read_DD, def_orientation, subst_angles
from wdd_clustering_functions import data_format
from wdd_plotter_functions import SolarAzEl, translateToRelativeSunDirection

#Radians-to-degrees conversion factor (180/pi): multiplying an angle in radians
#by radi gives degrees, dividing an angle in degrees by radi gives radians
radi = 180/np.pi

In [None]:
#GROUND TRUTH DATA
#Directory with the Ground Truth data; every entry in it is opened as a CSV file
path = '../Data/GTData'
folders = os.listdir(path)

#GTArray maps the first column of a row (the WR key) to the list of values found
#in the last column of every row carrying that key, in the order the rows are read
GTArray = {}

for gt_name in folders:
    #Path to next GT file
    inputFile = os.path.join(path, gt_name)
    print('working on ' + inputFile)
    with open(inputFile, 'rt', encoding='utf-8-sig') as GTFile:
        gt_reader = csv.reader(GTFile, delimiter = ',')
        for row in gt_reader:
            wr_key = row[0]
            gt_value = float(row[-1])
            #setdefault() creates the list the first time this WR is registered
            GTArray.setdefault(wr_key, []).append(gt_value)

#DOT DETECTOR DATA
#Folder with the Dot Detector data
path = '../Data/GTRuns/'
#DDArray starts as an empty dict and is then replaced by the result of read_DD()
DDArray = {}
DDArray = read_DD(path)
print('done with DDArray!')

#DECODER DATA
#CSV file with the Decoder data
inputFile = '../Data/GTRuns_decoder.csv'

#DecArray maps column 0 of every data row to column 2 multiplied by radi
DecArray = {}
with open(inputFile, 'rt', encoding='utf-8-sig') as DecFile:
    reader2 = csv.reader(DecFile, delimiter=',')
    #the first row is the header and is skipped
    next(reader2)
    DecArray.update((row[0], float(row[2])*radi) for row in reader2)
    print('done with DecArray!')

#Average and error data is computed
#Average Ground Truth, one value per WR key
AvgGTArray = {}
#Error Ground Truth, one list per WR key (one error per Ground Truth value)
ErrGTArray = {}
#Error Decoder, one value per WR key
ErrDecArray = {}
for v, gt_values in GTArray.items():
    #Circular mean of the Ground Truth values over the range [-180, 180]
    avg_gt = stats.circmean(gt_values, high=180, low=-180)
    AvgGTArray[v] = avg_gt
    #Decoder error: subst_angles() applied to the average and the decoder value
    ErrDecArray[v] = subst_angles(avg_gt, DecArray[v])
    #Ground Truth errors: subst_angles() applied to the average and every single value
    ErrGTArray[v] = [subst_angles(avg_gt, single_gt) for single_gt in gt_values]
print('done with AvGTArray, ErrGTArray and ErrDecArray')

In [None]:
#Graphic ErrGTArray distribution (each GT file)
#Threshold on the absolute error used to split the WRuns below
lim = 140
#Keys of all WRuns having at least one error over the limit
ErrGTKey = [key for key, value in ErrGTArray.items() if np.max(np.abs(value)) > lim]
#Error lists of the WRuns whose errors are all under that limit
list_values = [value for key, value in ErrGTArray.items() if np.max(np.abs(value)) < lim]

#The kept errors are flattened into one 1-D float array. Flattening list by list
#also works when the WRuns do not all have the same number of errors, a case in
#which np.array() on the nested list does not give a numeric 2-D array
list_values = np.array([err for value in list_values for err in value], dtype=float)

#Kept from the original cell; after the selection above every value is already inside (-lim, lim)
list_values = list_values[np.logical_and(list_values>-lim,list_values<lim)]

print(len(list_values))

mean=np.mean(list_values)
sigma=np.std(list_values)

print('mean = ' + str(mean))
print('std = ' + str(sigma))

fig1 = plt.figure(dpi=80)
#density=True normalises the histogram to unit area; it replaces normed=True,
#which was removed from plt.hist() in Matplotlib 3.1
plt.hist(list_values, bins=150, density=True)

x = np.linspace(np.amin(list_values), np.amax(list_values), 200)
#Normal pdf with the sample mean and std; stats.norm.pdf() replaces
#mlab.normpdf(), which was removed in Matplotlib 3.1
plt.plot(x, stats.norm.pdf(x, mean, sigma))

plt.annotate('M = %5.6f' %mean, xy=(-28, .075))
plt.annotate('SD = %5.6f' %sigma, xy=(-28, .07))

plt.title("Ground Truth Distribution @ WR Level")
plt.xlabel('Error', fontsize=12)
plt.ylabel('Frequency', fontsize=12)

plt.show()

# save figure
# Save the image in memory in PNG format
#fig1.savefig('png1.png', format='png')

In [None]:
#Graphic ErrDecArray distribution
#Threshold on the absolute error used to split the WRuns below
lim = 140

#For ErrDecArray
#Keys of all WRuns with an error over the limit
ErrDecKey = [key for key, value in ErrDecArray.items() if np.max(np.abs(value)) > lim]
#and the errors under that limit
list_values = [value for key, value in ErrDecArray.items() if np.max(np.abs(value)) < lim]

list_values = np.array(list_values)

print(len(list_values))

mean=np.mean(list_values)
sigma=np.std(list_values)

print('mean = ' + str(mean))
print('std = ' + str(sigma))

fig2 = plt.figure(dpi=80)
#density=True normalises the histogram to unit area; it replaces normed=True,
#which was removed from plt.hist() in Matplotlib 3.1
plt.hist(list_values, bins=100, density=True)

x = np.linspace(np.amin(list_values), np.amax(list_values), 100)
#Normal pdf with the sample mean and std; stats.norm.pdf() replaces
#mlab.normpdf(), which was removed in Matplotlib 3.1
plt.plot(x, stats.norm.pdf(x, mean, sigma))

plt.annotate('M = %5.6f' %mean, xy=(-29, .09))
plt.annotate('SD = %5.6f' %sigma, xy=(-29, .085))

plt.title("OM Error Distribution @ WR Level (inliers)")
plt.xlabel('Error', fontsize=12)
plt.ylabel('Frequency', fontsize=12)

plt.show()

# save figure
# Save the image in memory in PNG format
#fig2.savefig('png2.png', format='png')

In [None]:
#Graphic ErrCluster distribution

#Reading data from GTClusters.csv
GTFile = '../Data/GTClusters.csv'
#GTClusters maps column 0 of every data row to a tuple:
#(column 1 multiplied by radi, list with all columns from index 3 onwards)
GTClusters = {}
with open(GTFile, 'rt', encoding='utf-8-sig') as GTClusterFile:
    #Quoting is switched off (QUOTE_NONE), so no quote character is needed.
    #quotechar=None says so explicitly; the empty string used before is
    #rejected by the csv module since Python 3.11
    reader3 = csv.reader(GTClusterFile, delimiter=',', quotechar=None, quoting=csv.QUOTE_NONE)
    #skipping header
    next(reader3)
    for row in reader3:
        GTClusters[row[0]] = float(row[1])*radi, row[3:]
    print('done with %3d GTClusters!' %len(GTClusters))
#print(GTClusters)

#Reading data from GTAverage.csv (the file the sandbox cell at the end of the notebook writes)
AvgGTFile = '../Data/GTAverage.csv'
#AvgGTArray is rebuilt from scratch here: column 0 of every data row is mapped
#to column 1 multiplied by radi
AvgGTArray = {}
with open(AvgGTFile, 'rt', encoding='utf-8-sig') as AvgGT:
    reader4 = csv.reader(AvgGT, delimiter=',')
    #the first row is the header and is skipped
    next(reader4)
    AvgGTArray.update((row[0], float(row[1])*radi) for row in reader4)
    print('done with AvgGTArray!')

#AvgGT per cluster array
AvgGTCluster = {}
#Error Cluster
ErrClusArray = {}
#GTClusArray per cluster, per clicker
GTClusArray = {}
#Error GT Cluster
ErrGTClusArray = {}
#Error Decoder
ErrDecArray_IL = {}
#Every GTClusters entry is a (cluster angle, list of WR keys) tuple
for clus, (clus_angle, clus_keys) in GTClusters.items():
    #Cluster average: circular mean over [-180, 180] of the AvgGTArray values of its WRuns
    AvgGTCluster[clus] = stats.circmean([AvgGTArray[key] for key in clus_keys], high=180, low=-180)
    ErrClusArray[clus] = subst_angles(AvgGTCluster[clus], clus_angle)

    #Decoder error via subst_angles() -- only for the WRuns listed in a cluster (inliers after RANSAC)
    ErrDecArray_IL.update((key, subst_angles(AvgGTArray[key], DecArray[key])) for key in clus_keys)

    #Per clicker values: position i of every GTArray list is read for i = 0..7,
    #so each WRun of the cluster needs at least 8 Ground Truth values
    GTClusArray[clus] = []
    ErrGTClusArray[clus] = []
    for i in range(8):
        clicker_avg = stats.circmean([GTArray[key][i] for key in clus_keys], high=180, low=-180)
        GTClusArray[clus].append(clicker_avg)
        ErrGTClusArray[clus].append(subst_angles(AvgGTCluster[clus], clicker_avg))

print('done with AvgGTCluster, ErrGTClusArray and ErrClusArray')
#ErrClusArray
#ErrGTClusArray
#ErrDecArray

In [None]:
#Graphic ErrDecArray distribution (ErrDecArray_IL: only the WRuns listed in a cluster)
#Threshold on the absolute error used to split the WRuns below
lim = 180

#For ErrDecArray
#Keys of all WRuns with an error over the limit
ErrDecKey = [key for key, value in ErrDecArray_IL.items() if np.max(np.abs(value)) > lim]
#and the errors under that limit
list_values = [value for key, value in ErrDecArray_IL.items() if np.max(np.abs(value)) < lim]

list_values = np.array(list_values)

print(len(list_values))

mean=np.mean(list_values)
sigma=np.std(list_values)

print('mean = ' + str(mean))
print('std = ' + str(sigma))

fig3 = plt.figure(dpi=80)
#density=True normalises the histogram to unit area; it replaces normed=True,
#which was removed from plt.hist() in Matplotlib 3.1
plt.hist(list_values, bins=100, density=True)

x = np.linspace(np.amin(list_values), np.amax(list_values), 100)
#Normal pdf with the sample mean and std; stats.norm.pdf() replaces
#mlab.normpdf(), which was removed in Matplotlib 3.1
plt.plot(x, stats.norm.pdf(x, mean, sigma))

plt.annotate('M = %5.6f' %mean, xy=(-24, .15))
plt.annotate('SD = %5.6f' %sigma, xy=(-24, .14))

plt.title("OM Error Distribution @ WR Level (after RANSAC)")
plt.xlabel('Error', fontsize=12)
plt.ylabel('Frequency', fontsize=12)

plt.show()

# save figure
# Save the image in memory in PNG format
#fig3.savefig('png3.png', format='png')

In [None]:
#Graphic ErrGTClusArray distribution (each GT file)
#Threshold on the absolute error used to split the clusters below
lim = 150
#Keys of all clusters having at least one error over the limit
ErrGTKey = [key for key, value in ErrGTClusArray.items() if np.max(np.abs(value)) > lim]
#Error lists of the clusters whose errors are all under that limit
list_values = [value for key, value in ErrGTClusArray.items() if np.max(np.abs(value)) < lim]

#The kept errors are flattened into one 1-D float array. Flattening list by list
#also works if the lists do not all have the same length, a case in which
#np.array() on the nested list does not give a numeric 2-D array
list_values = np.array([err for value in list_values for err in value], dtype=float)

#Kept from the original cell; after the selection above every value is already inside (-lim, lim)
list_values = list_values[np.logical_and(list_values>-lim,list_values<lim)]

print(len(list_values))

mean=np.mean(list_values)
sigma=np.std(list_values)

print('mean = ' + str(mean))
print('std = ' + str(sigma))

fig4 = plt.figure(dpi=80)
#density=True normalises the histogram to unit area; it replaces normed=True,
#which was removed from plt.hist() in Matplotlib 3.1
plt.hist(list_values, bins=150, density=True)

x = np.linspace(np.amin(list_values), np.amax(list_values), 200)
#Normal pdf with the sample mean and std; stats.norm.pdf() replaces
#mlab.normpdf(), which was removed in Matplotlib 3.1
plt.plot(x, stats.norm.pdf(x, mean, sigma))

plt.annotate('M = %5.6f' %mean, xy=(-9, .28))
plt.annotate('SD = %5.6f' %sigma, xy=(-9, .26))

plt.title("Ground Truth distribution @ Dance Level")
plt.xlabel('Error', fontsize=12)
plt.ylabel('Frequency', fontsize=12)

plt.show()

# save figure
# Save the image in memory in PNG format
#fig4.savefig('png4.png', format='png')

In [None]:
#For ErrClusKey
#Threshold on the absolute error used to split the clusters below
lim = 150
#Keys of all clusters with an error over the limit
ErrClusKey = [key for key, value in ErrClusArray.items() if np.max(np.abs(value)) > lim]
#and the errors under that limit
list_values = [value for key, value in ErrClusArray.items() if np.max(np.abs(value)) < lim]

list_values = np.array(list_values)

print(len(list_values))

mean=np.mean(list_values)
sigma=np.std(list_values)

print('mean = ' + str(mean))
print('std = ' + str(sigma))

fig5 = plt.figure(dpi=80)
#density=True normalises the histogram to unit area; it replaces normed=True,
#which was removed from plt.hist() in Matplotlib 3.1
plt.hist(list_values, bins=30, density=True)

x = np.linspace(np.amin(list_values), np.amax(list_values), 100)
#Normal pdf with the sample mean and std; stats.norm.pdf() replaces
#mlab.normpdf(), which was removed in Matplotlib 3.1
plt.plot(x, stats.norm.pdf(x, mean, sigma))

plt.annotate('M = %5.6f' %mean, xy=(-14.5, .33))
plt.annotate('SD = %5.6f' %sigma, xy=(-14.5, .31))

plt.title("OM Error Distribution @ Dance level (after RANSAC)")
plt.xlabel('Error', fontsize=12)
plt.ylabel('Frequency', fontsize=12)

plt.show()

# save figure
# Save the image in memory in PNG format
#fig5.savefig('png5.png', format='png')

In [2]:
#Translate angle to relative sun direction
#Date and place parameters handed unchanged to translateToRelativeSunDirection()
#(La/Lo presumably latitude/longitude, DiffUTC presumably the offset to UTC in hours -- TODO confirm)
YYYY = 2016
MM = 8
DD = 16
DiffUTC = 2
La = 52.457259
Lo = 13.296225
#Angle Hive-Feeder
AHF = -2.2853
#Reference every error below is measured against: AHF scaled by radi, computed once
feeder_ref = AHF*radi

#Decoder data WR is loaded
WRDecData = {}
#Extract data for camera 1, remember to set plot parameter to 1 if plotting on a map
WRDecData = data_format('../Data/Filtering/20160816_total_filtered.csv', '1', 1)
WRRTS = {}
WRErrorRTS = {}
for wr_key in WRDecData:
    wr_fields = WRDecData[wr_key]
    #Fields 1, 5 and 6 of each entry are used as angle, hour and minute
    sun_relative = translateToRelativeSunDirection(
        wr_fields[1], YYYY, MM, DD, int(wr_fields[5]), int(wr_fields[6]), DiffUTC, La, Lo)
    #NHAngle: Angle in the normal handedness
    MapAngle, NHAngle = sun_relative
    WRRTS[wr_key] = NHAngle
    #Error to angle hive-feeder
    WRErrorRTS[wr_key] = subst_angles(feeder_ref, NHAngle*radi)

#Decoder data Dance is loaded
DanceRTS = {}
DanceErrorRTS = {}
with open('../Data/Filtering/20160816_total_cam1_plot.csv', 'rt', encoding='utf-8-sig') as DecFile:
    #Every row is used (no header is skipped): column 0 is the key,
    #columns 2, 3 and 4 are used as angle, hour and minute
    for row in csv.reader(DecFile, delimiter = ','):
        dance_key = row[0]
        #NHAngle: Angle in the normal handedness
        MapAngle, NHAngle = translateToRelativeSunDirection(
            float(row[2]), YYYY, MM, DD, int(row[3]), int(row[4]), DiffUTC, La, Lo)
        DanceRTS[dance_key] = NHAngle
        #Error to angle hive-feeder
        DanceErrorRTS[dance_key] = subst_angles(feeder_ref, NHAngle*radi)

  (leap_seconds_base_year + len(leap_seconds_adjustments) - 1)


In [5]:
#Graphic DanceErrorRTSKey distribution
#Dances with an absolute error below this limit are kept
lim = 45

#For DanceErrorRTSKey
#Keys of the Dances with errors within the limit (read again by the WR level cell)
DanceErrorRTSKey = [key for key, value in DanceErrorRTS.items() if np.abs(value) < lim]
#and the errors under that limit
list_values = [value for key, value in DanceErrorRTS.items() if np.abs(value) < lim]

list_values = np.array(list_values)

print(len(list_values))

mean=np.mean(list_values)
sigma=np.std(list_values)

print('M = ' + str(mean))
print('SD = ' + str(sigma))

fig6 = plt.figure(dpi=80)
#density=True normalises the histogram to unit area; it replaces normed=True,
#which was removed from plt.hist() in Matplotlib 3.1
plt.hist(list_values, bins=100, density=True)

x = np.linspace(np.amin(list_values), np.amax(list_values), 100)
#Normal pdf with the sample mean and std; stats.norm.pdf() replaces
#mlab.normpdf(), which was removed in Matplotlib 3.1
plt.plot(x, stats.norm.pdf(x, mean, sigma))

plt.annotate('M = %5.6f' %mean, xy=(-48, .065))
plt.annotate('SD = %5.6f' %sigma, xy=(-48, .060))

plt.title("Error Distribution @ Dance Level (inliers)")
plt.xlabel('Error', fontsize=12)
plt.ylabel('Frequency', fontsize=12)

plt.show()
fig6.savefig('DL.png', format='png')

438
M = 2.33230940903
SD = 11.1240010021


<IPython.core.display.Javascript object>

In [6]:
#Graphic WRErrorRTSKey distribution
#Keys of the Dances with errors within the limit (DanceErrorRTSKey is built in the
#Dance level cell); a set makes the membership test below a constant-time lookup
inlier_dances = set(DanceErrorRTSKey)
list_values = []
with open('../Data/Filtering/20160816_total_cam1.csv', 'rt', encoding='utf-8-sig') as DecFile:
    reader = csv.reader(DecFile, delimiter = ',')
    #skipping header
    next(reader)
    for row in reader:
        if (row[0] in inlier_dances):
            #Columns from index 3 onwards are used as keys into WRErrorRTS
            for key in row[3:]:
                #append() keeps the errors in file order; the former insert(-1, ...)
                #placed every new value in front of the last one instead
                list_values.append(WRErrorRTS[key])

list_values = np.array(list_values)

print(len(list_values))

mean=np.mean(list_values)
sigma=np.std(list_values)

print('M = ' + str(mean))
print('SD = ' + str(sigma))

fig7 = plt.figure(dpi=80)
#density=True normalises the histogram to unit area; it replaces normed=True,
#which was removed from plt.hist() in Matplotlib 3.1
plt.hist(list_values, bins=100, density=True)

x = np.linspace(np.amin(list_values), np.amax(list_values), 100)
#Normal pdf with the sample mean and std; stats.norm.pdf() replaces
#mlab.normpdf(), which was removed in Matplotlib 3.1
plt.plot(x, stats.norm.pdf(x, mean, sigma))

plt.annotate('M = %5.6f' %mean, xy=(-70, .037))
plt.annotate('SD = %5.6f' %sigma, xy=(-70, .035))

plt.title("Error Distribution @ WR Level (inliers)")
plt.xlabel('Error', fontsize=12)
plt.ylabel('Frequency', fontsize=12)

plt.show()
fig7.savefig('WR.png', format='png')

2756
M = 2.68469585583
SD = 14.3733492575


<IPython.core.display.Javascript object>

In [None]:
#Notebook's sandbox

#########################################################################
#Load GTData from all users, calculate an average value for each WRun and store them in
#GTAverage.csv and GTAverage.json

#GROUND TRUTH DATA
#Path to the Ground Truth data
path = '../Data/GTData'
folders = os.listdir(path)

#Ground Truth data is loaded: first column of a row -> list of the last-column values
GTArray = {}
#Average Ground Truth
AvgGTArray = {}

for i in folders:
    #Path to next GT file
    inputFile = os.path.join(path,i)
    print('working on ' + inputFile)
    with open(inputFile, 'rt', encoding='utf-8-sig') as GTFile:
        for row in csv.reader(GTFile, delimiter = ','):
            #setdefault() creates the list the first time this WR is registered
            GTArray.setdefault(row[0], []).append(float(row[-1]))
for v in GTArray:
    #Average Ground Truth is computed using circular statistics with appropriate range
    AvgGTArray[v] = stats.circmean(GTArray[v], high=180, low=-180)

#One [key, GT_angle] pair per WRun, sorted by key. The average is divided by radi
#because the cell reading GTAverage.csv multiplies the stored value by radi again;
#float() stores a plain Python float whatever numeric type circmean() returned
avg_rows = [[key, float(AvgGTArray[key]/radi)] for key in sorted(AvgGTArray.keys())]

#All GTRuns
#Mode 'w' truncates an existing file, so no os.remove() + append is needed.
#The default csv dialect is used so the file matches the csv.reader() that loads it,
#and the header is written as two fields instead of one field containing a comma
with open('../Data/GTAverage.csv', 'w', newline='', encoding='utf-8') as out_file:
    writer = csv.writer(out_file, delimiter=',')
    writer.writerow(['key', 'AvgGTAngle'])
    writer.writerows(avg_rows)

#A single json.dump() of the whole list gives one valid JSON document; dumping
#every pair separately wrote back-to-back arrays that no JSON parser accepts
with open('../Data/GTAverage.json', 'w', encoding='utf-8') as out_file:
    json.dump(avg_rows, out_file)
#########################################################################

#To display histogram of dances on polar coordinates
#Number of bins
N = 40
#Decoder data adjusted to camera rotation
inputfile = '../Data/Filtering/20160816_13_Adj.csv'

#Angles of the runs are loaded from the second column of every row;
#the with-block closes the file, which was left open before
with open(inputfile, 'rt', encoding='utf-8-sig') as Data:
    theta = [float(row[1]) for row in csv.reader(Data, delimiter = ',')]
#With modulo operator angles are adjusted to the range [0, 2*pi)
mod_theta = [s % (2*np.pi) for s in theta]

#height is calculated through histogram, angle holds the N+1 bin edges
[height, angle] = np.histogram(mod_theta, bins = N)

#Bar width is taken from the bin edges actually returned by np.histogram()
#(a former 2*np.pi/N assignment was overwritten right away and has been dropped)
width = angle[1] - angle[0]

#NOTE: fig3 is also the name of the figure created by the "after RANSAC" cell
fig3 = plt.figure()
fig3.suptitle('Waggle runs distribution for: ' + inputfile)
ax = plt.subplot(111, polar=True)


bars = ax.bar(angle[:-1], height, width = width)
#Bars are coloured by their count through the jet colormap
for r,bar in zip(height, bars):
    bar.set_facecolor( cm.jet(r/10.))
    bar.set_alpha(0.4)

plt.show()

#########################################################################

#For every cluster, in sorted key order, the values computed for it are printed;
#only the first entry of the per clicker lists is shown
for clus in sorted(AvgGTCluster):
    print(clus)
    summary = (
        ('AvgGTCluster {0}', AvgGTCluster[clus]),
        ('GTClusters {0}', GTClusters[clus][0]),
        ('ErrClusArray {0}', ErrClusArray[clus]),
        ('GTClusArray {0}', GTClusArray[clus][0]),
        ('ErrGTClusArray {0}', ErrGTClusArray[clus][0]),
    )
    for template, shown in summary:
        print(template.format(shown))