In [125]:
import scipy as sp
import numpy as np
import os
import pandas as pd
import sys
from scipy.spatial import distance 
cwd = os.getcwd()

# General tests

In [126]:
eucl_values = []
man_values = []
cheb_values = []
for i in range(10):
    x = np.random.normal(0,1000000, size=(4))
    y = np.random.normal(0,1000000, size=(4))
    print(x)
    print(y)
    eucl_dist = distance.euclidean(x,y)
    man_dist = distance.cityblock(x,y)
    cheb_dist = distance.chebyshev(x,y)
    eucl_row = np.concatenate((x,y,[eucl_dist]),axis=0)
    man_row = np.concatenate((x,y,[man_dist]),axis=0)
    cheb_row = np.concatenate((x,y,[cheb_dist]),axis=0)
    eucl_values.append(eucl_row)
    man_values.append(man_row)
    cheb_values.append(cheb_row)

[ 407929.68953623 -577797.3717119   722058.38121268 -500884.40524092]
[-1627452.97148523   708686.28282828 -2372012.3067377  -1216402.90756373]
[-164009.80198242 -435773.81217974  868804.82721754 -136392.6079666 ]
[ 530357.58029356 2446896.39353932  515188.13790447  904683.23386673]
[  473064.87101807   717943.69745363 -1621922.56290835  -297442.84110545]
[ -287054.88163399   -50043.79990091  -520868.21811067 -2133736.09935503]
[-2316510.7571252   1170102.97887489  -724165.82255716  -401779.35571307]
[-1634490.70676042   632398.85453218   845039.21337054  2125084.07636624]
[ 270074.41811212 -392966.26568544  474168.3973797   252922.36566978]
[ -32498.06293795 -441356.31350343 -203860.59387279  166819.68337851]
[ 1304823.73095087  -874497.97231841 -1580330.86772977   485207.55020181]
[-2171558.07555764   418003.43183448 -1319974.29470728  -440046.710217  ]
[-274361.84239714  287614.99382774 1011192.35760644  182121.49976581]
[2067310.03876174 2199967.16345638  276450.69763146 -379759.84

In [127]:
eucl_df = pd.DataFrame(eucl_values)
man_df = pd.DataFrame(man_values)
cheb_df = pd.DataFrame(cheb_values)
eucl_df.to_csv(os.path.join(cwd, "eucl_general"),header=False,index=False)
man_df.to_csv(os.path.join(cwd, "man_general"),header=False,index=False)
cheb_df.to_csv(os.path.join(cwd, "cheb_general"),header=False,index=False)

In [128]:
man_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,407929.7,-577797.4,722058.4,-500884.405241,-1627453.0,708686.3,-2372012.0,-1216403.0,7131456.0
1,-164009.8,-435773.8,868804.8,-136392.607967,530357.6,2446896.0,515188.1,904683.2,4971730.0
2,473064.9,717943.7,-1621923.0,-297442.841105,-287054.9,-50043.8,-520868.2,-2133736.0,4465455.0
3,-2316511.0,1170103.0,-724165.8,-401779.355713,-1634491.0,632398.9,845039.2,2125084.0,5315793.0
4,270074.4,-392966.3,474168.4,252922.36567,-32498.06,-441356.3,-203860.6,166819.7,1115094.0


# Same Points

In [129]:
same_values = []
for i in range(1000):
    x = np.random.normal(0,1000000, size=(4))
    row = np.concatenate((x,x),axis=0)
    same_values.append(row)

In [130]:
same_df = pd.DataFrame(same_values)
print(same_df.head())
cwd = os.getcwd()
same_df.to_csv(os.path.join(cwd, "same_points"),header=False,index=False)

               0             1             2              3              4  \
0 -916014.916807 -4.843266e+05 -9.464030e+05  919714.159495 -916014.916807   
1  282188.918481  1.290004e+05 -1.070534e+06  231757.956760  282188.918481   
2  230097.617298  1.950185e+06 -5.744311e+05 -668859.736480  230097.617298   
3  186866.846853  1.338950e+06 -1.126833e+06 -159054.488885  186866.846853   
4  640971.077036  3.219321e+05  1.205140e+06 -836765.892655  640971.077036   

              5             6              7  
0 -4.843266e+05 -9.464030e+05  919714.159495  
1  1.290004e+05 -1.070534e+06  231757.956760  
2  1.950185e+06 -5.744311e+05 -668859.736480  
3  1.338950e+06 -1.126833e+06 -159054.488885  
4  3.219321e+05  1.205140e+06 -836765.892655  


# Single Infinite

In [131]:
x = np.random.normal(0,1000000, size=(4))
y = np.random.normal(0,1000000, size=(4))
x[np.random.randint(4)] = np.inf
print(x)
dist = distance.euclidean(x,y)

[1338979.45391574 -145267.44549022 -956255.47628435              inf]


ValueError: array must not contain infs or NaNs

# Machine Precision

In [132]:
eps_values = []
eps = np.finfo(float).eps
count = 0
for i in range(1000):
    x = np.random.normal(0,1000000, size=(4))
    y = np.copy(x)
    y[0] = y[0] + eps
    dist = distance.euclidean(x,y)
    row = np.concatenate((x,x),axis=0)
    eps_values.append(row)
    if dist != eps:
        count += 1
print(count)

1000


In [133]:
np.finfo(float).eps

2.220446049250313e-16

# Variable Lengths Minkowski

In [134]:
lengths = [10,100,1000]
p_vals = [5,10,20]
vl_vals = []
for l in lengths:
    x = np.random.normal(0,1000000, size=(l))
    y = np.random.normal(0,1000000, size=(l))
    r = distance.minkowski(x,y,10)
    row = np.concatenate(([r],x,y),axis=0)
    vl_vals.append(row)

In [135]:
vl_df = pd.DataFrame(vl_vals)
cwd = os.getcwd()
vl_df.to_csv(os.path.join(cwd, "mink_var_length"),header=False,index=False)

In [136]:
vl_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000
0,2399664.0,651484.7,1264937.0,-812155.7,-570427.3,-438870.329735,-145497.7,-900838.9,671465.529464,612017.1,...,,,,,,,,,,
1,3615716.0,-1315943.0,-1089472.0,2079451.0,-1094183.0,-867470.845486,1344521.0,663560.9,-957809.821081,-468234.0,...,,,,,,,,,,
2,5314226.0,-1081621.0,-261864.5,-138962.1,-1729511.0,-403285.917854,1815216.0,-1228075.0,-80502.403159,1103900.0,...,-3789.931183,428118.881616,-684157.459273,160706.967957,-176186.612403,1184093.0,185645.946573,-1929627.0,410275.536465,-910762.573893


# Kullback Leibler

In [137]:
mu = np.random.normal(0,1000000)
sigma = abs(np.random.normal())
s = np.random.normal(0, sigma, 5)
print(mu)
print(sigma)
print(s.sum())

29051.90746555675
0.40067346888861793
-0.4875798332986395


In [138]:
values = np.random.random(5)
values = values / values.sum()
print(values.sum())

0.9999999999999998


In [1]:
kl_vals = []
for i in range(1000):
    x = np.random.random(5)
    y = np.random.random(5)
    x = x/x.sum()
    y = y/y.sum()
    dist = distance.rel_entr(x,y).sum()
    dist = dist **2
    row = np.concatenate(([dist],x,y),axis=0)
    kl_vals.append(row)

NameError: name 'np' is not defined

In [140]:
# kl_df = pd.DataFrame(kl_vals)
# cwd = os.getcwd()
# kl_df.to_csv(os.path.join(cwd, "kl_general"),header=False,index=False)

# Binnning Tests

In [141]:
kl_x = [0.5,0,0,0.25,0,0,0,0.25,0]
kl_y = [0.25,0,0,0.25,0,0,0,0.25,0.25]
kl_dist = distance.rel_entr(kl_x,kl_y)
print(kl_dist)

[0.34657359 0.         0.         0.         0.         0.
 0.         0.         0.        ]
