<a href="https://colab.research.google.com/github/Sahel-Eskandar/Distance-Measures/blob/main/Distances.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install jellyfish

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting jellyfish
  Downloading jellyfish-0.9.0.tar.gz (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.6/132.6 KB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: jellyfish
  Building wheel for jellyfish (setup.py) ... [?25l[?25hdone
  Created wheel for jellyfish: filename=jellyfish-0.9.0-cp39-cp39-linux_x86_64.whl size=81474 sha256=21ab0fb5d8b0b0b392209abfff7cd889992acb9b9fbad2de9e6aadc46a576898
  Stored in directory: /root/.cache/pip/wheels/a6/28/ba/284e37010e5d3aeed5e45345b58ab8683f97bdce46c9e147f9
Successfully built jellyfish
Installing collected packages: jellyfish
Successfully installed jellyfish-0.9.0


In [15]:
import numpy as np
import jellyfish
from scipy.spatial import distance
from scipy.spatial.distance import cdist
from scipy.stats import pearsonr, spearmanr, chisquare
from scipy.spatial.distance import canberra, cosine, euclidean, minkowski, cityblock, hamming, jaccard, dice, braycurtis
from sklearn.metrics.pairwise import paired_distances
from scipy.special import rel_entr

# Euclidean distance
def euclidean_distance(p1, p2):
    return np.sqrt(np.sum((p1 - p2) ** 2))

# Manhattan distance
def manhattan_distance(p1, p2):
    return np.sum(np.abs(p1 - p2))

# Cosine similarity
def cosine_similarity(p1, p2):
    return 1 - distance.cosine(p1, p2)

# Minkowski distance
def minkowski_distance(p1, p2, r):
    return np.power(np.sum(np.power(np.abs(p1 - p2), r)), 1/r)

# Chebyshev distance
def chebyshev_distance(p1, p2):
    return np.max(np.abs(p1 - p2))

# Hamming distance
def hamming_distance(s1, s2):
    return jellyfish.hamming_distance(s1, s2)

# Levenshtein distance
def levenshtein_distance(s1, s2):
    return jellyfish.levenshtein_distance(s1, s2)

# Jaccard similarity
def jaccard_similarity(s1, s2):
    set1 = set(s1)
    set2 = set(s2)
    intersection = set1.intersection(set2)
    union = set1.union(set2)
    return len(intersection) / len(union)

# Sørensen-Dice index
def sorensen_dice_index(s1, s2):
    set1 = set(s1)
    set2 = set(s2)
    intersection = set1.intersection(set2)
    return (2 * len(intersection)) / (len(set1) + len(set2))

# Haversine Distance
def haversine_distance(lat1, lon1, lat2, lon2):
    R = 6371.0 # Radius of the earth in km
    dLat = np.deg2rad(lat2 - lat1)
    dLon = np.deg2rad(lon2 - lon1)
    a = np.sin(dLat / 2)**2 + np.cos(np.deg2rad(lat1)) * np.cos(np.deg2rad(lat2)) * np.sin(dLon / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

# Mahalanobis distance
def mahalanobis_distance(X, Y):
    return cdist(X.reshape(1,-1), Y.reshape(1,-1), 'mahalanobis', VI=np.cov(X))

# Pearson Correlation
def pearson_correlation(X, Y):
    return pearsonr(X, Y)[0]

# Squared Euclidean Distance
def squared_euclidean_distance(X, Y):
    return euclidean(X, Y)**2

# Jensen-Shannon Divergence
def jensen_shannon_divergence(X, Y):
    M = 0.5 * (X + Y)
    return np.sqrt(0.5 * (rel_entr(X, M).sum() + rel_entr(Y, M).sum()))

# Chi-Square Distance
def chi_square_distance(X, Y):
    # Normalize the arrays
    X = X / np.sum(X)
    Y = Y / np.sum(Y)
    
    # Calculate chi-square distance
    return np.sum((X - Y) ** 2 / (X + Y))

# Spearman Correlation
def spearman_correlation(X, Y):
    return spearmanr(X, Y)[0]

# Canberra Distance
def canberra_distance(X, Y):
    return canberra(X, Y)

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
string1 = "hello"
string2 = "hallo"

print("Euclidean distance:", euclidean_distance(point1, point2))
print("Manhattan distance:", manhattan_distance(point1, point2))
print("Cosine similarity:", cosine_similarity(point1, point2))
print("Minkowski distance:", minkowski_distance(point1, point2, 3))
print("Chebyshev distance:", chebyshev_distance(point1, point2))
print("Hamming distance:", hamming_distance(string1, string2))
print("Levenshtein distance:", levenshtein_distance(string1, string2))
print("Jaccard similarity:", jaccard_similarity(string1, string2))
print("Sørensen-Dice index:", sorensen_dice_index(string1, string2))
print("Haversine distance:", haversine_distance(51.5074, 0.1278, 40.7128, -74.0060))
print("Mahalanobis distance:", mahalanobis_distance(point1, point2))
print("Pearson correlation:", pearson_correlation(point1, point2))
print("Squared Euclidean distance:", squared_euclidean_distance(point1, point2))
print("Jensen-Shannon divergence:", jensen_shannon_divergence(point1, point2))
print("Chi-Square distance:", chi_square_distance(point1, point2))
print("Spearman correlation:", spearman_correlation(point1, point2))
print("Canberra distance:", canberra_distance(point1, point2))


Euclidean distance: 2.8284271247461903
Manhattan distance: 4
Cosine similarity: 0.9838699100999074
Minkowski distance: 2.5198420997897464
Chebyshev distance: 2
Hamming distance: 1
Levenshtein distance: 1
Jaccard similarity: 0.6
Sørensen-Dice index: 0.75
Haversine distance: 5587.00681965736
Mahalanobis distance: [[1.41421356]]
Pearson correlation: 1.0
Squared Euclidean distance: 8.000000000000002
Jensen-Shannon divergence: 0.6569041853099059
Chi-Square distance: 0.01923076923076923
Spearman correlation: 0.9999999999999999
Canberra distance: 0.8333333333333333
