In [44]:
import numpy as np
import pandas as pd 
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import math
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from scipy.stats import entropy
from tqdm.notebook import tqdm, trange
import statsmodels.api as sm
from pylab import *
import matplotlib.pyplot as plt

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [5]:
# embedded vectors data
embeds = np.load("/content/drive/My Drive/MSc Statistics/track-embeddings-v2.npz", allow_pickle=True)

In [6]:
#shape of the song vectors
print(embeds["beta"].shape)
# shape of the link vector
print(embeds["mapping"].shape)

(384546, 100)
(384546,)


In [7]:
# these link vectors correspond to SongId
embeds["mapping"][1]

'SOAPDEY12A81C210A9'

In [19]:
len(embeds["beta"][1])

100

# TS-SS


In [8]:
import math
import numpy as np
import torch

class TS_SS:
    
    def Cosine(self, vec1: np.ndarray, vec2: np.ndarray):
        return np.dot(vec1, vec2.T)/(np.linalg.norm(vec1) * np.linalg.norm(vec2))

    def VectorSize(self, vec: np.ndarray):
        return np.linalg.norm(vec)

    def Euclidean(self, vec1: np.ndarray, vec2: np.ndarray):
        return np.linalg.norm(vec1-vec2)

    def Theta(self, vec1: np.ndarray, vec2: np.ndarray):
        return np.arccos(self.Cosine(vec1, vec2)) + np.radians(10)

    def Triangle(self, vec1: np.ndarray, vec2: np.ndarray):
        theta = np.radians(self.Theta(vec1, vec2))
        return (self.VectorSize(vec1) * self.VectorSize(vec2) * np.sin(theta))/2

    def Magnitude_Difference(self, vec1: np.ndarray, vec2: np.ndarray):
        return abs(self.VectorSize(vec1) - self.VectorSize(vec2))

    def Sector(self, vec1: np.ndarray, vec2: np.ndarray):
        ED = self.Euclidean(vec1, vec2)
        MD = self.Magnitude_Difference(vec1, vec2)
        theta = self.Theta(vec1, vec2)
        return math.pi * (ED + MD)**2 * theta/360


    def __call__(self, vec1: np.ndarray, vec2: np.ndarray):
        return self.Triangle(vec1, vec2) * self.Sector(vec1, vec2)


In [79]:
v1 = np.random.random_sample((10, 200))
v2 = np.random.random_sample((1, 200))
similarity = TS_SS() 
a = similarity(v1,v2)
print(a)

[[49.09836864]
 [49.62124358]
 [50.64130499]
 [50.91800216]
 [50.12669745]
 [50.05851948]
 [49.49825567]
 [50.25166387]
 [50.78302353]
 [50.70359587]]


In [20]:
b = similarity(embeds["beta"][1], embeds["beta"][2])

In [None]:
embeds["beta"][:2]

In [None]:
embeds["beta"][:2] * np.array([[2],[3]])

# Misc


In [58]:
x = np.array([3,10])
y = np.array([-5,-7])
z = np.array([1,1])
joint = np.array([x,y,z])

In [73]:
(joint * a)

array([[  6,  20],
       [-15, -21],
       [  2,   2]])

In [66]:
a = np.array([[2],[3], [2]])

In [74]:
def centroid(p,w):
  # assume we have a joint np array
  # w gotta be a n_i x 1 np array
  weighted = p * w
  joint = weighted.sum(axis=0)
  return joint/np.sum(w)

In [75]:
centroid(joint, a)

array([-1.        ,  0.14285714])

In [78]:
centroid(embeds["beta"][[1,4,66]], a)

array([3.44180090e-05, 5.33065255e-05, 3.76822006e-05, 3.21358649e-05,
       3.71509351e-05, 4.05466484e-05, 3.67388659e-05, 6.91730704e-04,
       2.26501678e-04, 3.79497033e-05, 3.48051466e-05, 3.81571798e-05,
       9.16803141e-05, 3.65392542e-05, 3.90541869e-05, 3.82787797e-05,
       3.99204686e-05, 3.80352877e-05, 1.11263151e-04, 3.88606412e-05,
       3.45725384e-05, 3.75478750e-05, 3.41145159e-05, 3.39872900e-05,
       3.76460344e-05, 3.47000688e-05, 3.75037598e-05, 3.50906199e-05,
       3.68194347e-05, 3.85686679e-05, 8.42396488e-04, 1.00138561e-02,
       3.34913244e-05, 3.67739017e-05, 3.38675912e-05, 9.40448378e-03,
       3.69680290e-05, 3.65273014e-05, 3.76697613e-05, 5.46349195e-02,
       3.38593849e-05, 3.40903612e-05, 3.69417841e-05, 3.50505735e-05,
       3.42968150e-05, 3.31765862e-05, 4.09790846e-05, 3.80623138e-05,
       1.30845267e-03, 1.03307133e-03, 3.79162385e-05, 3.83815256e-05,
       3.70302363e-05, 2.78952324e-03, 3.71648166e-05, 3.43613730e-05,
      