In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import string

alphabet = list(string.ascii_uppercase)


In [2]:
import random

def generate_random_points(n, x_min, x_max, y_min, y_max):
    x = np.random.uniform(x_min, x_max, n)
    y = np.random.uniform(y_min, y_max, n)
    return x, y

In [3]:
x,y = generate_random_points(26, 0, 512, 0, 512)
dataframe = pd.DataFrame(np.concatenate([x.reshape(-1,1),y.reshape(-1,1)], axis=1), index=alphabet)
dataframe.columns = ['x','y']

In [11]:
from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA

mds = PCA(n_components=1)

In [12]:
reduced = mds.fit_transform(dataframe[['x','y']])

In [6]:
## Check the variance explained 
mds.stress_

3492702.770971455

In [14]:
dataframe['ordering'] = reduced

In [9]:

import plotly.express as px
fig = px.scatter(dataframe, x='x', y='y', text=dataframe.index)
## Change the size of the figure
fig.update_layout(height=600, width=600)
## Increase the size of the text
fig.update_traces(textfont_size=18)

## Add title
fig.update_layout(title_text='Original Ordering')
fig

In [15]:
## Plot ordering in the dataset with plotly express

fig = px.scatter(dataframe, x='ordering', y=[0]*len(dataframe), text=dataframe.index)
## Increase the size of the text
fig.update_traces(textfont_size=18)
fig.update_layout(title_text='MDS ordering')

fig

In [21]:
from scipy.spatial import distance_matrix

In [39]:
order_distance = distance_matrix(dataframe[['ordering']],dataframe[['ordering']])
## minmax normalization
order_distance = (order_distance - order_distance.min())/(order_distance.max() - order_distance.min())

In [40]:
point_distance = distance_matrix(dataframe[['x','y']],dataframe[['x','y']])
## minmax normalization
point_distance = (point_distance - point_distance.min())/(point_distance.max() - point_distance.min())

In [46]:
np.argmax(point_distance)

381

In [47]:
np.argmax(order_distance)

382

In [41]:
((point_distance - order_distance)).mean()

0.12035136338459124

## Time test

In [16]:
import time
from tqdm.notebook import tqdm
import plotly.express as px
import matplotlib.pyplot as plt

In [17]:
mds = MDS(n_components=1,  normalized_stress='auto')

In [None]:
def test():
    dataframe = pd.DataFrame(np.concatenate([x.reshape(-1,1),y.reshape(-1,1)], axis=1))
    dataframe.columns = ['x','y']
    mds.fit_transform(dataframe[['x','y']])

In [45]:
ex_time = []
for i in tqdm(range(2,3000,100)):
    x,y = generate_random_points(i, 0, 512, 0, 512)
    dataframe = pd.DataFrame(np.concatenate([x.reshape(-1,1),y.reshape(-1,1)], axis=1))
    dataframe.columns = ['x','y']
    st = time.time()
    reduced = mds.fit_transform(dataframe[['x','y']])
    et = time.time()

    ex_time.append(et-st)

  0%|          | 0/30 [00:00<?, ?it/s]


The MDS API has changed. ``fit`` now constructs an dissimilarity matrix from data. To use a custom dissimilarity matrix, set ``dissimilarity='precomputed'``.



In [62]:
df = pd.DataFrame([list(range(2,3000,100)), ex_time]).T
df.columns = ['Number of Superpixels', 'Execution Time (s)']

px.line(df, x='Number of Superpixels', y='Execution Time (s)', title='Execution Time vs Number of Superpixels', markers=True)