## Comparaison des distances sur des simulations de modèles de ressorts perturbés

Chaque jeu de simulations a été généré pour des paramètres `(F, k)` fixés. Le bruit et les conditions initiales sont aléatoires et varient d'une simulation à l'autre.

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import time
import pandas as pd
from tqdm import tqdm
import pickle
from sklearn import preprocessing

from descriptor import compute_descriptor
from wasserstein_distance import compute_wasserstein_distance
from kolmogorov_distance import compute_kolmogorov_dist
from data_functions import load_data, plot_data
import simu_ressort

### Load data

In [None]:
# Load every batch of spring simulations from the pickle file.
all_simus = simu_ressort.load_obj("./data/ressort/simus.pkl")

# Summarise the data set using its first entry: number of parameter
# settings, simulations per setting ('N'), and length of the second axis of
# the 'simu' array (reported as the duration in time steps).
first_batch = all_simus[0]
print("{} parameters tested, and {} simus for each parameter.".format(len(all_simus), first_batch['N']))
n_steps = np.shape(first_batch['simu'])[1]
print("The duration of each simu is {} dt.".format(n_steps))

`all_simus` contient les simulations : chaque élément contient les paramètres ainsi que toutes les simulations effectuées avec ces paramètres, stockées dans l'item `simu`.

In [None]:
# Plot simulation number j of parameter set number i, passing along the
# (F, k) parameters stored with that set.
i = j = 10
batch = all_simus[i]
simu_ressort.plot_ressort(
    batch['simu'][j, :],
    F=batch['F'],
    k=batch['k'],
)

#### Mettre sous la forme DataFrame comme les simus de poumons
Format qu'il faudrait changer, car il n'est pas idéal, mais bon...

In [None]:
# Convert the simulations into the DataFrame layout used below.
# NOTE(review): `sets` is later indexed in parallel with `all_simus`, so one
# entry per parameter setting is assumed; the exact meaning of nmax=100 is
# defined in simu_ressort — confirm there.
sets = simu_ressort.transform_simus_to_df(all_simus, nmax=100)

In [None]:
def plot_df(df, nmax=None):
    """Plot the first ``nmax`` columns of ``df``, then show the figure.

    Parameters
    ----------
    df : DataFrame-like
        Object exposing ``columns`` and column indexing (``df[col]``); each
        selected column is drawn through its own ``.plot()`` call.
    nmax : int or None, optional
        Maximum number of columns to draw. ``None`` (the default) draws
        every column; ``0`` draws none.
    """
    # Slicing with ``None`` keeps every column, so the default needs no
    # special case. The former ``if not nmax`` test also caught ``nmax=0``
    # and silently plotted all columns instead of none.
    for col in df.columns[:nmax]:
        df[col].plot()
    plt.show()

In [None]:
# Example simulations: for three parameter sets, draw the first five
# columns of the corresponding DataFrame under a title giving (F, k).
for i in (0, 42, 80):
    plt.title("F={}, k={}".format(all_simus[i]['F'], all_simus[i]['k']))
    plot_df(sets[i], nmax=5)

In [None]:
# Preview the first rows of the first simulation set.
df1 = sets[0]
df1.head()

In [None]:
# Unweighted Kolmogorov distance for two pairs of sets: (0, 42) and the
# adjacent indices (41, 42). The resulting tuple is displayed by the notebook.
compute_kolmogorov_dist(sets[0], sets[42]), compute_kolmogorov_dist(sets[41], sets[42])

In [None]:
# Time one unweighted Wasserstein distance computation between sets 0 and 42
# and report the wall-clock duration in minutes.
start_time = time.time()
d = compute_wasserstein_distance(sets[0], sets[42])
elapsed_min = round((time.time() - start_time) / 60, 2)

print("Execution time: {} min".format(elapsed_min))
print("Distance:", d)

## Calcul avec des poids

In [None]:
# Weights passed to the distance functions below (presumably one weight per
# descriptor component — TODO confirm against compute_descriptor).
weights = np.array([0., 0., 0., 0., 0., 0., 0.99999414990605873, 0.0034016158454299185, 0.])

# Check whether the weights already sum to 1
print(weights.sum())

# Renormalise so that they do
weights = weights / weights.sum()
print(weights)
weights.sum()

In [None]:
# Same two Kolmogorov distances as before — pairs (0, 42) and (41, 42) —
# this time passing the normalised weights.
compute_kolmogorov_dist(sets[0], sets[42], weights), compute_kolmogorov_dist(sets[41], sets[42], weights)

In [None]:
# Transpose the values of sets 0 and 42 so that iterating over each array
# yields one DataFrame column at a time (presumably one simulation per
# column — TODO confirm).
arr1, arr2 = (np.array(sets[idx]).T for idx in (0, 42))

print(arr1.shape, arr2.shape)

# One descriptor per row of the transposed arrays.
desc1 = list(map(compute_descriptor, arr1))
desc2 = list(map(compute_descriptor, arr2))

print(len(desc1), len(desc2))

In [None]:
# Time one weighted Wasserstein distance computation between sets 0 and 42
# and report the wall-clock duration in minutes.
start_time = time.time()
d = compute_wasserstein_distance(sets[0], sets[42], weights)
elapsed_min = round((time.time() - start_time) / 60, 2)

print("Execution time: {} min".format(elapsed_min))
print("Distance:", d)

### Calculer les distances entre chaque couple de simus

In [None]:
def compute_dist_sets(sets):
    """Compute Kolmogorov and Wasserstein distances for every ordered pair.

    Every element of ``sets`` is compared with every element of ``sets``
    (including itself, and in both orders), with a progress bar on each
    loop level.

    Parameters
    ----------
    sets : sequence
        Simulation sets accepted by ``compute_kolmogorov_dist`` and
        ``compute_wasserstein_distance``.

    Returns
    -------
    numpy.ndarray
        One row ``[kolmogorov, wasserstein]`` per ordered pair, with the
        first element of the pair varying slowest.
    """
    pair_distances = [
        [
            compute_kolmogorov_dist(left, right),
            compute_wasserstein_distance(left, right),
        ]
        for left in tqdm(sets)
        for right in tqdm(sets)
    ]
    return np.array(pair_distances)

def plot_comparaison(distances):
    """Scatter-plot the second column of ``distances`` against the first.

    Parameters
    ----------
    distances : numpy.ndarray
        2-D array whose column 0 is drawn on the x axis (labelled as the
        Kolmogorov distance) and column 1 on the y axis (labelled as the
        Wasserstein distance).
    """
    kolmogorov = distances[:, 0]
    wasserstein = distances[:, 1]

    plt.scatter(kolmogorov, wasserstein, marker='x')
    plt.title("Comparaison des distances")
    plt.xlabel("Kolmogorov distance")
    plt.ylabel("Wasserstein distance")
    plt.show()

In [None]:
# Compute both distances for every ordered pair of simulation sets.
distances = compute_dist_sets(sets)

# Persist the result with pickle. The previous call to a bare `save_obj`
# referenced a name that is neither defined nor imported in this notebook,
# so the cell failed with NameError once the long computation had finished.
# NOTE(review): if simu_ressort exposes a `save_obj` counterpart to
# `load_obj`, calling it instead would be an alternative — confirm.
with open('./data/distances_ressort.pkl', 'wb') as f:
    pickle.dump(distances, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Scatter plot of Wasserstein against Kolmogorov distance for all pairs.
plot_comparaison(distances)

In [None]:
# Display the raw distance array (column 0: Kolmogorov, column 1: Wasserstein).
distances

In [None]:
# Same scatter plot restricted to the first 20 rows of the distance array.
first_rows = distances[:20]
plt.scatter(first_rows[:, 0], first_rows[:, 1], marker='x')
plt.show()