In [1]:
import pickle
from ts_cluster import *
from ts_preprocess import *
import os
import random
import time
from tqdm import tqdm
from sklearn.mixture import GaussianMixture

UPPER_DIR = "data/LENS-2023-11-CSV/LENS-2023-11-CSV/inside-out/active"
CITIES = ["chicago", "frankfurt", "ottawa", "seychelles", "vancouver", "victoria"]
FC_FILE = "data/potential_features.json"
fc_settings = get_fc_settings(path=FC_FILE)

In [2]:
# Learn initital Clustering and Features
df = []
timer = time.time()
for i, city in enumerate(CITIES):
    folder = random.choice(os.listdir(f"{UPPER_DIR}/{city}"))
    df_raw, start_time = load_ts(f"{UPPER_DIR}/{city}", folder)
    df_raw["id"] += 1000 * i
    df.append(df_raw)

print(f"Finished Loading: {(time.time()-timer):.2f} seconds")
timer = time.time()
df = pd.concat(df, ignore_index=True)
print(f"Finished concat: {(time.time()-timer):.2f} seconds")
timer = time.time()
filtered_features, top_feature_names = get_features(df_raw, fc_settings, top_feature_names=None)
print(f"Finished get_features: {(time.time()-timer):.2f} seconds")
timer = time.time()
df_clustered, cluster_map, gmm = cluster_features(filtered_features, df, gmm=None)
print(f"Finished clustering: {(time.time()-timer):.2f} seconds")

Finished Loading: 39.94 seconds
Finished concat: 0.24 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.25it/s]


Finished get_features: 104.44 seconds
Lowest score:  0.22998912648249992
Highest score:  0.35389965079947816
Finished clustering: 18.49 seconds


In [3]:
# top_feature_names = ['value__mean', 'value__quantile__q_0.125', 'value__truncated_mean__q_0.05', 'value__quantile__q_0.05', 'value__autocorrelation__lag_5', 'value__autocorrelation__lag_1', 'value__autocorrelation__lag_3']
# filename = "gmm_model.pkl"
# with open(filename, 'rb') as file:
#     gmm = pickle.load(file)

In [4]:
timer = time.time()
for city in CITIES:
    for folder in os.listdir(f"{UPPER_DIR}/{city}"):
        df_raw, start_time = load_ts(f"{UPPER_DIR}/{city}", folder)
        filtered_features, top_feature_names = get_features(df_raw, fc_settings, 
                                                            top_feature_names=top_feature_names)
        df_clustered, cluster_map, gmm = cluster_features(filtered_features, df_raw, gmm=gmm)
        save_clusters(f"{UPPER_DIR}/{city}/{folder}", df_clustered, cluster_map)
        df_raw, clusters = load_clusters(f"{UPPER_DIR}/{city}/{folder}/cluster_results.csv", df_raw)
        df_long, df_features = process_sequence(df_raw, start_time, clusters)
        save_sequence(df_long, f"{UPPER_DIR}/{city}/{folder}")
        print(f"Completed {city} {folder}: {(time.time()-timer):.2f} seconds")

Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.17it/s]


Completed chicago 2023-11-23: 53.05 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 34.15it/s]


Completed chicago 2023-11-30: 105.95 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 31.01it/s]


Completed chicago 2023-11-19: 163.22 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 37.71it/s]


Completed chicago 2023-11-20: 215.91 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.68it/s]


Completed chicago 2023-11-26: 272.77 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.27it/s]


Completed chicago 2023-11-18: 330.21 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.36it/s]


Completed chicago 2023-11-22: 384.33 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 36.65it/s]


Completed chicago 2023-11-17: 436.02 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.42it/s]


Completed chicago 2023-11-27: 493.65 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 31.41it/s]


Completed chicago 2023-11-24: 549.61 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.81it/s]


Completed chicago 2023-11-29: 605.92 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 34.31it/s]


Completed chicago 2023-11-25: 660.63 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.53it/s]


Completed chicago 2023-11-28: 717.94 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.15it/s]


Completed chicago 2023-11-21: 775.02 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 44.14it/s]


Completed frankfurt 2023-11-23: 816.55 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.24it/s]


Completed frankfurt 2023-11-30: 872.65 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.05it/s]


Completed frankfurt 2023-11-19: 928.38 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 31.32it/s]


Completed frankfurt 2023-11-20: 984.93 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 30.61it/s]


Completed frankfurt 2023-11-26: 1038.20 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.74it/s]


Completed frankfurt 2023-11-18: 1093.09 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 36.35it/s]


Completed frankfurt 2023-11-22: 1142.13 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 38.43it/s]


Completed frankfurt 2023-11-17: 1193.86 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.01it/s]


Completed frankfurt 2023-11-27: 1246.98 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.01it/s]


Completed frankfurt 2023-11-24: 1304.67 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 36.48it/s]


Completed frankfurt 2023-11-29: 1356.29 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 40.12it/s]


Completed frankfurt 2023-11-25: 1402.49 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.51it/s]


Completed frankfurt 2023-11-28: 1455.42 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.87it/s]


Completed frankfurt 2023-11-21: 1508.91 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.79it/s]


Completed ottawa 2023-11-23: 1558.34 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.11it/s]


Completed ottawa 2023-11-30: 1612.65 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.54it/s]


Completed ottawa 2023-11-19: 1662.79 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.15it/s]


Completed ottawa 2023-11-20: 1714.19 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 48.66it/s]


Completed ottawa 2023-11-16: 1749.88 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 50.18it/s]


Completed ottawa 2023-11-26: 1786.84 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 73.97it/s]


Completed ottawa 2023-11-18: 1811.29 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 31.84it/s]


Completed ottawa 2023-11-22: 1866.38 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.46it/s]


Completed ottawa 2023-11-17: 1920.08 seconds


Feature Extraction: 100%|██████████| 79/79 [00:02<00:00, 34.13it/s]


Completed ottawa 2023-11-27: 1973.69 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.31it/s]


Completed ottawa 2023-11-24: 2029.14 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 31.62it/s]


Completed ottawa 2023-11-29: 2083.62 seconds


Feature Extraction: 100%|██████████| 79/79 [00:02<00:00, 32.87it/s]


Completed ottawa 2023-11-25: 2137.19 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 52.34it/s]


Completed ottawa 2023-11-28: 2174.81 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 38.52it/s]


Completed ottawa 2023-11-21: 2225.94 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 41.40it/s]


Completed seychelles 2023-11-23: 2269.22 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 49.48it/s]


Completed seychelles 2023-11-30: 2306.01 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 36.48it/s]


Completed seychelles 2023-11-19: 2356.39 seconds


Feature Extraction: 100%|██████████| 79/79 [00:02<00:00, 38.10it/s]


Completed seychelles 2023-11-20: 2406.22 seconds


Feature Extraction: 100%|██████████| 78/78 [00:01<00:00, 67.85it/s]


Completed seychelles 2023-11-16: 2432.93 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 48.18it/s]


Completed seychelles 2023-11-26: 2470.07 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 39.89it/s]


Completed seychelles 2023-11-18: 2513.99 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 70.71it/s]


Completed seychelles 2023-11-22: 2543.45 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 34.19it/s]


Completed seychelles 2023-11-17: 2594.85 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 45.81it/s]


Completed seychelles 2023-11-27: 2639.27 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 42.84it/s]


Completed seychelles 2023-11-24: 2679.58 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 60.65it/s]


Completed seychelles 2023-11-29: 2711.81 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 40.33it/s]


Completed seychelles 2023-11-25: 2756.91 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 38.00it/s]


Completed seychelles 2023-11-28: 2802.39 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 44.27it/s]


Completed seychelles 2023-11-21: 2846.27 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 30.96it/s]


Completed vancouver 2023-11-23: 2902.80 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 30.55it/s]


Completed vancouver 2023-11-30: 2959.71 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.69it/s]


Completed vancouver 2023-11-19: 3012.90 seconds


Feature Extraction: 100%|██████████| 79/79 [00:02<00:00, 34.20it/s]


Completed vancouver 2023-11-20: 3067.10 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 55.61it/s]


Completed vancouver 2023-11-16: 3100.57 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.31it/s]


Completed vancouver 2023-11-26: 3154.93 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.28it/s]


Completed vancouver 2023-11-18: 3208.90 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 28.94it/s]


Completed vancouver 2023-11-22: 3264.04 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 34.62it/s]


Completed vancouver 2023-11-17: 3318.09 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.82it/s]


Completed vancouver 2023-11-27: 3370.28 seconds


Feature Extraction: 100%|██████████| 79/79 [00:02<00:00, 30.94it/s]


Completed vancouver 2023-11-24: 3427.38 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 30.92it/s]


Completed vancouver 2023-11-29: 3484.21 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 30.83it/s]


Completed vancouver 2023-11-25: 3539.88 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.61it/s]


Completed vancouver 2023-11-28: 3594.46 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.22it/s]


Completed vancouver 2023-11-21: 3650.57 seconds


Feature Extraction: 100%|██████████| 80/80 [00:00<00:00, 172.59it/s]


Completed victoria 2023-11-23: 3660.24 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.25it/s]


Completed victoria 2023-11-30: 3712.31 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 33.33it/s]


Completed victoria 2023-11-19: 3769.06 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.90it/s]


Completed victoria 2023-11-20: 3823.34 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 50.77it/s]


Completed victoria 2023-11-16: 3858.55 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 36.19it/s]


Completed victoria 2023-11-26: 3908.79 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.45it/s]


Completed victoria 2023-11-18: 3965.35 seconds


Feature Extraction: 100%|██████████| 79/79 [00:01<00:00, 54.22it/s]


Completed victoria 2023-11-22: 4001.93 seconds


Feature Extraction: 100%|██████████| 80/80 [00:01<00:00, 40.32it/s]


Completed victoria 2023-11-17: 4050.23 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 34.66it/s]


Completed victoria 2023-11-27: 4102.49 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 35.27it/s]


Completed victoria 2023-11-24: 4158.00 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 37.21it/s]


Completed victoria 2023-11-29: 4212.24 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 31.98it/s]


Completed victoria 2023-11-25: 4268.59 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 38.07it/s]


Completed victoria 2023-11-28: 4317.81 seconds


Feature Extraction: 100%|██████████| 80/80 [00:02<00:00, 32.85it/s]


Completed victoria 2023-11-21: 4371.95 seconds


In [5]:
print(top_feature_names)

['value__quantile__q_0.05', 'value__mean', 'value__truncated_mean__q_0.05', 'value__quantile__q_0.125', 'value__autocorrelation__lag_2', 'value__autocorrelation__lag_5', 'value__autocorrelation__lag_3']


In [6]:
filename = "gmm_model.pkl"
with open(filename, 'wb') as file:
    pickle.dump(gmm, file)
print(f"Model saved to {filename}")

with open(filename, 'rb') as file:
    gmm = pickle.load(file)
print(f"Model loaded from {filename}")

Model saved to gmm_model.pkl
Model loaded from gmm_model.pkl


In [7]:
start_timer = time.time()
df = pd.read_csv("/extra/datalab_scratch0/ctadler/time_series_models/timeseries_clustering/data/LENS-2023-11-CSV/LENS-2023-11-CSV/inside-out/active/frankfurt/2023-11-17/clustering_data.csv", index_col=None)
print(time.time()-start_timer)
display(df)

6.039671421051025


Unnamed: 0,id,timestamp,rtt_token,rtt_nearest
0,0,2023-11-17 01:00:12.000,47.894040,47.894040
1,0,2023-11-17 01:00:12.010,-1.000000,47.894040
2,0,2023-11-17 01:00:12.020,-1.000000,47.894040
3,0,2023-11-17 01:00:12.030,-1.000000,31.412807
4,0,2023-11-17 01:00:12.040,31.412807,31.412807
...,...,...,...,...
8230495,5518,2023-11-17 23:59:56.950,80.803849,80.803849
8230496,5518,2023-11-17 23:59:56.960,71.121652,71.121652
8230497,5518,2023-11-17 23:59:56.970,61.879582,61.879582
8230498,5518,2023-11-17 23:59:56.980,102.331549,102.331549
