In [None]:
import pandas as pd
import numpy as np
import os

# Dataset under study; the bounds of the eps grid are embedded in the results file name.
DATANAME = 'SwedishLeaf'
dataset_name = DATANAME
eps_list = [0.005 * step for step in range(1, 21)]
eps_name = " ({} to {})".format(eps_list[0], eps_list[-1])
result_file = os.path.join(
    "kfold_kNN_data",
    "saved_results",
    "Results on " + dataset_name + eps_name + '.csv',
)
# Results table read from disk; later cells index its 'eps' and per-algorithm columns.
df = pd.read_csv(result_file)

In [2]:
# One independent, initially empty list per algorithm; later cells append to eps_records.
min_records = dict(eTiOT=[], eTAOT=[])
eps_records = dict(eTiOT=[], eTAOT=[])

def extract_best_eps(indices, alg, frame=None):
    """
    Return the eps whose row has the smallest value in column ``alg``.

    Only rows whose 'eps' entry parses as a number take part in the search.
    Labelled rows such as 'Final error', or the summary rows a later cell
    appends to the same CSV, are skipped, so the cell keeps working when the
    notebook is re-run on an already updated results file.

    Parameters:
        indices (iterable): Row labels of the table to consider.
        alg (str): Name of the column holding the values to minimise.
        frame (DataFrame or None): Table with an 'eps' column and an ``alg``
            column. If None, the notebook-level ``df`` is used.

    Returns:
        float: The eps of the selected row. Ties go to the row visited last
        (the comparison is ``<=``). Returns 0 when no row qualifies.
    """
    table = df if frame is None else frame
    min_err = np.inf
    min_eps = 0
    for i in indices:
        # Decide from the label first: non-numeric eps means "not a grid row".
        try:
            eps_value = float(table['eps'][i])
        except (TypeError, ValueError):
            continue
        if np.isnan(eps_value):
            continue
        err = float(table[alg][i])
        # A NaN error compares False here and is therefore never selected.
        if err <= min_err:
            min_err = err
            min_eps = eps_value
    return min_eps

# Position 0: eps selected over every row of df.
# Position 1: eps selected over every second row, starting at row 1.
eps_records['eTiOT'].extend(
    extract_best_eps(rows, 'eTiOT')
    for rows in (range(len(df)), range(1, len(df), 2))
)
eps_records['eTAOT'].extend(
    extract_best_eps(rows, 'eTAOT')
    for rows in (range(len(df)), range(1, len(df), 2))
)


In [3]:
# Second eps grid (step 0.01); its bounds select the results file under kNN_data.
eps_list = [0.01 * step for step in range(1, 11)]
eps_name = " ({} to {})".format(eps_list[0], eps_list[-1])
result_file_test = os.path.join(
    "kNN_data",
    "saved_results",
    "Results on " + dataset_name + eps_name + '.csv',
)
# Maps each short algorithm name to a column label of the table loaded below.
col_map = {
    "eTiOT": "eTiOT",
    "eTAOT": r" eTAOT($\omega = \omega_{\text{grid}})$",
}
df2 = pd.read_csv(result_file_test)

In [4]:
# Show the column labels of df2; note that some labels begin with a space.
df2.columns

Index(['eps', 'eTiOT', 'eTAOT($\omega = \omega_{\text{grid}} \;/\; 5)$',
       ' eTAOT($\omega = \omega_{\text{grid}})$',
       ' eTAOT($\omega = \omega_{\text{grid}} \times 5)$'],
      dtype='object')

In [5]:
# For each algorithm, look up in df2 the value stored at the eps that was
# selected on every second row of df (position 1 of eps_records) and keep it
# as position 2.
# - eps values are compared with a tolerance, since both sides are floats that
#   went through separate CSV files;
# - rows of df2 whose 'eps' is not numeric are ignored instead of raising;
# - the df2 column is taken from col_map by name rather than by position;
# - anything beyond position 1 is dropped first, so re-running this cell does
#   not append the same value twice.
df2_eps = pd.to_numeric(df2['eps'], errors='coerce')
for alg, column in col_map.items():
    del eps_records[alg][2:]
    at_selected_eps = np.isclose(df2_eps, eps_records[alg][1])
    eps_records[alg].extend(df2.loc[at_selected_eps, column])

In [6]:
# Inspect the collected values: per algorithm, the two selected eps values followed by the value looked up in df2.
eps_records

{'eTiOT': [0.1, 0.1, np.float64(0.1797004991680532)],
 'eTAOT': [0.1, 0.1, np.float64(0.2346089850249584)]}

In [7]:
# Summary rows stored next to the per-eps results in result_file.
new_rows = []

# eps selected over every row of the results table (grid=0.005)
new_rows.append({
    "eps": "eps_final_error",
    "eTiOT": eps_records["eTiOT"][0],
    "eTAOT": eps_records["eTAOT"][0]
})

# eps selected over every second row only (presumably the 0.01 grid)
new_rows.append({
    "eps": "eps_final_error_grid001",
    "eTiOT": eps_records["eTiOT"][1],
    "eTAOT": eps_records["eTAOT"][1],
})

# value read from df2 at the eps selected on every second row
new_rows.append({
    "eps": "final_error_grid001",
    "eTiOT": eps_records["eTiOT"][2],
    "eTAOT": eps_records["eTAOT"][2],
})

# Drop summary rows written by a previous run, so that re-running this cell
# replaces them instead of piling duplicates up in the CSV.
df = df[~df["eps"].isin([row["eps"] for row in new_rows])]

# append new rows after the last row
df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

# save back (this overwrites the file the results were loaded from)
df.to_csv(result_file, index=False)
print("Updated result_file with final_error + eps values.")

Updated result_file with final_error + eps values.


In [1]:
import numpy as np
import ot
import pandas as pd
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt
import TiOT_lib
import os
# Load the Daily Delhi climate training table from the working directory.
# NOTE(review): none of the visible cells below reads this `df` -- confirm the load is still needed.
file_path = 'DailyDelhiClimateTrain.csv'
data = pd.read_csv(file_path)
# Wrap the loaded table in a DataFrame bound to the name df.
df = pd.DataFrame(data)
# Ensure 'date' is a datetime object
df['date'] = pd.to_datetime(df['date'])

# Use the parsed dates as the row index (modifies df in place)
df.set_index('date', inplace=True)

def process_data(dataset_name, start1, start2, numpoint ):
    """
    Read a whitespace-separated training file and return two slices of its rows.

    The file is ``time_series_kNN/<dataset_name>/<dataset_name>_TRAIN.txt``.
    Every field is converted to float and the first field of each row is
    dropped (presumably a class label -- confirm against the data format).

    Parameters:
        dataset_name (str): Folder name and file-name prefix of the dataset.
        start1 (int): Index of the first row of the first slice.
        start2 (int): Index of the first row of the second slice.
        numpoint (int): Number of rows in each slice.

    Returns:
        tuple: ``(X1, X2)``, two lists of rows, each row a list of floats.
    """
    train_path = os.path.join("time_series_kNN", dataset_name, dataset_name + "_TRAIN.txt")

    series = []
    with open(train_path, "r") as handle:
        for raw_line in handle:
            fields = [float(token) for token in raw_line.strip().split()]
            # Keep everything after the leading field.
            series.append(fields[1:])

    first_group = series[start1:start1 + numpoint]
    second_group = series[start2:start2 + numpoint]
    return first_group, second_group
# Two groups of 3 consecutive training rows, starting at rows 0 and 10.
dataset_name = 'SwedishLeaf'
X1, X2 = process_data(
    dataset_name,
    start1=0,
    start2=10,
    numpoint=3,
)

In [24]:
# Length of the first series in X1.
# NOTE(review): the output recorded here (2000) differs from the later `len(X1[0])` cell (128); presumably stale -- confirm.
print(len(X1[0]))

2000


In [2]:
import numpy as np


def gaussian_mixture_timeseries(length, n_components=3, weights=None, means=None, stds=None, random_state=None):
    """
    Generate a Gaussian mixture time series.

    Each time step independently picks one component according to ``weights``
    and draws a single sample from that component's Gaussian.

    Parameters:
        length (int): Length of the time series.
        n_components (int): Number of Gaussian components.
        weights (sequence or None): Mixing weights, one per component. They are
            rescaled to sum to 1. If None, uniform weights are used.
        means (sequence or None): Means of the Gaussians (list or array).
            If None, they are drawn uniformly from [-5, 5).
        stds (sequence or None): Standard deviations of the Gaussians (list or
            array). If None, they are drawn uniformly from [0.5, 2.0).
        random_state (int or None): Random seed for reproducibility.

    Returns:
        ts (ndarray): Generated time series of shape (length,).
        component_ids (ndarray): The component index chosen at each time step.
    """
    rng = np.random.default_rng(random_state)

    # Default weights
    if weights is None:
        weights = np.ones(n_components) / n_components
    weights = np.array(weights) / np.sum(weights)  # normalize

    # Default means and stds (drawn in this order so seeded runs stay reproducible)
    if means is None:
        means = rng.uniform(-5, 5, size=n_components)
    if stds is None:
        stds = rng.uniform(0.5, 2.0, size=n_components)

    # Plain Python lists cannot be indexed with an index array, so convert
    # both parameter vectors before the per-step lookup below.
    means = np.asarray(means, dtype=float)
    stds = np.asarray(stds, dtype=float)

    # Sample component for each time step
    component_ids = rng.choice(n_components, size=length, p=weights)

    # Draw samples from corresponding Gaussians
    ts = rng.normal(means[component_ids], stds[component_ids])

    return ts, component_ids


# Example usage
# Example usage: two synthetic series drawn with different seeds.
# The component ids of the second series get their own name, so that the ids
# printed below are the ones that belong to the printed series `ts`.
ts, comps = gaussian_mixture_timeseries(500, n_components=2, random_state=42)
ts2, comps2 = gaussian_mixture_timeseries(500, n_components=2, random_state=41)
print("Time series:", ts)
print("Component IDs:", comps)

# Join the first two series of each group end to end.
X = np.concatenate((X1[0], X1[1]))
Y = np.concatenate((X2[0], X2[1]))


Time series: [ 2.43488930e+00 -1.81633595e+00  5.40526929e-02 -1.92773641e+00
  3.92955825e+00  4.67994173e+00  3.39488084e+00 -1.05377104e+00
  9.06388091e-02 -1.08844021e+00  4.41222227e+00 -5.34804738e-01
 -1.13008206e+00 -8.19806069e-01 -2.92265745e+00  1.49738639e+00
  7.72786968e-01  1.45320402e+00 -2.93416414e+00 -5.19453147e+00
 -1.45148794e+00  7.06701293e+00  3.51708872e+00  1.73910294e+00
  3.57107594e+00 -3.02453845e+00 -1.07089299e+00 -4.57418252e-01
  2.58562140e+00  4.15344028e+00  3.35575061e+00  3.93445849e+00
  1.50882183e+00  4.34476185e+00  5.65193177e+00 -2.11111725e+00
  1.15245219e+00  1.45397650e+00 -9.07043364e-01  5.24944837e+00
 -1.29539884e+00  1.63836058e+00  2.97464357e+00  3.20124699e+00
  1.80791989e+00  2.09275218e+00  1.05693123e+00  1.93757394e+00
  8.81162263e-02 -3.03196062e+00  3.74347589e-01 -1.44418410e+00
  4.79173116e+00 -4.31286664e+00  1.33326199e+00 -2.75670660e-01
 -1.88860936e+00  3.18236158e+00 -8.88309293e-01 -1.00295051e+00
 -8.57323698

In [5]:
# Number of values in the first series of X1.
len(X1[0])

128

In [70]:
# Run TiOT_lib.TAOT on the two synthetic series with w passed as 0.5; the call returns two values, kept as d and w.
d, w = TiOT_lib.TAOT(ts, ts2, w = 0.5)

Complete solving TiOT problem after 0.0013159000000086962 (s)


In [20]:
# Run TiOT_lib.eTAOT on the two synthetic series with eps=0.1 and freq=1; verbose=True makes it report its progress.
d, w = TiOT_lib.eTAOT(ts, ts2, eps = 0.1, freq = 1, verbose=True)


TAOT-BCD Algorithm converges after 16 iterations
Complete solving TiOT problem after 0.013160099999993236 (s)


In [32]:
# Run TiOT_lib.eTiOT with eps=0.1, freq=1 and solver='PGD'; three values are returned, and w is displayed.
d, pi, w = TiOT_lib.eTiOT(ts, ts2, eps = 0.1, freq = 1, solver='PGD', verbose=True)
w

Choose 'PGD' by default. 
TiOT-BCD Algorithm converges after 35 iterations
Complete solving eTiOT after 3.026085700000067 (s) with computation time for w = 0


0.6763775377735396

In [25]:
# Same eTiOT call with a smaller eps (0.01), freq=10 and the library's default solver; w is displayed.
d, pi, w = TiOT_lib.eTiOT(ts, ts2, eps = 0.01, freq = 10, verbose=True)
w

TiOT-BCD Algorithm converges after 391 iterations
Complete solving eTiOT after 0.9709818000000041 (s) with computation time for w = 0


0.9011431847560162

In [80]:
# Run TiOT_lib.TiOT on the two synthetic series with its default arguments and display d.
d, w = TiOT_lib.TiOT(ts, ts2)
d

Complete solving TiOT problem after 0.10822419999999511 (s)


0.09244965866633473

In [44]:
# Display w; several cells above assign it, so the value shown is the one left by whichever of them ran last.
w

0.8141189920317563

In [34]:
def f(x):
    """Return the pair ``(x*10, x*100)``."""
    tenfold = x*10
    hundredfold = x*100
    return tenfold, hundredfold

# Negative indexing picks the last element of the returned tuple.
f(10)[-1]

1000

In [35]:
# Last element of the tuple returned by f(20).
f(20)[-1]

2000