In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import silhouette_score, silhouette_samples
from collections import Counter
from scipy.stats import entropy
import warnings
warnings.filterwarnings('ignore')

from functions import join_stocks_crypto

In [4]:
# Load the stock and crypto tables from CSV, using the 'Date' column as the index.
df_all_stocks = pd.read_csv('stocks_data_filtered_volatility.csv',index_col='Date')
cryptos_df = pd.read_csv('cryptos_data.csv', index_col='Date')

# Combine both tables with the project helper `join_stocks_crypto`.
# Per the author's note, `mode` chooses the join direction: either a left join on
# crypto dates (filling NA for stocks) or a left join on stock dates (leaving out
# some crypto dates). 'stocks_left' is presumably the latter -- TODO confirm
# against functions.join_stocks_crypto.
joined_df = join_stocks_crypto(cryptos_df, df_all_stocks, mode = 'stocks_left')
# resample() below requires a datetime-like index, so convert it here.
joined_df.index = pd.to_datetime(joined_df.index)

# Weekly aggregation: keep the last observation of each weekly bin.
joined_df_weekly = joined_df.resample('W').last()
# 3-day aggregation (roughly twice per week): keep the last observation of each
# 3-day bin; the author's target is a series length of about 250 observations.
joined_df_3days = joined_df.resample('3D').last()

In [5]:
def improved_clustering_tuning(df_dict, param_grid, n_trials=3, test_split=0.3):
    """
    Grid-search clustering configurations and collect per-configuration metrics.

    Parameters:
    -----------
    df_dict : dict
        Price dataframes keyed by 'full', '3day' and 'weekly'. The 'full'
        entry is the one used for the 'daily' aggregation.
    param_grid : dict
        Candidate values per parameter ('n_clusters', 'method', 'linkage',
        'window', 'aggregation'); expanded by _generate_param_combinations.
    n_trials : int
        How many seeded trials to run for every configuration.
    test_split : float
        Fraction of rows held out; forwarded to _evaluate_single_configuration.

    Returns:
    --------
    pd.DataFrame
        One row per configuration that produced at least one successful
        trial (aggregated by _aggregate_trial_results).
    """
    # Translate aggregation names into the dataframes supplied by the caller.
    frames_by_aggregation = {
        'daily': df_dict.get('full', None),
        '3day': df_dict.get('3day', None),
        'weekly': df_dict.get('weekly', None),
    }

    grid = _generate_param_combinations(param_grid)
    total = len(grid)
    print(f"Testing {total} parameter combinations...")

    rows = []
    for position, config in enumerate(grid):
        # Lightweight progress report every tenth configuration.
        if position % 10 == 0:
            print(f"Progress: {position}/{total}")

        try:
            frame = frames_by_aggregation.get(config['aggregation'])
            if frame is None:
                # No dataframe was provided for this aggregation level.
                continue

            # Repeat the evaluation with different seeds; failed trials are None.
            attempts = (
                _evaluate_single_configuration(frame, config, trial, test_split)
                for trial in range(n_trials)
            )
            successful = [outcome for outcome in attempts if outcome is not None]

            if successful:
                rows.append(_aggregate_trial_results(successful, config))

        except Exception as e:
            print(f"Error with params {config}: {e}")
            continue

    return pd.DataFrame(rows)

def _generate_param_combinations(param_grid):
    """Generate all combinations of parameters."""
    from itertools import product
    
    # Handle method-specific parameters
    combinations = []
    
    methods = param_grid.get('method', ['kmeans'])
    
    for method in methods:
        method_params = {
            'method': method,
            'n_clusters': param_grid.get('n_clusters', [6]),
            'window': param_grid.get('window', [1]),
            'aggregation': param_grid.get('aggregation', ['daily'])
        }
        
        # Add linkage only for AHC
        if method == 'ahc':
            method_params['linkage'] = param_grid.get('linkage', ['average'])
        else:
            method_params['linkage'] = ['not_applicable']
        
        # Generate combinations for this method
        keys = list(method_params.keys())
        values = list(method_params.values())
        
        for combination in product(*values):
            param_dict = dict(zip(keys, combination))
            combinations.append(param_dict)
    
    return combinations

def _evaluate_single_configuration(df, params, trial, test_split):
    """Evaluate a single parameter configuration."""
    
    # Apply smoothing (NO LOOK-AHEAD BIAS)
    if params['window'] > 1:
        df_smooth = df.rolling(window=params['window'], center=False).mean().dropna()
    else:
        df_smooth = df.copy()
    
    # Split data for validation
    split_idx = int(len(df_smooth) * (1 - test_split))
    df_train = df_smooth.iloc[:split_idx]
    df_test = df_smooth.iloc[split_idx:]
    
    # Ensure we have enough data
    if len(df_train) < 100 or len(df_test) < 50:
        return None
    
    try:
        # Train clustering model
        labels_train, tickers_labels_train, distance_matrix_train = _run_clustering_safe(
            df_train, params, trial_seed=trial
        )
        
        # Evaluate on training data
        train_metrics = _calculate_comprehensive_metrics(
            df_train, labels_train, tickers_labels_train, distance_matrix_train, params
        )
        
        # Evaluate stability on test data (if possible)
        try:
            labels_test, tickers_labels_test, distance_matrix_test = _run_clustering_safe(
                df_test, params, trial_seed=trial + 100
            )
            test_metrics = _calculate_comprehensive_metrics(
                df_test, labels_test, tickers_labels_test, distance_matrix_test, params
            )
            
            # Calculate stability score (how similar are train/test clusterings?)
            stability_score = _calculate_stability_score(tickers_labels_train, tickers_labels_test)
            
        except:
            test_metrics = None
            stability_score = None
        
        return {
            'trial': trial,
            'train_metrics': train_metrics,
            'test_metrics': test_metrics,
            'stability_score': stability_score,
            **params
        }
        
    except Exception as e:
        return None

def _run_clustering_safe(df, params, trial_seed):
    """Safely run clustering with error handling."""
    
    # Calculate returns
    df_returns = df.pct_change().dropna()
    data_clustering = df_returns.T.values
    tickers = list(df.columns)
    
    # Scale data
    from tslearn.preprocessing import TimeSeriesScalerMeanVariance
    scaler = TimeSeriesScalerMeanVariance()
    data_scaled = scaler.fit_transform(data_clustering)
    
    # Set random seed for reproducibility
    np.random.seed(trial_seed)
    
    method = params['method']
    n_clusters = params['n_clusters']
    
    if method == 'ahc':
        # Calculate distance matrix once
        distance_matrix = _calculate_dtw_matrix(data_scaled)
        
        from sklearn.cluster import AgglomerativeClustering
        model = AgglomerativeClustering(
            n_clusters=n_clusters, 
            metric='precomputed', 
            linkage=params['linkage']
        )
        labels = model.fit_predict(distance_matrix)
        
    elif method == 'kmeans':
        from tslearn.clustering import TimeSeriesKMeans
        model = TimeSeriesKMeans(
            n_clusters=n_clusters, 
            metric="dtw", 
            n_init=3, 
            init='random',
            random_state=trial_seed
        )
        labels = model.fit_predict(data_scaled)
        distance_matrix = None  # Will calculate if needed
        
    elif method == 'kshape':
        from tslearn.clustering import KShape
        model = KShape(
            n_clusters=n_clusters, 
            n_init=3,
            random_state=trial_seed
        )
        labels = model.fit_predict(data_scaled)
        distance_matrix = None  # Will calculate if needed
    
    # Create tickers to labels mapping
    tickers_with_labels = {ticker: int(label) for ticker, label in zip(tickers, labels)}
    
    return labels, tickers_with_labels, distance_matrix

def _calculate_dtw_matrix(data_scaled):
    """Return tslearn's pairwise DTW distance matrix for the given series.

    `data_scaled` is passed to tslearn.metrics.cdist_dtw unchanged
    (presumably one series per row -- confirm against the callers).
    """
    from tslearn.metrics import cdist_dtw

    pairwise_dtw = cdist_dtw(data_scaled)
    return pairwise_dtw

def _calculate_comprehensive_metrics(df, labels, tickers_labels, distance_matrix, params):
    """
    Compute quality, balance and correlation metrics for one clustering.

    Parameters:
    -----------
    df : pd.DataFrame
        Price dataframe the clustering was computed from.
    labels : sequence
        Cluster label per asset.
    tickers_labels : dict
        Mapping ticker -> cluster label.
    distance_matrix : array-like or None
        Precomputed pairwise distances. When None, a DTW matrix is computed
        here from the z-normalised returns.
    params : dict
        Accepted for interface symmetry; not read by this function.

    Returns:
    --------
    dict
        'silhouette_score', 'silhouette_std', 'min_silhouette',
        'balance_entropy' (entropy of cluster proportions divided by
        log(n_clusters)), 'balance_ratio' (smallest / largest cluster size),
        'largest_cluster_pct', 'avg_within_cluster_corr', 'n_clusters',
        'n_assets'.
    """
    # Basic info
    n_assets = len(labels)
    n_clusters = len(set(labels))

    # Returns are needed for both the silhouette fallback and the correlations.
    df_returns = df.pct_change().dropna()

    # Silhouette metrics
    if distance_matrix is not None:
        sil_score = silhouette_score(distance_matrix, labels, metric='precomputed')
        sil_samples = silhouette_samples(distance_matrix, labels, metric='precomputed')
    else:
        try:
            # Scale the returns the same way _run_clustering_safe does, so the
            # silhouette is measured on the data that was actually clustered
            # and is comparable with the 'ahc' path (whose matrix is built
            # from scaled data). Previously the raw returns were used here.
            from tslearn.preprocessing import TimeSeriesScalerMeanVariance
            data_scaled = TimeSeriesScalerMeanVariance().fit_transform(df_returns.T.values)
            distance_matrix = _calculate_dtw_matrix(data_scaled)
            sil_score = silhouette_score(distance_matrix, labels, metric='precomputed')
            sil_samples = silhouette_samples(distance_matrix, labels, metric='precomputed')
        except Exception as e:
            # Best-effort: keep the neutral defaults, but say why.
            print(f"  Silhouette unavailable ({type(e).__name__}: {e}); defaulting to 0")
            sil_score = 0
            sil_samples = np.zeros(len(labels))

    # Balance metrics
    cluster_counts = Counter(labels)
    cluster_sizes = list(cluster_counts.values())

    # Entropy of the cluster-size distribution, normalised by its maximum.
    cluster_proportions = np.array(cluster_sizes) / n_assets
    balance_entropy = entropy(cluster_proportions)
    max_entropy = np.log(n_clusters)
    normalized_entropy = balance_entropy / max_entropy if max_entropy > 0 else 0

    # Size metrics
    min_cluster_size = min(cluster_sizes)
    max_cluster_size = max(cluster_sizes)
    balance_ratio = min_cluster_size / max_cluster_size
    largest_cluster_pct = max_cluster_size / n_assets

    # Economic metrics (pairwise return correlations within clusters)
    within_cluster_correlations = []

    for cluster_id in set(labels):
        cluster_assets = [ticker for ticker, label in tickers_labels.items() if label == cluster_id]
        if len(cluster_assets) >= 2:
            cluster_returns = df_returns[cluster_assets]
            corr_matrix = cluster_returns.corr()
            # Strict upper triangle: each unordered pair once, no diagonal.
            mask = np.triu(np.ones_like(corr_matrix), k=1).astype(bool)
            correlations = corr_matrix.values[mask]
            within_cluster_correlations.extend(correlations)

    avg_within_cluster_corr = np.mean(within_cluster_correlations) if within_cluster_correlations else 0

    return {
        'silhouette_score': sil_score,
        'silhouette_std': np.std(sil_samples),
        'min_silhouette': np.min(sil_samples),
        'balance_entropy': normalized_entropy,
        'balance_ratio': balance_ratio,
        'largest_cluster_pct': largest_cluster_pct,
        'avg_within_cluster_corr': avg_within_cluster_corr,
        'n_clusters': n_clusters,
        'n_assets': n_assets
    }

def _calculate_stability_score(labels_1, labels_2):
    """Adjusted Rand Index between two ticker -> label mappings.

    Only tickers present in both mappings are compared; with fewer than two
    shared tickers the score is 0.
    """
    from sklearn.metrics import adjusted_rand_score

    shared = set(labels_1.keys()).intersection(labels_2.keys())
    if len(shared) < 2:
        return 0

    # Fix one ordering so both label vectors refer to the same assets.
    ordered_assets = list(shared)
    first_labels = [labels_1[name] for name in ordered_assets]
    second_labels = [labels_2[name] for name in ordered_assets]

    return adjusted_rand_score(first_labels, second_labels)

def _aggregate_trial_results(trial_results, params):
    """Aggregate results across multiple trials."""
    
    # Extract training metrics
    train_metrics = [trial['train_metrics'] for trial in trial_results]
    
    # Calculate means and stds
    aggregated = {}
    for key in train_metrics[0].keys():
        values = [tm[key] for tm in train_metrics]
        aggregated[f'{key}_mean'] = np.mean(values)
        aggregated[f'{key}_std'] = np.std(values)
    
    # Stability scores
    stability_scores = [trial['stability_score'] for trial in trial_results if trial['stability_score'] is not None]
    if stability_scores:
        aggregated['stability_mean'] = np.mean(stability_scores)
        aggregated['stability_std'] = np.std(stability_scores)
    else:
        aggregated['stability_mean'] = None
        aggregated['stability_std'] = None
    
    # Add parameters
    aggregated.update(params)
    aggregated['n_trials'] = len(trial_results)
    
    return aggregated

# Usage example: run the full grid search and persist the aggregated results.

# Candidate values per parameter. 'linkage' is only applied to the 'ahc'
# method; other methods get the placeholder 'not_applicable'.
param_grid = {
    'n_clusters': [4, 6, 8, 10, 12],
    'method': ['kmeans', 'ahc', 'kshape'],
    'linkage': ['average', 'complete', 'single'],
    'window': [1, 7, 14, 30],
    'aggregation': ['daily', '3day', 'weekly']
}

# Dataframes per aggregation level; the 'full' entry serves the 'daily' aggregation.
df_dict = {
    'full': joined_df,
    '3day': joined_df_3days, 
    'weekly': joined_df_weekly
}

# Three seeded trials per configuration; one CSV row per configuration that
# produced at least one successful trial.
results = improved_clustering_tuning(df_dict, param_grid, n_trials=3)
results.to_csv('improved_clustering_results.csv', index=False)


Testing 1260 parameter combinations...
Progress: 0/1260
Progress: 10/1260
Progress: 20/1260
Progress: 30/1260
Progress: 40/1260
Progress: 50/1260
Progress: 60/1260
Progress: 70/1260
Progress: 80/1260
Progress: 90/1260
Progress: 100/1260
Progress: 110/1260
Progress: 120/1260
Progress: 130/1260
Progress: 140/1260
Progress: 150/1260
Progress: 160/1260
Progress: 170/1260
Progress: 180/1260
Progress: 190/1260
Progress: 200/1260
Progress: 210/1260
Progress: 220/1260
Progress: 230/1260
Progress: 240/1260
Progress: 250/1260
Progress: 260/1260
Progress: 270/1260
Progress: 280/1260
Progress: 290/1260
Progress: 300/1260
Progress: 310/1260
Progress: 320/1260
Progress: 330/1260
Progress: 340/1260
Progress: 350/1260
Progress: 360/1260
Progress: 370/1260
Progress: 380/1260
Progress: 390/1260
Progress: 400/1260
Progress: 410/1260
Progress: 420/1260
Progress: 430/1260
Progress: 440/1260
Progress: 450/1260
Progress: 460/1260
Progress: 470/1260
Progress: 480/1260
Progress: 490/1260
Progress: 500/1260
Pro

In [8]:
import pandas as pd
import numpy as np
from sklearn.metrics import silhouette_score, silhouette_samples
from collections import Counter
from scipy.stats import entropy
import warnings
warnings.filterwarnings('ignore')

def debug_clustering_tuning(df_dict, param_grid, n_trials=1, verbose=True):
    """
    Simplified clustering tuning with extensive debugging output.

    Parameters:
    -----------
    df_dict : dict
        Dictionary with dataframe name as key and stock prices dataframe as value
    param_grid : dict
        Grid of parameters to test. Recognised keys (with defaults):
        'method' (['kmeans']), 'n_clusters' ([6]), 'window' ([1]),
        'aggregation' (['daily']) and 'linkage' (['average'], used only for
        the 'ahc' method).
    n_trials : int
        Forwarded to _test_single_combination (start with 1 for debugging)
    verbose : bool
        Print per-combination debug information

    Returns:
    --------
    pd.DataFrame
        One row per combination that produced a result.
    """
    from itertools import product

    print("=== DEBUGGING CLUSTERING TUNING ===")
    print(f"Available dataframes: {list(df_dict.keys())}")
    for name, frame in df_dict.items():
        print(f"  {name}: {frame.shape}")

    print(f"Parameter grid: {param_grid}")

    results = []

    methods = param_grid.get('method', ['kmeans'])
    n_clusters_list = param_grid.get('n_clusters', [6])
    windows = param_grid.get('window', [1])
    aggregations = param_grid.get('aggregation', ['daily'])
    linkages = param_grid.get('linkage', ['average'])

    def _linkages_for(method):
        # Linkage only varies for agglomerative clustering.
        return linkages if method == 'ahc' else ['not_applicable']

    # Count linkage variants per method so that 'ahc' contributes one
    # combination per linkage; otherwise the "i/total" progress overshoots.
    total_combinations = (
        len(n_clusters_list) * len(windows) * len(aggregations)
        * sum(len(_linkages_for(method)) for method in methods)
    )
    print(f"Total combinations to test: {total_combinations}")

    combination_count = 0

    # Same iteration order as nested loops: method, n_clusters, window,
    # aggregation, then linkage innermost.
    for method, n_clusters, window, aggregation in product(
        methods, n_clusters_list, windows, aggregations
    ):
        for linkage in _linkages_for(method):
            combination_count += 1

            params = {
                'method': method,
                'n_clusters': n_clusters,
                'window': window,
                'aggregation': aggregation,
                'linkage': linkage
            }

            if verbose:
                print(f"\n--- Combination {combination_count}/{total_combinations} ---")
                print(f"Params: {params}")

            try:
                # Get dataframe
                df = _get_dataframe(df_dict, aggregation, verbose)
                if df is None:
                    if verbose:
                        print("  ❌ Dataframe not found")
                    continue

                # Test the combination
                result = _test_single_combination(df, params, n_trials, verbose)

                if result is not None:
                    results.append(result)
                    if verbose:
                        # Format only when a score exists; applying ':.3f'
                        # to the 'N/A' placeholder would raise ValueError.
                        score = result.get('silhouette_score')
                        score_text = f"{score:.3f}" if score is not None else 'N/A'
                        print(f"  ✅ Success! Silhouette: {score_text}")
                else:
                    if verbose:
                        print("  ❌ Failed to get result")

            except Exception as e:
                if verbose:
                    print(f"  ❌ Error: {str(e)}")
                continue

    print(f"\n=== SUMMARY ===")
    print(f"Total combinations tested: {combination_count}")
    print(f"Successful results: {len(results)}")

    return pd.DataFrame(results)

def _get_dataframe(df_dict, aggregation, verbose=False):
    """Get the right dataframe based on aggregation."""
    
    # Mapping between aggregation names and df_dict keys
    mapping = {
        'daily': ['full', 'daily'],
        '3day': ['3day'],
        'weekly': ['weekly']
    }
    
    possible_keys = mapping.get(aggregation, [aggregation])
    
    for key in possible_keys:
        if key in df_dict:
            if verbose:
                print(f"  Using dataframe '{key}' for aggregation '{aggregation}'")
            return df_dict[key]
    
    if verbose:
        print(f"  No dataframe found for aggregation '{aggregation}'. Available: {list(df_dict.keys())}")
    return None

def _test_single_combination(df, params, n_trials, verbose=False):
    """Test a single parameter combination."""
    
    try:
        # Apply smoothing
        if params['window'] > 1:
            df_smooth = df.rolling(window=params['window']).mean().dropna()
            if verbose:
                print(f"  Applied {params['window']}-period smoothing: {df.shape} → {df_smooth.shape}")
        else:
            df_smooth = df.copy()
        
        # Check data size
        if len(df_smooth) < 50:
            if verbose:
                print(f"  ❌ Not enough data: {len(df_smooth)} rows")
            return None
        
        # Run clustering
        result = _run_simple_clustering(df_smooth, params, verbose)
        
        if result is not None:
            result.update(params)  # Add parameters to result
            
        return result
        
    except Exception as e:
        if verbose:
            print(f"  ❌ Exception in _test_single_combination: {str(e)}")
        return None

def _run_simple_clustering(df, params, verbose=False):
    """Run clustering and calculate metrics."""
    
    try:
        # Calculate returns
        df_returns = df.pct_change().dropna()
        
        if len(df_returns) < 20:
            if verbose:
                print("  ❌ Not enough return data")
            return None
            
        # Get data for clustering
        data_clustering = df_returns.T.values  # Assets as rows, time as columns
        tickers = list(df.columns)
        
        if verbose:
            print(f"  Data shape for clustering: {data_clustering.shape}")
        
        # Scale data
        try:
            from tslearn.preprocessing import TimeSeriesScalerMeanVariance
            scaler = TimeSeriesScalerMeanVariance()
            data_scaled = scaler.fit_transform(data_clustering)
        except:
            # Fallback to sklearn scaler
            from sklearn.preprocessing import StandardScaler
            scaler = StandardScaler()
            data_scaled = scaler.fit_transform(data_clustering)
        
        # Run clustering based on method
        labels = _run_clustering_method(data_scaled, params, verbose)
        
        if labels is None:
            return None
        
        # Calculate metrics
        metrics = _calculate_simple_metrics(data_scaled, labels, verbose)
        
        if metrics is not None:
            metrics['n_assets'] = len(tickers)
            metrics['n_clusters_actual'] = len(set(labels))
            
        return metrics
        
    except Exception as e:
        if verbose:
            print(f"  ❌ Exception in _run_simple_clustering: {str(e)}")
        return None

def _run_clustering_method(data_scaled, params, verbose=False):
    """Run the actual clustering algorithm."""
    
    method = params['method']
    n_clusters = params['n_clusters']
    
    try:
        if method == 'kmeans':
            try:
                from tslearn.clustering import TimeSeriesKMeans
                model = TimeSeriesKMeans(
                    n_clusters=n_clusters, 
                    metric="dtw", 
                    n_init=1,  # Reduce for debugging
                    init='random',
                    random_state=42
                )
                labels = model.fit_predict(data_scaled)
            except Exception as e:
                if verbose:
                    print(f"  TSLearn KMeans failed: {e}, trying sklearn...")
                # Fallback to regular KMeans
                from sklearn.cluster import KMeans
                model = KMeans(n_clusters=n_clusters, random_state=42, n_init=1)
                labels = model.fit_predict(data_scaled.reshape(data_scaled.shape[0], -1))
                
        elif method == 'ahc':
            from sklearn.cluster import AgglomerativeClustering
            
            # Use euclidean distance for now (DTW too slow for debugging)
            model = AgglomerativeClustering(
                n_clusters=n_clusters, 
                linkage=params['linkage']
            )
            data_flat = data_scaled.reshape(data_scaled.shape[0], -1)
            labels = model.fit_predict(data_flat)
            
        elif method == 'kshape':
            try:
                from tslearn.clustering import KShape
                model = KShape(n_clusters=n_clusters, n_init=1, random_state=42)
                labels = model.fit_predict(data_scaled)
            except Exception as e:
                if verbose:
                    print(f"  KShape failed: {e}, trying sklearn KMeans...")
                from sklearn.cluster import KMeans
                model = KMeans(n_clusters=n_clusters, random_state=42, n_init=1)
                labels = model.fit_predict(data_scaled.reshape(data_scaled.shape[0], -1))
        
        else:
            if verbose:
                print(f"  ❌ Unknown method: {method}")
            return None
        
        if verbose:
            print(f"  Clustering successful. Labels: {Counter(labels)}")
            
        return labels
        
    except Exception as e:
        if verbose:
            print(f"  ❌ Clustering failed: {str(e)}")
        return None

def _calculate_simple_metrics(data_scaled, labels, verbose=False):
    """Calculate basic metrics."""
    
    try:
        # Flatten data for distance calculation
        data_flat = data_scaled.reshape(data_scaled.shape[0], -1)
        
        # Silhouette score
        if len(set(labels)) > 1 and len(labels) > len(set(labels)):
            sil_score = silhouette_score(data_flat, labels)
        else:
            sil_score = -1  # Invalid clustering
        
        # Balance metrics
        cluster_counts = Counter(labels)
        cluster_sizes = list(cluster_counts.values())
        
        min_size = min(cluster_sizes)
        max_size = max(cluster_sizes)
        balance_ratio = min_size / max_size if max_size > 0 else 0
        
        # Entropy
        n_assets = len(labels)
        cluster_proportions = np.array(cluster_sizes) / n_assets
        balance_entropy = entropy(cluster_proportions)
        max_entropy = np.log(len(cluster_sizes))
        normalized_entropy = balance_entropy / max_entropy if max_entropy > 0 else 0
        
        metrics = {
            'silhouette_score': sil_score,
            'balance_ratio': balance_ratio,
            'balance_entropy': normalized_entropy,
            'largest_cluster_pct': max_size / n_assets,
            'cluster_counts': dict(cluster_counts)
        }
        
        if verbose:
            print(f"  Silhouette: {sil_score:.3f}, Balance ratio: {balance_ratio:.3f}")
            
        return metrics
        
    except Exception as e:
        if verbose:
            print(f"  ❌ Metrics calculation failed: {str(e)}")
        return None

# Simple test function
def test_clustering_simple(df_dict):
    """Smoke-test the debug tuner with a tiny k-means-only grid.

    Runs debug_clustering_tuning verbosely on the 'daily' aggregation with
    two cluster counts and two smoothing windows, and returns its dataframe.
    """
    minimal_grid = dict(
        method=['kmeans'],
        n_clusters=[4, 6],
        window=[1, 7],
        aggregation=['daily'],
    )
    return debug_clustering_tuning(df_dict, minimal_grid, n_trials=1, verbose=True)

# Usage example: smoke test first, then a larger debug grid.

# Dataframes per aggregation level. The 'daily' aggregation is served by the
# 'full' entry (see the alias mapping in _get_dataframe).
df_dict = {
    'full': joined_df,  # Must refer to the joined price dataframe built in the loading cell
    '3day': joined_df_3days, 
    'weekly': joined_df_weekly
}

# Simple test first: k-means only, daily data, 4 combinations.
results = test_clustering_simple(df_dict)
print(results)

# If that works, try the larger grid. 'linkage' only applies to 'ahc'.
param_grid = {
    'method': ['kmeans', 'ahc'],
    'n_clusters': [4, 6, 8, 10],
    'window': [1, 7, 14],
    'aggregation': ['daily', '3day'],
    'linkage': ['average', 'complete']
}

# verbose=True prints the parameters and outcome of every combination.
full_results = debug_clustering_tuning(df_dict, param_grid, n_trials=1, verbose=True)


=== DEBUGGING CLUSTERING TUNING ===
Available dataframes: ['full', '3day', 'weekly']
  full: (520, 213)
  3day: (242, 213)
  weekly: (104, 213)
Parameter grid: {'method': ['kmeans'], 'n_clusters': [4, 6], 'window': [1, 7], 'aggregation': ['daily']}
Total combinations to test: 4

--- Combination 1/4 ---
Params: {'method': 'kmeans', 'n_clusters': 4, 'window': 1, 'aggregation': 'daily', 'linkage': 'not_applicable'}
  Using dataframe 'full' for aggregation 'daily'
  Data shape for clustering: (213, 519)
  Clustering successful. Labels: Counter({np.int64(2): 92, np.int64(1): 59, np.int64(0): 50, np.int64(3): 12})
  Silhouette: 0.037, Balance ratio: 0.130
  ✅ Success! Silhouette: 0.037

--- Combination 2/4 ---
Params: {'method': 'kmeans', 'n_clusters': 4, 'window': 7, 'aggregation': 'daily', 'linkage': 'not_applicable'}
  Using dataframe 'full' for aggregation 'daily'
  Applied 7-period smoothing: (520, 213) → (514, 213)
  Data shape for clustering: (213, 513)
  Clustering successful. Labels