# In [None]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import seaborn as sns
import matplotlib.pyplot as plt


def _normalize_ticker(name):
    """Return *name* stripped, upper-cased and carrying a single "-USD" suffix."""
    ticker = name.strip().upper()
    return ticker if ticker.endswith("-USD") else f"{ticker}-USD"


def crypto_analysis(file_path, n_components=10, n_clusters=4, selected_cryptos=None,
                    random_state=None):
    """
    Run PCA, KMeans clustering and correlation analysis on a cryptocurrency CSV.

    The CSV is read with its first column as the index; every index label is
    treated as a cryptocurrency ticker and normalised to the "XXX-USD" form.
    Each row is one cryptocurrency and each remaining column is one numeric
    observation used for scaling, PCA and correlation.

    Parameters:
    - file_path: Path (or buffer) accepted by ``pandas.read_csv``.
    - n_components: Forwarded to ``sklearn.decomposition.PCA``.
    - n_clusters: Number of KMeans clusters.
    - selected_cryptos: Optional ticker or iterable of tickers (with or
      without the "-USD" suffix, any case) for which a dedicated correlation
      matrix is computed. A ticker missing from the data raises ``KeyError``.
    - random_state: Optional seed forwarded to ``DataFrame.sample`` when a
      representative cryptocurrency is drawn from each cluster. The default
      ``None`` keeps the draw non-deterministic. KMeans itself is always
      seeded with 42.

    Returns:
    - dict with the keys:
        'cluster_table': DataFrame with PC1, PC2, Cluster, Cryptocurrency.
        'random_crypto_table': one representative row per cluster.
        'silhouette_score': silhouette score of the clustering in PCA space.
        'correlation_matrix': crypto-by-crypto correlation matrix.
        'random_crypto_correlations': for each representative, its 4 largest
            and 4 smallest correlations with the other cryptocurrencies.
        'selected_correlation_matrix': correlation matrix restricted to
            ``selected_cryptos``, or None when none were given.
        'global_correlation_fig': matplotlib Figure of the global heatmap.

    Side effects:
    - Displays the global heatmap through ``plt.show()``.
    """
    results = {}

    # Preprocessing
    raw_data = pd.read_csv(file_path, index_col=0)
    # Only append "-USD" when it is missing, so labels that already read
    # "BTC-USD" are not turned into "BTC-USD-USD".
    raw_data.index = [_normalize_ticker(name) for name in raw_data.index]
    # NOTE(review): with the default axis this interpolates down each column,
    # i.e. between neighbouring rows (cryptocurrencies) — confirm intended.
    raw_data = raw_data.interpolate(method='linear')
    scaled_data = StandardScaler().fit_transform(raw_data)

    # PCA and clustering
    pca_result = PCA(n_components=n_components).fit_transform(scaled_data)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(pca_result)
    silhouette_avg = silhouette_score(pca_result, clusters)

    # Name the columns from the actual PCA output width: n_components may be
    # None, a float or 'mle', in which case it is not the number of columns.
    pca_columns = [f'PC{i + 1}' for i in range(pca_result.shape[1])]
    pca_df = pd.DataFrame(pca_result, columns=pca_columns)
    pca_df['Cluster'] = clusters
    pca_df['Cryptocurrency'] = raw_data.index

    # Copy so callers can modify the table without touching pca_df.
    cluster_table = pca_df[['PC1', 'PC2', 'Cluster', 'Cryptocurrency']].copy()

    # Pick one representative per cluster. Cluster 0 prefers LTC-USD when it
    # is a member; every other case is a random draw from the cluster.
    representatives = []
    for cluster, group in cluster_table.groupby('Cluster'):
        picked = group[group['Cryptocurrency'] == 'LTC-USD'] if cluster == 0 else None
        if picked is None or picked.empty:
            picked = group.sample(1, random_state=random_state)
        row = picked.iloc[0]
        representatives.append({
            "Cluster": cluster,
            "Cryptocurrency": row['Cryptocurrency'],
            "PC1": row['PC1'],
            "PC2": row['PC2'],
        })
    random_crypto_table = pd.DataFrame(representatives)

    # Correlation between cryptocurrencies (rows), hence the transpose.
    correlation_matrix = raw_data.transpose().corr()

    # Strongest and weakest correlations for each representative.
    random_crypto_correlations = {}
    for crypto in random_crypto_table['Cryptocurrency'].tolist():
        # Exclude the self-correlation before ranking.
        correlations = correlation_matrix[crypto].drop(crypto, errors='ignore')
        random_crypto_correlations[crypto] = {
            'Top Positive': correlations.nlargest(4),
            'Top Negative': correlations.nsmallest(4),
        }

    # Correlation restricted to the user-selected cryptocurrencies.
    if selected_cryptos:
        if isinstance(selected_cryptos, str):
            selected_cryptos = [selected_cryptos]
        # Apply the same normalisation as the index so "btc" and "BTC-USD"
        # both resolve to the "BTC-USD" row.
        wanted = [_normalize_ticker(name) for name in selected_cryptos]
        selected_correlation_matrix = raw_data.loc[wanted].transpose().corr()
    else:
        selected_correlation_matrix = None

    # Global correlation heatmap. Style the axes explicitly instead of relying
    # on pyplot's implicit "current axes" state.
    fig_global, ax_global = plt.subplots(figsize=(12, 10))
    sns.heatmap(
        correlation_matrix,
        annot=True,
        fmt=".2f",
        cmap='coolwarm',
        linewidths=0.5,
        ax=ax_global,
        annot_kws={"size": 8},
    )
    ax_global.set_title("Global Cryptocurrency Correlation Heatmap", fontsize=16)
    plt.setp(ax_global.get_xticklabels(), rotation=45, ha='right', fontsize=10)
    plt.setp(ax_global.get_yticklabels(), rotation=0, fontsize=10)
    fig_global.tight_layout()
    plt.show()

    # Collect results
    results['cluster_table'] = cluster_table
    results['random_crypto_table'] = random_crypto_table
    results['silhouette_score'] = silhouette_avg
    results['correlation_matrix'] = correlation_matrix
    results['random_crypto_correlations'] = random_crypto_correlations
    # Previously computed but never returned.
    results['selected_correlation_matrix'] = selected_correlation_matrix
    results['global_correlation_fig'] = fig_global

    return results