In [11]:
# =============================================================================
# Cell 1: Configuration & Setup
# =============================================================================
# This cell contains all user settings and imports all necessary libraries.
# Modify the file paths and parameters below to match your analysis needs.
# =============================================================================

# --- 1. General Configuration ---

# 1a. File Paths
# INPUT_FILE: Excel workbook read by load_data; its first column is used as the
# metabolite-name index and every other column is treated as a sample.
# NOTE: "/PATH/TO" is a placeholder and must be replaced with a real file path.
INPUT_FILE = "/PATH/TO"
# OUTPUT_PATH_BASE is a filename prefix, not a directory. Cell 2 appends
# "_<pathway name>" (when pathways are listed below) and then "_PCA.pdf" /
# "_PCA_Results.xlsx" to it.
# NOTE(review): presumably the parent folder must already exist — confirm.
OUTPUT_PATH_BASE = "/users/aranpurdy/desktop/TEST/TESTPCA_Pathway_Specific"
# PATHWAY_FILE: Excel workbook mapping pathway names to member metabolites.
# It is read without a header row (see section 4 for the column positions).
PATHWAY_FILE = "/users/aranpurdy/desktop/cfps/pathway_enrichment/MAN_Pathways.xlsx"

# 1b. Metabolites to Exclude
# Names are compared case-insensitively (both sides are lower-cased on load).
# Names that do not occur in the data are ignored.
METABOLITES_TO_EXCLUDE = ['9-Methylanthracene']

# 1c. Pretreatment Method
# Options: 'pareto', 'auto', 'log', 'log+pareto', 'log+auto'
# Any other value leaves the data unscaled.
PRETREATMENT_METHOD = 'pareto'


# --- 2. Analysis Scope ---
# Specify pathways to run the PCA on. A separate analysis and output file
# will be generated for EACH pathway in this list.
# If this list is empty, a single PCA will be run on ALL metabolites.
# Pathway names are matched case-insensitively against the pathway file.
PATHWAYS_FOR_PCA = [
    "Amino Acid Metabolism",
    "TCA cycle"
]


# --- 3. Sample & Timepoint Configuration ---

# 3a. Sample Naming Patterns
# Each entry is (regex, condition label). The regex is searched in the sample
# (column) name and must define a named group `timepoint`; the first pattern
# that matches wins. The first pattern needs a digit right after "TM2A", so it
# does not match the "TM2An<digit>_" names handled by the second pattern.
# The labels '+ GFP' and '- GFP' are also hard-coded in the plotting code of
# Cell 2; samples carrying any other label are not drawn on the score plots.
SAMPLE_NAMING_PATTERNS = [
    (r'TM2A(?P<timepoint>\d+)_', '+ GFP'),
    (r'TM2An(?P<timepoint>\d+)_', '- GFP')
]

# 3b. Timepoint Mapping
# Keys are the digit strings captured by the `timepoint` group above; values
# are the display names. An ID missing from this map is reported as "ID:<id>".
TIMEPOINT_MAP = { '1': '0h', '2': '0.5h', '3': '2h', '4': '5h', '5': '10h' }

# 3c. Timepoint Plotting Order
# Besides ordering and colouring, this list decides which timepoints are drawn
# and which get their own per-timepoint PCA; names must match TIMEPOINT_MAP values.
TIMEPOINT_PLOT_ORDER = ['0h', '0.5h', '2h', '5h', '10h']


# --- 4. Pathway File Configuration ---
# This is used to read the pathway file for filtering metabolites.
# Column numbers are zero-based positions, because the file is read with
# header=None (so the first row of the sheet is treated as data as well).
PATHWAY_NAME_COLUMN = 0
METABOLITES_COLUMN = 2
# Separator between metabolite names inside one METABOLITES_COLUMN cell.
METABOLITE_DELIMITER = ';'


# --- 5. Imports and Environment Setup ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
import re
from matplotlib.patches import Ellipse
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.transforms as mtransforms
import warnings

# Silence every Python warning for the rest of the session. This keeps the
# notebook output tidy, but it also hides warnings that numpy, pandas and
# matplotlib issue through the `warnings` module (e.g. divide-by-zero notices).
warnings.filterwarnings('ignore')
# Reset matplotlib to its default style, then set seaborn's default colour cycle.
plt.style.use('default')
sns.set_palette("husl")

print("--- Configuration and Setup Complete ---")

--- Configuration and Setup Complete ---


In [12]:
# =============================================================================
# Cell 2: Analysis Pipeline
# =============================================================================
# This cell contains all functions and the execution logic for the PCA.
# Do not modify this cell. Run it after setting your parameters in Cell 1.
# =============================================================================

# --- A. Data Loading and Pretreatment Functions ---

def load_data(filepath, exclude_metabolites=None):
    """Load the metabolite-by-sample table from an Excel workbook.

    The first column of the sheet becomes the row index (metabolite names);
    every remaining column is treated as one sample. Metabolite names are
    normalised -- converted to text, stripped of surrounding whitespace and
    lower-cased -- so that they compare equal to the stripped, lower-cased
    names produced by get_metabolites_from_pathways.

    Args:
        filepath: Path of the Excel file, handed to pandas.read_excel.
        exclude_metabolites: Optional iterable of metabolite names to drop.
            Names are normalised the same way as the index; names that are
            not present in the data are ignored.

    Returns:
        The loaded DataFrame, or None when the file does not exist.
    """
    print(f"Loading data from: {filepath}")
    try:
        df = pd.read_excel(filepath, index_col=0)
    except FileNotFoundError:
        print(f"ERROR: File not found at {filepath}. Please check the path in the configuration cell.")
        return None

    # astype(str) first: the .str accessor raises on a non-text (e.g. purely
    # numeric) index. strip() removes stray spaces that would otherwise stop
    # a metabolite from matching its pathway entry or the exclusion list.
    df.index = df.index.astype(str).str.strip().str.lower()
    print(f"Initial data shape: {df.shape}")

    if exclude_metabolites:
        names_to_drop = [str(name).strip().lower() for name in exclude_metabolites]
        initial_count = df.shape[0]
        df = df.drop(index=names_to_drop, errors='ignore')
        excluded_count = initial_count - df.shape[0]
        print(f"Excluded {excluded_count} specified metabolite(s).")

    print(f"Final data shape: {df.shape}")
    return df

def get_metabolites_from_pathways(pathway_file, pathways_to_include, name_col, met_col, delimiter):
    """Collect the metabolites that belong to the requested pathways.

    The pathway workbook is read without a header row, so `name_col` and
    `met_col` are zero-based column positions and every row (including the
    first) is inspected. Pathway names and metabolite names are compared
    after stripping whitespace and lower-casing.

    Args:
        pathway_file: Path of the Excel pathway file. Falsy -> empty result.
        pathways_to_include: Iterable of pathway names. Empty -> empty result.
        name_col: Position of the column holding the pathway name.
        met_col: Position of the column holding the delimited metabolite list.
        delimiter: Separator between metabolite names inside one cell.

    Returns:
        A set of lower-cased metabolite names. An empty set is returned when
        nothing matches or when the file cannot be read or parsed (the error
        is printed rather than raised).
    """
    if not pathway_file or not pathways_to_include:
        return set()
    print(f"Extracting metabolites for: {', '.join(pathways_to_include)}")
    try:
        df = pd.read_excel(pathway_file, header=None)
        # Strip the requested names too: the names read from the file are
        # stripped, so a trailing space in the configuration would never match.
        wanted = {p.strip().lower() for p in pathways_to_include}
        all_metabolites = set()

        for _, row in df.iterrows():
            if str(row[name_col]).strip().lower() not in wanted:
                continue
            cell = row[met_col]
            # An empty cell is read as NaN; str(NaN) is 'nan', which would
            # otherwise be recorded as a metabolite called "nan".
            if pd.isna(cell):
                continue
            all_metabolites.update(
                m.strip().lower() for m in str(cell).split(delimiter) if m.strip()
            )

        print(f"Found {len(all_metabolites)} unique metabolites in the specified pathway(s).")
        return all_metabolites
    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller skip
        # the pathway instead of aborting the whole notebook run.
        print(f"✗ ERROR: Could not read or parse the pathway file. Details: {e}")
        return set()

def log_transform(data):
    """Return log(1 + x) of every value, with missing results set to 0."""
    print("Applying log transformation...")
    logged = np.log1p(data)
    # NaN inputs (and NaN results) are replaced by 0 so later steps see no gaps.
    return logged.fillna(0)

def pareto_scaling(data):
    """Pareto-scale each row: (x - row mean) / sqrt(row standard deviation).

    Rows are centred on their own mean and divided by the square root of their
    sample standard deviation (ddof=1). A standard deviation of exactly 0 is
    treated as 1, and any infinite or missing result is replaced by 0.
    """
    print("Applying Pareto scaling...")
    row_mean = data.mean(axis=1)
    row_sd = data.std(axis=1, ddof=1)
    # Constant rows have sd == 0; use 1 so they scale to plain zeros.
    row_sd = row_sd.mask(row_sd == 0, 1)
    centred = data.sub(row_mean, axis=0)
    scaled = centred.div(np.sqrt(row_sd), axis=0)
    cleaned = scaled.replace([np.inf, -np.inf], 0)
    return cleaned.fillna(0)

def auto_scaling(data):
    """Auto-scale (z-score) each row: (x - row mean) / row standard deviation.

    Uses the sample standard deviation (ddof=1). A standard deviation of
    exactly 0 is treated as 1, and any infinite or missing result becomes 0.
    """
    print("Applying Auto scaling...")
    centre = data.mean(axis=1)
    spread = data.std(axis=1, ddof=1)
    # Avoid dividing a constant row by zero.
    spread = spread.mask(spread == 0, 1)
    z_scores = data.sub(centre, axis=0).div(spread, axis=0)
    z_scores = z_scores.replace([np.inf, -np.inf], 0)
    return z_scores.fillna(0)

def apply_pretreatment(data, method):
    """Apply the configured pretreatment to a metabolite-by-sample table.

    Args:
        data: DataFrame to transform. An empty frame is returned unchanged.
        method: One of 'log', 'pareto', 'auto', 'log+pareto', 'log+auto'.
            Matching ignores case and whitespace, so 'Log + Pareto' is
            accepted as 'log+pareto'.

    Returns:
        The transformed DataFrame. For an unrecognised method the input is
        returned unchanged and a warning is printed, so that unscaled data is
        not silently mistaken for pretreated data.
    """
    print(f"\n--- Starting Data Pretreatment: {method} ---")
    if data.empty:
        print("Warning: Data is empty, skipping pretreatment.")
        return data

    # Normalise the spelling once instead of lower-casing in every comparison.
    key = ''.join(str(method).lower().split())
    if key == 'log':
        return log_transform(data)
    if key == 'pareto':
        return pareto_scaling(data)
    if key == 'auto':
        return auto_scaling(data)
    if key == 'log+pareto':
        return pareto_scaling(log_transform(data))
    if key == 'log+auto':
        return auto_scaling(log_transform(data))

    print(f"Warning: Unknown pretreatment method '{method}'. Data left unscaled. "
          "Valid options: 'pareto', 'auto', 'log', 'log+pareto', 'log+auto'.")
    return data

# --- B. Core Analysis and Plotting Functions ---

def get_groups(sample_name, patterns, timepoint_map):
    """Derive (timepoint, condition) for a sample from its name.

    `patterns` is a sequence of (regex, condition) pairs tried in order with
    re.search; the first regex that matches decides the result. The regex is
    expected to expose a named group `timepoint`, whose captured text is
    translated through `timepoint_map` (unmapped IDs become "ID:<id>").

    Returns:
        (timepoint name, condition) for the first match;
        ("Unknown Timepoint", condition) if the matching regex has no
        `timepoint` group; ("Unknown", "Unknown") if nothing matches.
    """
    for regex, label in patterns:
        hit = re.search(regex, sample_name)
        if not hit:
            continue
        try:
            tp_id = hit.group('timepoint')
        except IndexError:
            # The pattern matched but does not define the named group.
            return "Unknown Timepoint", label
        return timepoint_map.get(tp_id, f"ID:{tp_id}"), label
    return "Unknown", "Unknown"


def perform_pca(scaled_data, n_components=20):
    """Fit a PCA on a pretreated metabolite-by-sample table.

    The table is transposed so that samples are the observations and
    metabolites the variables. The number of components is capped at the
    smaller of `n_components`, the sample count and the metabolite count.

    Returns:
        (fitted PCA object, scores DataFrame indexed by sample, loadings
        DataFrame indexed by metabolite), with columns 'PC1', 'PC2', ...
        (None, None, None) when there are fewer than 2 metabolites or fewer
        than 2 samples.
    """
    if scaled_data.empty or scaled_data.shape[0] < 2:
        print("Not enough data to perform PCA. A minimum of 2 metabolites is required.")
        return None, None, None
    if scaled_data.shape[1] < 2:
        print("Not enough data to perform PCA. A minimum of 2 samples is required.")
        return None, None, None

    print("\nPerforming PCA...")
    samples_by_metabolite = scaled_data.T
    n_keep = min(n_components, *samples_by_metabolite.shape)

    model = PCA(n_components=n_keep)
    score_matrix = model.fit_transform(samples_by_metabolite)

    component_names = [f'PC{k}' for k in range(1, n_keep + 1)]
    scores_df = pd.DataFrame(score_matrix, columns=component_names,
                             index=samples_by_metabolite.index)
    # components_ is (component x metabolite); transpose to metabolite rows.
    loadings_df = pd.DataFrame(model.components_.T, columns=component_names,
                               index=scaled_data.index)

    print(f"Explained variance ratio (Top 5): {model.explained_variance_ratio_[:5]}")
    return model, scores_df, loadings_df

def add_confidence_ellipse(ax, x, y, n_std=2.0, facecolor='none', **kwargs):
    """Draw a covariance ellipse for the points (x, y) on `ax`.

    The ellipse is built as a unit shape with radii sqrt(1 + r) and
    sqrt(1 - r), where r is the Pearson correlation of x and y. It is then
    rotated by 45 degrees, scaled by `n_std` standard deviations along each
    axis and moved to the mean of the points.

    Args:
        ax: Matplotlib axes to draw on.
        x, y: Equal-length sequences of coordinates.
        n_std: Number of standard deviations used to scale each axis.
        facecolor: Fill colour of the ellipse (default: unfilled).
        **kwargs: Forwarded to matplotlib.patches.Ellipse (e.g. edgecolor).

    Returns:
        None. Nothing is drawn when there are fewer than 3 points, or when
        either coordinate has zero or non-finite variance: the correlation is
        undefined there and would give an ellipse with NaN dimensions.
    """
    if len(x) < 3:
        return
    cov = np.cov(x, y)
    var_x, var_y = cov[0, 0], cov[1, 1]
    if not (np.isfinite(var_x) and np.isfinite(var_y)) or var_x <= 0 or var_y <= 0:
        return
    # Rounding can push |r| marginally above 1, which would make one of the
    # square roots below NaN; clip to the mathematically valid range.
    pearson = np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0)
    ell_radius_x, ell_radius_y = np.sqrt(1 + pearson), np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2, facecolor=facecolor, **kwargs)
    scale_x, mean_x = np.sqrt(var_x) * n_std, np.mean(x)
    scale_y, mean_y = np.sqrt(var_y) * n_std, np.mean(y)
    transf = mtransforms.Affine2D().rotate_deg(45).scale(scale_x, scale_y).translate(mean_x, mean_y)
    ellipse.set_transform(transf + ax.transData)
    ax.add_patch(ellipse)

def timepoint_specific_pca(data, metadata, timepoint, config):
    """Run pretreatment and PCA on the samples of one timepoint only.

    Samples are selected through the 'timepoint' column of `metadata`. The
    pretreatment named in config['pretreatment_method'] is recomputed on that
    subset before a PCA with up to 20 components is fitted.

    Returns:
        The (pca, scores, loadings) triple from perform_pca, or
        (None, None, None) when the timepoint has fewer than 3 samples.
    """
    print(f"\nAnalyzing timepoint: {timepoint}")
    belongs_to_timepoint = metadata['timepoint'] == timepoint
    subset = data.loc[:, belongs_to_timepoint]

    n_samples = subset.shape[1]
    if n_samples < 3:
        print(f"Skipping {timepoint}: not enough samples.")
        return None, None, None

    pretreated = apply_pretreatment(subset, config['pretreatment_method'])
    model, scores, loadings = perform_pca(pretreated, n_components=20)
    return model, scores, loadings

# Entry point for one complete analysis run (overall PCA plus one PCA per timepoint) on a single dataset.
def run_single_pca_analysis(data_for_analysis, config, analysis_scope_title):
    """Create a multi-page PDF report and an Excel workbook for one dataset.

    The dataset is either all metabolites or the metabolites of one pathway.

    Steps:
      1. Derive (timepoint, condition) for every sample column with get_groups.
      2. Pretreat the whole table and fit the overall PCA.
      3. Write the PDF: two overall pages (score + scree, loadings + biplot),
         followed by the same two pages for every timepoint in
         config['tp_order'] whose own PCA could be computed.
      4. Write the Excel workbook with the OVERALL scores, loadings and
         explained variance only; per-timepoint results are not exported.

    Args:
        data_for_analysis: DataFrame with metabolites as rows and samples as
            columns.
        config: Dict; the keys read here are 'patterns', 'tp_map', 'tp_order',
            'pretreatment_method' and 'output_path'.
        analysis_scope_title: Text shown in parentheses under every figure title.

    Returns:
        None. Output goes to "<output_path>_PCA.pdf" and
        "<output_path>_PCA_Results.xlsx". If the overall PCA cannot be
        performed, the function returns before either file is created.
    """
    # One metadata row per sample column, indexed by sample name so it lines up
    # with the score tables returned by perform_pca.
    metadata = pd.DataFrame([get_groups(s, config['patterns'], config['tp_map']) for s in data_for_analysis.columns],
                            columns=['timepoint', 'condition'], index=data_for_analysis.columns)
    timepoint_order = config['tp_order']
    # One colour per configured timepoint, sampled evenly from the plasma_r colormap.
    color_dict = dict(zip(timepoint_order, plt.cm.plasma_r(np.linspace(0.1, 0.9, len(timepoint_order)))))
    
    scaled_data_overall = apply_pretreatment(data_for_analysis, config['pretreatment_method'])
    pca, scores_df, loadings_df = perform_pca(scaled_data_overall)

    if pca is None:
        print("✗ Halting report generation as PCA could not be performed on the overall dataset.")
        return

    # NOTE(review): presumably the folder part of output_path must already exist — confirm.
    pdf_filename = f"{config['output_path']}_PCA.pdf"
    title_pretreatment = config['pretreatment_method'].replace("+", " + ").title()

    with PdfPages(pdf_filename) as pdf:
        print("\nGenerating plots...")
        # --- Overall Page 1: Score Plot & Scree Plot ---
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10), constrained_layout=True)
        fig.suptitle(f'Overall PCA Analysis: {title_pretreatment}\n({analysis_scope_title})', fontsize=20, fontweight='bold')
        # Colour encodes the timepoint, marker shape the condition. Only samples
        # whose timepoint is in timepoint_order AND whose condition is exactly
        # '+ GFP' or '- GFP' are drawn; other samples still take part in the
        # PCA and the Excel export but do not appear in the figures.
        for tp in timepoint_order:
            for cond, marker in [('+ GFP', 'o'), ('- GFP', 's')]:
                mask = (metadata['timepoint'] == tp) & (metadata['condition'] == cond)
                if mask.any(): ax1.scatter(scores_df.loc[mask, 'PC1'], scores_df.loc[mask, 'PC2'], color=color_dict.get(tp, 'gray'), marker=marker, s=150, alpha=0.8, edgecolors='black', label=f'{tp} ({cond})')
        # One ellipse per timepoint (both conditions pooled), drawn only when
        # the timepoint has more than 2 samples.
        for tp in timepoint_order:
            if (metadata['timepoint'] == tp).sum() > 2: add_confidence_ellipse(ax1, scores_df.loc[metadata['timepoint'] == tp, 'PC1'], scores_df.loc[metadata['timepoint'] == tp, 'PC2'], edgecolor=color_dict.get(tp, 'gray'), linewidth=2)
        ax1.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%})'); ax1.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%})'); ax1.set_title('Score Plot: PC1 vs PC2', fontsize=16, fontweight='bold'); ax1.grid(True, alpha=0.3); ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        
        # Scree plot: bars show the per-component variance ratio and the red
        # line on the twin axis the cumulative ratio, for at most the first 10
        # components.
        pc_nums = np.arange(1, min(11, len(pca.explained_variance_ratio_) + 1)); ax2.bar(pc_nums, pca.explained_variance_ratio_[:10], color='steelblue'); ax2_twin = ax2.twinx(); ax2_twin.plot(pc_nums, np.cumsum(pca.explained_variance_ratio_[:10]), 'r-o'); ax2_twin.set_ylabel('Cumulative Variance Ratio'); ax2.set_ylabel('Explained Variance Ratio'); ax2.set_xlabel('Principal Component'); ax2.set_title('Scree Plot', fontsize=16, fontweight='bold'); ax2.set_xticks(pc_nums)
        pdf.savefig(fig, bbox_inches='tight'); plt.close(fig)

        # --- Overall Page 2: Loading Plot & Biplot ---
        fig, (ax_load, ax_bi) = plt.subplots(1, 2, figsize=(22, 10), constrained_layout=True)
        fig.suptitle(f'Overall Loadings and Biplot: {title_pretreatment}\n({analysis_scope_title})', fontsize=20, fontweight='bold')
        # Label only the 15 metabolites with the largest loading magnitude in
        # the PC1/PC2 plane; all metabolites are drawn as grey points.
        top_loadings = np.sqrt(loadings_df['PC1']**2 + loadings_df['PC2']**2).nlargest(15).index; ax_load.scatter(loadings_df['PC1'], loadings_df['PC2'], alpha=0.6, c='gray')
        for met in top_loadings: ax_load.text(loadings_df.loc[met, 'PC1'], loadings_df.loc[met, 'PC2'], met.title(), fontsize=8, ha='center', bbox=dict(facecolor='white', alpha=0.5, boxstyle='round,pad=0.2'))
        ax_load.set_xlabel('PC1 Loadings'); ax_load.set_ylabel('PC2 Loadings'); ax_load.set_title('Loading Plot (PC1 vs PC2)', fontweight='bold'); ax_load.axhline(0, c='grey', ls='--'); ax_load.axvline(0, c='grey', ls='--')
        
        for tp in timepoint_order:
            for cond, marker in [('+ GFP', 'o'), ('- GFP', 's')]:
                mask = (metadata['timepoint'] == tp) & (metadata['condition'] == cond)
                if mask.any(): ax_bi.scatter(scores_df.loc[mask, 'PC1'], scores_df.loc[mask, 'PC2'], color=color_dict.get(tp, 'gray'), marker=marker, s=50, alpha=0.5)
        # Stretch the loading vectors so that the largest labelled loading
        # reaches 60% of the largest absolute PC1/PC2 score, putting arrows and
        # sample points on a comparable scale.
        scale_factor = 0.6 * np.max(np.abs(scores_df[['PC1', 'PC2']].values)) / np.max(np.abs(loadings_df.loc[top_loadings, ['PC1', 'PC2']].values))
        for met in top_loadings:
            ax_bi.arrow(0, 0, loadings_df.loc[met, 'PC1']*scale_factor, loadings_df.loc[met, 'PC2']*scale_factor, color='r', head_width=0.2)
            # The label sits 15% beyond the arrow tip so it does not cover the head.
            ax_bi.text(loadings_df.loc[met, 'PC1']*scale_factor*1.15, loadings_df.loc[met, 'PC2']*scale_factor*1.15, met.title(), color='r', ha='center', va='center', fontsize=8)
        ax_bi.set_xlabel(f'PC1 Scores ({pca.explained_variance_ratio_[0]:.1%})'); ax_bi.set_ylabel(f'PC2 Scores ({pca.explained_variance_ratio_[1]:.1%})'); ax_bi.set_title('Biplot', fontweight='bold'); ax_bi.axhline(0, c='grey', ls='--'); ax_bi.axvline(0, c='grey', ls='--')
        pdf.savefig(fig, bbox_inches='tight'); plt.close(fig)

        # --- Timepoint-Specific Analysis Pages ---
        # Each timepoint gets its own pretreatment and PCA, computed from the
        # unscaled input restricted to that timepoint's samples. Timepoints for
        # which no PCA is returned are skipped.
        for timepoint in timepoint_order:
            pca_tp, scores_df_tp, loadings_df_tp = timepoint_specific_pca(data_for_analysis, metadata, timepoint, config)
            if pca_tp is None: continue
            
            print(f"  - Generating plots for Timepoint: {timepoint}...")
            # --- Timepoint Page 1: Score & Scree Plot ---
            # current_meta restricts the metadata to the samples of this timepoint.
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10), constrained_layout=True); current_meta = metadata.loc[scores_df_tp.index]
            fig.suptitle(f'PCA for {timepoint}: {title_pretreatment}\n({analysis_scope_title})', fontsize=20, fontweight='bold')
            # Within one timepoint, colour and marker both encode the condition,
            # and the ellipse is drawn per condition.
            for cond, marker, color in [('+ GFP', 'o', 'blue'), ('- GFP', 's', 'red')]:
                mask = current_meta['condition'] == cond
                if mask.any(): ax1.scatter(scores_df_tp.loc[mask, 'PC1'], scores_df_tp.loc[mask, 'PC2'], c=color, marker=marker, s=150, edgecolors='k', label=cond); add_confidence_ellipse(ax1, scores_df_tp.loc[mask, 'PC1'], scores_df_tp.loc[mask, 'PC2'], edgecolor=color, linewidth=2)
            ax1.set_xlabel(f'PC1 ({pca_tp.explained_variance_ratio_[0]:.1%})'); ax1.set_ylabel(f'PC2 ({pca_tp.explained_variance_ratio_[1]:.1%})'); ax1.set_title('Score Plot: PC1 vs PC2', fontsize=16, fontweight='bold'); ax1.grid(True, alpha=0.3); ax1.legend()
            
            pc_nums_tp = np.arange(1, min(11, len(pca_tp.explained_variance_ratio_) + 1)); ax2.bar(pc_nums_tp, pca_tp.explained_variance_ratio_[:10], color='steelblue'); ax2_twin = ax2.twinx(); ax2_twin.plot(pc_nums_tp, np.cumsum(pca_tp.explained_variance_ratio_[:10]), 'r-o'); ax2_twin.set_ylabel('Cumulative Variance Ratio'); ax2.set_ylabel('Explained Variance Ratio'); ax2.set_xlabel('Principal Component'); ax2.set_title('Scree Plot', fontsize=16, fontweight='bold'); ax2.set_xticks(pc_nums_tp)
            pdf.savefig(fig, bbox_inches='tight'); plt.close(fig)
            
            # --- Timepoint Page 2: Loading Plot & Biplot ---
            # Same layout as the overall page 2, using this timepoint's loadings.
            fig, (ax_load, ax_bi) = plt.subplots(1, 2, figsize=(22, 10), constrained_layout=True); top_loadings_tp = np.sqrt(loadings_df_tp['PC1']**2 + loadings_df_tp['PC2']**2).nlargest(15).index
            fig.suptitle(f'Loadings and Biplot for {timepoint}: {title_pretreatment}\n({analysis_scope_title})', fontsize=20, fontweight='bold')
            ax_load.scatter(loadings_df_tp['PC1'], loadings_df_tp['PC2'], alpha=0.6, c='gray')
            for met in top_loadings_tp: ax_load.text(loadings_df_tp.loc[met, 'PC1'], loadings_df_tp.loc[met, 'PC2'], met.title(), fontsize=8, ha='center', bbox=dict(facecolor='white', alpha=0.5, boxstyle='round,pad=0.2'))
            ax_load.set_xlabel('PC1 Loadings'); ax_load.set_ylabel('PC2 Loadings'); ax_load.set_title('Loading Plot (PC1 vs PC2)', fontweight='bold'); ax_load.axhline(0, c='grey', ls='--'); ax_load.axvline(0, c='grey', ls='--')
            
            for cond, marker, color in [('+ GFP', 'o', 'blue'), ('- GFP', 's', 'red')]:
                mask = current_meta['condition'] == cond
                if mask.any(): ax_bi.scatter(scores_df_tp.loc[mask, 'PC1'], scores_df_tp.loc[mask, 'PC2'], c=color, marker=marker, s=80, alpha=0.6, label=cond)
            ax_bi.legend()
            scale_factor_tp = 0.6 * np.max(np.abs(scores_df_tp[['PC1', 'PC2']].values)) / np.max(np.abs(loadings_df_tp.loc[top_loadings_tp, ['PC1', 'PC2']].values))
            for met in top_loadings_tp:
                ax_bi.arrow(0, 0, loadings_df_tp.loc[met, 'PC1']*scale_factor_tp, loadings_df_tp.loc[met, 'PC2']*scale_factor_tp, color='r', head_width=0.2)
                ax_bi.text(loadings_df_tp.loc[met, 'PC1']*scale_factor_tp*1.15, loadings_df_tp.loc[met, 'PC2']*scale_factor_tp*1.15, met.title(), color='r', ha='center', va='center', fontsize=8)
            ax_bi.set_xlabel(f'PC1 Scores ({pca_tp.explained_variance_ratio_[0]:.1%})'); ax_bi.set_ylabel(f'PC2 Scores ({pca_tp.explained_variance_ratio_[1]:.1%})'); ax_bi.set_title('Biplot', fontweight='bold'); ax_bi.axhline(0, c='grey', ls='--'); ax_bi.axvline(0, c='grey', ls='--')
            pdf.savefig(fig, bbox_inches='tight'); plt.close(fig)

    print(f"\n✓ PDF report saved to: {pdf_filename}")
    # The workbook holds the overall analysis only: sample metadata joined to
    # the scores, the loadings, and the explained variance per component.
    excel_filename = f"{config['output_path']}_PCA_Results.xlsx"
    with pd.ExcelWriter(excel_filename) as writer:
        pd.concat([metadata, scores_df], axis=1).to_excel(writer, sheet_name='Scores_Overall')
        loadings_df.to_excel(writer, sheet_name='Loadings_Overall')
        pd.DataFrame({'Explained_Variance_Ratio': pca.explained_variance_ratio_, 'Cumulative_Variance': np.cumsum(pca.explained_variance_ratio_)}, index=[f'PC{i+1}' for i in range(pca.n_components_)]).to_excel(writer, sheet_name='Variance_Overall')
    print(f"✓ Excel results saved to: {excel_filename}")

# --- C. Main Execution Block ---
# Gathers the Cell 1 settings into one dict, loads the data once, then runs
# either a single analysis on every metabolite or one analysis per pathway.
print("\n\n>>> INITIATING PCA ANALYSIS PIPELINE <<<")
config = dict(
    output_path=OUTPUT_PATH_BASE,
    pathway_file=PATHWAY_FILE,
    pretreatment_method=PRETREATMENT_METHOD,
    patterns=SAMPLE_NAMING_PATTERNS,
    tp_map=TIMEPOINT_MAP,
    tp_order=TIMEPOINT_PLOT_ORDER,
    name_col=PATHWAY_NAME_COLUMN,
    met_col=METABOLITES_COLUMN,
    delimiter=METABOLITE_DELIMITER,
    pathways_for_pca=PATHWAYS_FOR_PCA,
)
metabolomics_data = load_data(INPUT_FILE, METABOLITES_TO_EXCLUDE)
if metabolomics_data is None:
    print("\n\n--- Analysis Halted due to data loading error. ---")
else:
    pathways_to_run = config['pathways_for_pca']
    if pathways_to_run:
        # One independent analysis (and one pair of output files) per pathway.
        for pathway_name in pathways_to_run:
            print(f"\n{'='*60}\n--- Running Analysis for Pathway: {pathway_name} ---\n{'='*60}")
            metabolites_to_keep = get_metabolites_from_pathways(
                config['pathway_file'],
                [pathway_name],
                config['name_col'],
                config['met_col'],
                config['delimiter'],
            )
            data_for_this_pathway = metabolomics_data.loc[metabolomics_data.index.isin(metabolites_to_keep)]

            if data_for_this_pathway.shape[0] >= 2:
                # Work on a copy so the per-pathway output path never leaks
                # into the shared config.
                pathway_config = config.copy()
                # Replace whitespace and characters that are invalid in file
                # names, and cap the length of the suffix at 50 characters.
                safe_pathway_name = re.sub(r'[\s/\\*?:"<>|]+', '_', pathway_name)[:50]
                pathway_config['output_path'] = f"{OUTPUT_PATH_BASE}_{safe_pathway_name}"

                run_single_pca_analysis(data_for_this_pathway, pathway_config, f"Pathway: {pathway_name}")
            else:
                print(f"✗ Skipping pathway '{pathway_name}': Not enough matching metabolites found in data ({data_for_this_pathway.shape[0]}).")
    else:
        print("\n--- Running Single Analysis on All Metabolites ---")
        run_single_pca_analysis(metabolomics_data, config.copy(), "All Metabolites")

    print("\n\n--- All Analyses Complete! ---")



>>> INITIATING PCA ANALYSIS PIPELINE <<<
Loading data from: /users/aranpurdy/desktop/cfps/PCA/RF/MOD_RF_Imputed.xlsx
Initial data shape: (115, 50)
Excluded 1 specified metabolite(s).
Final data shape: (114, 50)

--- Running Analysis for Pathway: Amino Acid Metabolism ---
Extracting metabolites for: Amino Acid Metabolism
Found 37 unique metabolites in the specified pathway(s).

--- Starting Data Pretreatment: pareto ---
Applying Pareto scaling...

Performing PCA...
Explained variance ratio (Top 5): [0.3958735  0.21423675 0.13292105 0.11392365 0.04486715]

Generating plots...

Analyzing timepoint: 0h

--- Starting Data Pretreatment: pareto ---
Applying Pareto scaling...

Performing PCA...
Explained variance ratio (Top 5): [0.71829146 0.09399309 0.07749454 0.03475193 0.02175517]
  - Generating plots for Timepoint: 0h...

Analyzing timepoint: 0.5h

--- Starting Data Pretreatment: pareto ---
Applying Pareto scaling...

Performing PCA...
Explained variance ratio (Top 5): [0.72352815 0.0900