# 🧬 MATLAB Yeast Cell Cycle Analysis

This notebook calls MATLAB scripts to run the budding yeast cell cycle model and analyzes the results in Python.

**Model**: Kraikivski et al. 2015 - Budding Yeast Cell Cycle Model  
**Scripts**: `run_yeast_model.m` and `run_yeast_model_linux.m`  
**Analysis**: Python-based visualization and interpretation

---

## 📦 Import Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import subprocess
import os
import warnings
from pathlib import Path
import scipy.io as sio
from scipy.signal import find_peaks
import platform

# Configure plotting
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# NOTE(review): this silences every warning for the whole session, including
# NumPy/pandas deprecation and parsing warnings.
warnings.filterwarnings('ignore')

# Set working directory
# The MATLAB scripts and the files they write are addressed by relative path,
# so the notebook has to run from the code directory. The location can be
# overridden with the YEAST_MODEL_DIR environment variable; when the directory
# does not exist (e.g. on another machine) the current directory is kept
# instead of aborting the cell with FileNotFoundError.
PROJECT_DIR = Path(os.environ.get(
    'YEAST_MODEL_DIR',
    '/Users/gijsbartholomeus/Documents/STUDIE/OxfordEvolution/Code',
))
if PROJECT_DIR.is_dir():
    os.chdir(PROJECT_DIR)
else:
    print(f"⚠️ Project directory not found: {PROJECT_DIR} (staying in current directory)")
print(f"Working directory: {os.getcwd()}")
print(f"Platform: {platform.system()}")

## 🚀 Execute MATLAB Simulation

Run the platform-specific MATLAB script (falling back to GNU Octave when MATLAB is not installed or times out) to generate the yeast cell cycle simulation data.

In [None]:
def run_matlab_script(script_name):
    """Run a MATLAB script in batch mode, falling back to GNU Octave.

    MATLAB is tried first; if the ``matlab`` executable is missing or the run
    exceeds the 300 second timeout, the same script is run with ``octave``.

    Args:
        script_name: File name (or path) of the script, e.g.
            ``"run_yeast_model.m"``.

    Returns:
        bool: True when the engine that ran exited with return code 0.
        False on a non-zero exit code or when any exception occurred
        (for example neither engine could be started). Never raises.
    """
    try:
        print(f"🔄 Executing {script_name}...")

        # MATLAB/Octave single-quoted strings escape a quote by doubling it.
        escaped = script_name.replace("'", "''")
        # MATLAB is given the name without its extension. Strip only a
        # trailing ".m": str.replace would also remove ".m" occurring inside
        # the name (e.g. "cell.model.m" -> "cellodel").
        matlab_target = escaped[:-2] if escaped.endswith('.m') else escaped

        # Try MATLAB first, then Octave as fallback
        try:
            result = subprocess.run(
                ['matlab', '-batch', f"run('{matlab_target}');"],
                capture_output=True, text=True, timeout=300
            )
            engine = "MATLAB"
        except (subprocess.TimeoutExpired, FileNotFoundError):
            print("  MATLAB not found or timed out, trying Octave...")
            result = subprocess.run(
                ['octave', '--eval', f"run('{escaped}');"],
                capture_output=True, text=True, timeout=300
            )
            engine = "Octave"

        succeeded = result.returncode == 0
        if succeeded:
            print(f"✅ {script_name} executed successfully using {engine}!")
            if result.stdout:
                print("Output:", result.stdout[-500:])  # Last 500 chars
        else:
            print(f"❌ Error executing {script_name}:")
            print("STDOUT:", result.stdout)
            print("STDERR:", result.stderr)

        return succeeded

    except Exception as e:
        # Notebook-facing boundary: report the problem and signal failure so
        # later cells can fall back to previously generated data files.
        print(f"❌ Exception running {script_name}: {e}")
        return False

# Choose script based on platform: Linux has its own variant of the model script
script_name = "run_yeast_model_linux.m" if platform.system() == "Linux" else "run_yeast_model.m"

success = run_matlab_script(script_name)

if not success:
    print("\n⚠️ MATLAB execution failed. Will try to load existing data files if available.")
else:
    # Report which of the expected output files are present in the working directory.
    print("\n📁 Checking generated files...")
    expected_files = ['matlab_results.mat', 'time_data.csv', 'species_names.txt']
    for file in expected_files:
        if not os.path.exists(file):
            print(f"  ❌ {file} not found")
            continue
        size = os.path.getsize(file)
        print(f"  ✅ {file} ({size} bytes)")

## 📊 Load and Parse Data

Load the simulation results from the CSV and text files in the working directory, plus the MATLAB `.mat` file when it is present.

In [None]:
def _load_numeric_table(path):
    """Read a numeric text table that is comma- or whitespace-delimited."""
    try:
        return np.loadtxt(path, delimiter=',')
    except ValueError:
        # Not comma-separated: fall back to NumPy's default whitespace split.
        return np.loadtxt(path)


def _build_species_frame(time, concentrations, species_names):
    """Combine time, concentrations and names into a DataFrame.

    Returns None (after printing a warning) when the concentration array
    cannot be aligned with the time axis.
    """
    n_time = len(time)
    conc = np.atleast_1d(concentrations)

    if conc.ndim == 1:
        if len(conc) == n_time:
            # One value per time point: a single time series.
            return pd.DataFrame({'Time': time, 'Concentration': conc})
        if n_time != 1:
            print(f"⚠️ Concentration data length {len(conc)} does not match "
                  f"{n_time} time points; DataFrame not created")
            return None
        # A single time point with one value per species: treat as one row.
        conc = conc.reshape(1, -1)

    if conc.shape[0] != n_time:
        if conc.shape[1] != n_time:
            print(f"⚠️ Concentration data shape {conc.shape} does not match "
                  f"{n_time} time points; DataFrame not created")
            return None
        # Stored as species x time: flip to time x species.
        conc = conc.T

    columns = {'Time': time}
    # zip stops at the shorter input, so only columns that have a name are kept.
    for name, series in zip(species_names, conc.T):
        columns[name] = series
    return pd.DataFrame(columns)


def load_simulation_data():
    """Load all simulation data from generated files in the working directory.

    Reads, when present: ``time_data.csv``, ``species_names.txt``, the first
    existing file of ``species_concentrations.csv`` / ``species_data.csv``,
    and ``matlab_results.mat``. Numeric files may be comma- or
    whitespace-delimited.

    Returns:
        dict: Any of the keys ``'time'``, ``'species_names'``,
        ``'concentrations'``, ``'matlab'`` for the files that were found, plus
        ``'df'`` (a DataFrame with a ``Time`` column and one column per
        species) when time, names and concentrations were all loaded and
        their shapes could be aligned. An empty dict is returned if any
        exception occurs while loading.
    """
    data = {}

    try:
        # Load time data
        if os.path.exists('time_data.csv'):
            # atleast_1d: a file holding a single value would otherwise load as 0-d.
            data['time'] = np.atleast_1d(_load_numeric_table('time_data.csv'))
            print(f"✅ Loaded time data: {len(data['time'])} time points")

        # Load species names
        if os.path.exists('species_names.txt'):
            with open('species_names.txt', 'r') as f:
                # Skip blank lines so they do not become empty species names.
                data['species_names'] = [line.strip() for line in f if line.strip()]
            print(f"✅ Loaded species names: {len(data['species_names'])} species")

        # Load concentration data (first candidate file that exists wins)
        for file in ('species_concentrations.csv', 'species_data.csv'):
            if os.path.exists(file):
                data['concentrations'] = _load_numeric_table(file)
                print(f"✅ Loaded concentration data from {file}: {data['concentrations'].shape}")
                break

        # Load MATLAB results if available
        if os.path.exists('matlab_results.mat'):
            mat_data = sio.loadmat('matlab_results.mat')
            data['matlab'] = mat_data
            print(f"✅ Loaded MATLAB data: {list(mat_data.keys())}")

        # Create DataFrame for easier analysis
        if all(key in data for key in ('time', 'concentrations', 'species_names')):
            frame = _build_species_frame(
                data['time'], data['concentrations'], data['species_names']
            )
            if frame is not None:
                data['df'] = frame
                print(f"✅ Created DataFrame: {data['df'].shape}")
                print(f"   Columns: {list(data['df'].columns)[:10]}...")

        return data

    except Exception as e:
        # Best-effort loader for notebook use: report and return nothing
        # rather than abort the cell.
        print(f"❌ Error loading data: {e}")
        return {}

# Load the simulation data
sim_data = load_simulation_data()

if 'df' not in sim_data:
    print("⚠️ No data loaded. Please check MATLAB execution.")
else:
    # Every column other than the shared 'Time' axis counts as one species.
    print("\n📈 Data Summary:")
    print(f"  Time range: {sim_data['time'][0]:.2f} to {sim_data['time'][-1]:.2f}")
    print(f"  Number of species: {sum(col != 'Time' for col in sim_data['df'].columns)}")
    print(f"  DataFrame shape: {sim_data['df'].shape}")

## 🔍 Key Species Analysis

Identify and analyze the most important cell cycle regulators.

In [None]:
def find_key_species(df):
    """Map each key cell cycle regulator to a matching DataFrame column.

    For every regulator the aliases are tried in order; the first column
    whose name contains the alias (case-sensitive substring test) is used.
    A found/not-found line is printed for each regulator.

    Args:
        df: DataFrame whose column names are searched.

    Returns:
        dict: regulator label -> matched column name, for regulators that
        were found.
    """
    # (label, aliases) pairs; comments give the regulator's role
    aliases_by_regulator = (
        ('CLN2', ('CLN2', 'Cln2')),                  # G1/S cyclin
        ('CLB2', ('CLB2', 'Clb2')),                  # G2/M cyclin
        ('SIC1', ('SIC1', 'Sic1', 'CKIT', 'CKIP')),  # CDK inhibitor
        ('CDC20', ('CDC20', 'Cdc20')),               # APC activator
        ('CDH1', ('CDH1', 'Cdh1')),                  # APC activator
        ('CLN3', ('CLN3', 'Cln3')),                  # G1 cyclin
        ('CLB5', ('CLB5', 'Clb5')),                  # S-phase cyclin
    )
    available = df.columns.tolist()
    located = {}

    for regulator, aliases in aliases_by_regulator:
        # Earlier aliases win; within one alias the earliest column wins.
        hit = next(
            (col for alias in aliases for col in available if alias in col),
            None,
        )
        if hit is None:
            print(f"❌ {regulator} not found")
        else:
            located[regulator] = hit
            print(f"✅ Found {regulator}: {hit}")

    return located

if 'df' not in sim_data or sim_data['df'].empty:
    print("⚠️ No DataFrame available for analysis")
    key_species = {}
else:
    key_species = find_key_species(sim_data['df'])

    # Display basic statistics (min / max / mean) for each matched column
    print("\n📊 Key Species Statistics:")
    for species, column in key_species.items():
        if column not in sim_data['df'].columns:
            continue
        values = sim_data['df'][column]
        print(f"  {species:6} ({column:10}): min={values.min():.3f}, max={values.max():.3f}, mean={values.mean():.3f}")

## 📈 Visualization: Time Series Analysis

Plot the dynamics of key cell cycle regulators over time.

In [None]:
if not ('df' in sim_data and key_species):
    print("⚠️ Cannot create visualizations - no data or key species found")
else:
    # Create comprehensive figure: a 2x2 grid with one analysis view per panel
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('🧬 Budding Yeast Cell Cycle Dynamics', fontsize=16, fontweight='bold')
    (ax1, ax2), (ax3, ax4) = axes
    colors = ['red', 'blue', 'black', 'green', 'orange', 'purple', 'brown']

    # Plots 1 and 2: raw and min-max normalized traces, drawn in a single pass
    # so a species gets the same color and line style in both panels.
    for i, (species, column) in enumerate(key_species.items()):
        if column not in sim_data['df'].columns:
            continue
        values = sim_data['df'][column]
        color = colors[i % len(colors)]
        linestyle = '--' if i >= 3 else '-'  # first three species solid, rest dashed

        ax1.plot(sim_data['df']['Time'], values,
                 color=color, linewidth=2, linestyle=linestyle, label=f'{species}')

        # A constant trace has zero range; it is plotted unscaled.
        if values.max() > values.min():
            normalized = (values - values.min()) / (values.max() - values.min())
        else:
            normalized = values
        ax2.plot(sim_data['df']['Time'], normalized,
                 color=color, linewidth=2, linestyle=linestyle, label=f'{species} (norm)')

    for axis, ylabel, title in (
        (ax1, 'Concentration', 'Key Cell Cycle Regulators'),
        (ax2, 'Normalized Concentration', 'Normalized Oscillations'),
    ):
        axis.set_xlabel('Time (arbitrary units)')
        axis.set_ylabel(ylabel)
        axis.set_title(title)
        axis.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        axis.grid(True, alpha=0.3)

    # Plot 3: Phase portrait (CLN2 vs CLB2)
    if 'CLN2' not in key_species or 'CLB2' not in key_species:
        ax3.text(0.5, 0.5, 'CLN2 or CLB2 not found', ha='center', va='center', transform=ax3.transAxes)
        ax3.set_title('Phase Portrait: CLN2 vs CLB2')
    else:
        cln2_col = key_species['CLN2']
        clb2_col = key_species['CLB2']
        if cln2_col in sim_data['df'].columns and clb2_col in sim_data['df'].columns:
            x = sim_data['df'][cln2_col]
            y = sim_data['df'][clb2_col]
            ax3.plot(x, y, 'b-', linewidth=2, alpha=0.7)
            # Mark the first and last samples of the trajectory.
            for position, point_color, marker, label in (
                (0, 'green', 'o', 'Start'),
                (-1, 'red', 's', 'End'),
            ):
                ax3.scatter(x.iloc[position], y.iloc[position], color=point_color,
                            s=100, marker=marker, label=label, zorder=5)
            ax3.set_xlabel(f'CLN2 ({cln2_col})')
            ax3.set_ylabel(f'CLB2 ({clb2_col})')
            ax3.set_title('Phase Portrait: CLN2 vs CLB2')
            ax3.legend()

    ax3.grid(True, alpha=0.3)

    # Plot 4: Period analysis (CLB2 oscillations)
    if 'CLB2' not in key_species:
        ax4.text(0.5, 0.5, 'CLB2 not found', ha='center', va='center', transform=ax4.transAxes)
        ax4.set_title('CLB2 Oscillations & Period Analysis')
    else:
        clb2_col = key_species['CLB2']
        if clb2_col in sim_data['df'].columns:
            time = sim_data['df']['Time'].values
            clb2_values = sim_data['df'][clb2_col].values

            ax4.plot(time, clb2_values, 'b-', linewidth=2, label='CLB2')

            # Simple peak detection: peaks at least half the maximum value and
            # at least 10 samples apart; a period needs two or more peaks.
            try:
                peaks, _ = find_peaks(clb2_values, height=np.max(clb2_values)*0.5, distance=10)
                if len(peaks) > 1:
                    ax4.scatter(time[peaks], clb2_values[peaks], color='red', s=50, marker='o', label='Peaks')

                    # Calculate periods as the spacing between successive peaks
                    periods = np.diff(time[peaks])
                    avg_period = np.mean(periods)
                    std_period = np.std(periods)

                    summary_box = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
                    ax4.text(0.02, 0.98, f'Avg Period: {avg_period:.1f} ± {std_period:.1f}\nCycles: {len(peaks)}',
                             transform=ax4.transAxes, va='top', ha='left',
                             bbox=summary_box)
            except Exception as e:
                print(f"Peak detection failed: {e}")

            ax4.set_xlabel('Time (arbitrary units)')
            ax4.set_ylabel('CLB2 Concentration')
            ax4.set_title('CLB2 Oscillations & Period Analysis')
            ax4.legend()

    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 🔬 Detailed Analysis: Cell Cycle Characteristics

Analyze oscillation patterns, periods, and cell cycle phases.

In [None]:
def analyze_oscillations(df, species_dict, height_fraction=0.5, min_peak_distance=10):
    """Compute summary statistics and peak-based period estimates per species.

    Args:
        df: DataFrame with a ``Time`` column and one column per species.
        species_dict: Mapping of species label -> column name in ``df``.
            Labels whose column is missing or empty are skipped.
        height_fraction: Peaks must reach at least this fraction of the
            column maximum to be counted.
        min_peak_distance: Minimum spacing between peaks, in samples (not
            time units).

    Returns:
        dict: label -> stats dict. Every stats dict has ``min``, ``max``,
        ``mean``, ``std``, ``amplitude``, ``avg_period``, ``period_std`` and
        ``num_cycles``. When at least two peaks are found, ``avg_period`` and
        ``period_std`` describe the peak-to-peak spacing, ``num_cycles`` is
        the number of detected peaks, and ``peak_times`` / ``peak_values``
        are included. Otherwise the two period fields are None and
        ``num_cycles`` is 0.
    """
    analysis = {}

    for species, column in species_dict.items():
        if column not in df.columns:
            continue

        time = df['Time'].values
        values = df[column].values
        if values.size == 0:
            # np.min / np.max raise on empty input; nothing to summarize.
            continue

        low = np.min(values)
        high = np.max(values)

        # Basic statistics; period fields start at their "no oscillation"
        # defaults so every result has the same keys.
        stats = {
            'min': low,
            'max': high,
            'mean': np.mean(values),
            'std': np.std(values),
            'amplitude': high - low,
            'avg_period': None,
            'period_std': None,
            'num_cycles': 0,
        }

        # Peak analysis
        try:
            peaks, _ = find_peaks(
                values, height=high * height_fraction, distance=min_peak_distance
            )
            # A period needs at least two peaks.
            if len(peaks) > 1:
                periods = np.diff(time[peaks])
                stats['avg_period'] = np.mean(periods)
                stats['period_std'] = np.std(periods)
                stats['num_cycles'] = len(peaks)
                stats['peak_times'] = time[peaks]
                stats['peak_values'] = values[peaks]
        except Exception as e:
            # Best effort: keep the basic statistics even if peak finding fails.
            print(f"Peak analysis failed for {species}: {e}")
            stats['avg_period'] = None
            stats['period_std'] = None
            stats['num_cycles'] = 0

        analysis[species] = stats

    return analysis

if not ('df' in sim_data and key_species):
    print("⚠️ Cannot perform detailed analysis - no data available")
else:
    divider = "=" * 80

    print("🔬 Detailed Oscillation Analysis\n")
    print(divider)

    oscillation_analysis = analyze_oscillations(sim_data['df'], key_species)

    # Per-species report: range, mean and (when detected) periodicity
    for species, stats in oscillation_analysis.items():
        print(f"\n{species:12} ({key_species[species]}):")
        print(f"  Range:      {stats['min']:.3f} to {stats['max']:.3f} (amplitude: {stats['amplitude']:.3f})")
        print(f"  Mean ± SD:  {stats['mean']:.3f} ± {stats['std']:.3f}")

        if stats['avg_period'] is None:
            print("  Periodicity: No clear oscillations detected")
            continue
        print(f"  Periodicity: {stats['avg_period']:.1f} ± {stats['period_std']:.1f} time units")
        print(f"  Cycles:     {stats['num_cycles']} complete cycles detected")

    # Summary insights
    print("\n" + divider)
    print("📋 SUMMARY INSIGHTS")
    print(divider)

    # Average the per-species periods of every species that showed one
    periods = [
        entry['avg_period']
        for entry in oscillation_analysis.values()
        if entry['avg_period'] is not None
    ]
    if periods:
        avg_period = np.mean(periods)
        print(f"\n🔄 Cell Cycle Period: {avg_period:.1f} ± {np.std(periods):.1f} time units")
        print(f"   Simulation covers ~{sim_data['time'][-1] / avg_period:.1f} cell cycles")

    # Identify key regulatory relationships: fixed notes, shown only for
    # regulators present in the analysis
    print("\n🎯 Key Regulatory Dynamics:")
    regulator_notes = {
        'CLN2': "   • CLN2 (G1/S cyclin): Drives G1→S transition, peaks early in cycle",
        'CLB2': "   • CLB2 (G2/M cyclin): Controls G2→M transition, peaks for mitosis",
        'SIC1': "   • SIC1 (CDK inhibitor): Restrains cell cycle, inversely correlated with cyclins",
    }
    for species, note in regulator_notes.items():
        if species in oscillation_analysis:
            print(note)

    print(f"\n📊 Total simulation time: {sim_data['time'][-1]:.1f} time units")
    print(f"📈 Data points collected: {len(sim_data['time'])}")
    print(f"🧬 Species monitored: {sum(col != 'Time' for col in sim_data['df'].columns)}")

## 📋 Results Summary & Export

Summarize findings and save results for further analysis.

In [None]:
def export_results(sim_data, key_species, analysis_results=None):
    """Write the key-species time series and optional summary to CSV.

    Files are written to the current working directory:
    ``yeast_key_species_timeseries.csv`` (``Time`` plus every matched column
    present in the DataFrame) and, when ``analysis_results`` is non-empty,
    ``yeast_analysis_summary.csv`` (one row per analysed species).

    Args:
        sim_data: Dict holding the simulation DataFrame under ``'df'``.
        key_species: Mapping of species label -> DataFrame column name.
        analysis_results: Optional mapping of species label -> stats dict.

    Returns:
        bool: True when nothing failed (including the case where there was
        nothing to export), False if any exception was raised.
    """
    try:
        # Nothing to export without both a DataFrame and matched species.
        if 'df' not in sim_data or not key_species:
            return True

        frame = sim_data['df']

        # Export key species time series
        selected = ['Time']
        for column in key_species.values():
            if column in frame.columns:
                selected.append(column)
        frame[selected].to_csv('yeast_key_species_timeseries.csv', index=False)
        print("✅ Exported key species time series to 'yeast_key_species_timeseries.csv'")

        # Create analysis summary
        if analysis_results:
            rows = [
                {
                    'Species': label,
                    'Column': key_species.get(label, ''),
                    'Min': entry['min'],
                    'Max': entry['max'],
                    'Mean': entry['mean'],
                    'Std': entry['std'],
                    'Amplitude': entry['amplitude'],
                    'Avg_Period': entry.get('avg_period', None),
                    'Num_Cycles': entry.get('num_cycles', 0),
                }
                for label, entry in analysis_results.items()
            ]
            pd.DataFrame(rows).to_csv('yeast_analysis_summary.csv', index=False)
            print("✅ Exported analysis summary to 'yeast_analysis_summary.csv'")

        return True

    except Exception as e:
        print(f"❌ Error exporting results: {e}")
        return False

# Export results
if not ('df' in sim_data and key_species):
    print("⚠️ Analysis incomplete - check MATLAB execution and data files")
else:
    print("💾 Exporting Results...\n")

    # The oscillation analysis exists only if its cell ran with data; pass
    # None (the parameter's default) when it is absent.
    export_success = export_results(
        sim_data, key_species, locals().get('oscillation_analysis')
    )

    if export_success:
        print("\n📁 Files Available:")
        files_to_check = [
            'matlab_results.mat',
            'time_data.csv',
            'species_concentrations.csv',
            'species_data.csv',
            'yeast_key_species_timeseries.csv',
            'yeast_analysis_summary.csv',
            'yeast_key_species_analysis.png',
            'yeast_analysis_linux.png'
        ]

        # List only the files that actually exist, with their sizes
        for file in files_to_check:
            if not os.path.exists(file):
                continue
            size = os.path.getsize(file)
            print(f"  ✅ {file:<35} ({size:>8} bytes)")

    closing_lines = (
        "\n" + "=" * 80,
        "🎉 ANALYSIS COMPLETE!",
        "=" * 80,
        "\n📊 This notebook successfully:",
        "   • Executed MATLAB yeast cell cycle simulation",
        "   • Loaded and parsed simulation data",
        "   • Identified key cell cycle regulatory species",
        "   • Analyzed oscillation patterns and periods",
        "   • Created comprehensive visualizations",
        "   • Exported results for further analysis",
        "\n🔬 Model: Kraikivski et al. 2015 - Budding Yeast Cell Cycle",
        "📈 Focus: Mathematical modeling of cell cycle regulation",
        "🎯 Key insights: Cyclin oscillations drive cell cycle progression",
    )
    for closing_line in closing_lines:
        print(closing_line)