In [5]:
import numpy as np
import pandas as pd
from scipy.signal.windows import hann, hamming, blackman, kaiser
from sklearn.metrics import r2_score

def calculate_metrics(y_true, y_pred, y_mean=None):
    """
    Compute the RMSE and the coefficient of determination (R²) of a prediction.

    Parameters
    ----------
    y_true : array-like
        Observed values. Plain Python sequences are accepted.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.
    y_mean : float, optional
        Reference mean used for the total sum of squares. When omitted,
        ``np.mean(y_true)`` is used.

    Returns
    -------
    tuple
        ``(rmse, r2)``. ``r2`` is ``0`` when the total sum of squares is zero,
        which avoids a division by zero for a constant ``y_true``.
    """
    # Coerce up front so lists/tuples support the element-wise arithmetic below.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if y_mean is None:
        y_mean = np.mean(y_true)

    squared_residuals = (y_true - y_pred) ** 2

    rmse = np.sqrt(np.mean(squared_residuals))

    # R² is computed by hand so that the caller-supplied mean is honoured.
    ss_tot = np.sum((y_true - y_mean) ** 2)
    ss_res = np.sum(squared_residuals)

    r2 = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0

    return rmse, r2

def fourier_components_by_order(theta, radial_idx, order, max_radial_idx=32):
    """
    Build the basis values of one Fourier order at a single sample point.

    Parameters
    ----------
    theta : float
        Meridian angle in radians.
    radial_idx : float
        Raw radial index (not a normalized radius).
    order : int
        Harmonic order. Order 0 is the constant (DC) term.
    max_radial_idx : float, optional
        Radial index that maps to a radial phase of ``order * pi``. Defaults
        to 32, the value this function previously had hard-coded.

    Returns
    -------
    numpy.ndarray
        ``[1.0]`` for order 0; otherwise six values, in order:
        ``cos(nθ)``, ``sin(nθ)``, ``r·cos(nπr/R)``, ``r·sin(nπr/R)``,
        ``r·cos(nθ)·cos(nπr/R)``, ``r·sin(nθ)·sin(nπr/R)``.

    Raises
    ------
    ValueError
        If ``max_radial_idx`` is zero.
    """
    if max_radial_idx == 0:
        raise ValueError("max_radial_idx must be non-zero.")

    if order == 0:
        return np.array([1.0])

    angular_phase = order * theta
    radial_phase = order * np.pi * radial_idx / max_radial_idx

    cos_theta, sin_theta = np.cos(angular_phase), np.sin(angular_phase)
    cos_radial, sin_radial = np.cos(radial_phase), np.sin(radial_phase)

    return np.array([
        cos_theta,
        sin_theta,
        radial_idx * cos_radial,
        radial_idx * sin_radial,
        radial_idx * cos_theta * cos_radial,
        radial_idx * sin_theta * sin_radial,
    ])

def fit_fourier_model_by_order(theta, radial_idx, k, order, k_mean=None):
    """
    Least-squares fit of one Fourier order's basis to the signal ``k``.

    For ``order > 0`` the target is ``k`` minus ``k_mean`` (or minus the mean
    of ``k`` when ``k_mean`` is not given); order 0 is fitted to ``k`` as is.
    Returns the coefficient vector from ``np.linalg.lstsq``.
    """
    offset = np.mean(k) if k_mean is None else k_mean

    # Only the non-constant orders are fitted against the centered signal.
    target = k - offset if order > 0 else k

    # One design-matrix row per sample point.
    design_rows = [
        fourier_components_by_order(theta[pos], radial_idx[pos], order)
        for pos in range(len(theta))
    ]
    design = np.vstack(design_rows)

    solution = np.linalg.lstsq(design, target, rcond=None)
    return solution[0]

def reconstruct_from_coeffs(theta, radial_idx, coeffs, order, k_mean=0):
    """
    Evaluate a fitted single-order model at every sample point.

    The basis for ``order`` is rebuilt row by row and multiplied by ``coeffs``.
    For ``order > 0`` the offset ``k_mean`` is added back to the result.
    """
    n_points = len(theta)
    basis = np.vstack([
        fourier_components_by_order(theta[pos], radial_idx[pos], order)
        for pos in range(n_points)
    ])

    signal = basis @ coeffs
    if order <= 0:
        return signal

    signal += k_mean
    return signal

def analyze_with_windows(filepath):
    """
    Fit Fourier models of order 0, 1 and 2 to keratometry data, once on the
    raw signal and once on a Kaiser-windowed copy, printing per-order and
    total metrics plus the resulting formula.

    Parameters
    ----------
    filepath : str or path-like
        CSV file with the columns ``Meridian_Angle_Rad``, ``Radial_Index``
        and ``KR_scaled``.

    Returns
    -------
    dict
        Maps ``'none'`` and ``'kaiser'`` to a dict with keys ``'rmse'``,
        ``'r2'`` (metrics of the summed reconstruction) and ``'coeffs'``
        (list of coefficient arrays, one per order).
    """
    # Load data
    df = pd.read_csv(filepath)
    theta = df['Meridian_Angle_Rad'].values
    radial_idx = df['Radial_Index'].values  # Raw radial index, not a normalized radius
    k = df['KR_scaled'].values

    # Mean of the unwindowed signal; used for centering and as the R² reference.
    # NOTE(review): the windowed run is centered with this unwindowed mean too —
    # confirm that is intended rather than the mean of the windowed signal.
    k_mean = np.mean(k)

    windows = ['none', 'kaiser']
    results = {}

    for window in windows:
        print(f"\nAnalysis with {window.upper()} window")
        print("-" * 40)

        # Apply window if needed
        if window == 'none':
            k_windowed = k
        else:
            k_windowed = k * kaiser(len(k), beta=14)

        # Force a float accumulator: zeros_like would inherit an integer dtype
        # from integer input and the in-place adds below would then fail.
        total_reconstruction = np.zeros_like(k_windowed, dtype=float)
        coeffs_by_order = []

        # Analyze each order
        for order in range(3):  # Orders 0, 1, 2
            coeffs = fit_fourier_model_by_order(theta, radial_idx, k_windowed, order, k_mean)
            coeffs_by_order.append(coeffs)

            # Stand-alone reconstruction of this order (includes k_mean for order > 0)
            order_reconstruction = reconstruct_from_coeffs(theta, radial_idx, coeffs, order, k_mean)

            # Every order > 0 reconstruction already carries k_mean, so adding
            # them verbatim would count the mean once per order. Accumulate the
            # DC term once and only the centered part of the higher orders,
            # which is exactly what the printed formula describes.
            if order == 0:
                total_reconstruction += order_reconstruction
            else:
                total_reconstruction += order_reconstruction - k_mean

            # Calculate metrics for this order
            rmse, r2 = calculate_metrics(k_windowed, order_reconstruction, k_mean)

            print(f"\nOrder {order}:")
            print(f"RMSE: {rmse:.6f}")
            print(f"R² score: {r2:.6f}")

            if order == 0:
                print(f"DC component: {coeffs[0]:.4f}")
            else:
                # Labels mirror the basis actually fitted (radial phase is nπr/32).
                component_names = [
                    f"cos({order}θ)",
                    f"sin({order}θ)",
                    f"r*cos({order}πr/32)",
                    f"r*sin({order}πr/32)",
                    f"r*cos({order}θ)*cos({order}πr/32)",
                    f"r*sin({order}θ)*sin({order}πr/32)"
                ]
                for coeff, name in zip(coeffs, component_names):
                    print(f"{name}: {coeff:.4f}")

        # Calculate total metrics
        total_rmse, total_r2 = calculate_metrics(k_windowed, total_reconstruction, k_mean)

        print(f"\nTotal reconstruction metrics:")
        print(f"RMSE: {total_rmse:.6f}")
        print(f"R² score: {total_r2:.6f}")

        results[window] = {
            'rmse': total_rmse,
            'r2': total_r2,
            'coeffs': coeffs_by_order
        }

        # Print the full Fourier formula
        print("\nFull Fourier Formula:")
        formula_str = f"K(θ,r) = {coeffs_by_order[0][0]:.4f}"  # DC term

        for order in range(1, len(coeffs_by_order)):
            coeffs = coeffs_by_order[order]
            formula_str += (
                f" + {coeffs[0]:.4f}*cos({order}θ)"
                f" + {coeffs[1]:.4f}*sin({order}θ)"
                f" + {coeffs[2]:.4f}*r*cos({order}πr/32)"
                f" + {coeffs[3]:.4f}*r*sin({order}πr/32)"
                f" + {coeffs[4]:.4f}*r*cos({order}θ)*cos({order}πr/32)"
                f" + {coeffs[5]:.4f}*r*sin({order}θ)*sin({order}πr/32)"
            )

        print(formula_str)

    return results

# Entry point: run the window comparison on one transformed sheet and keep the
# returned metrics in `results`.
# NOTE(review): the path is absolute and machine-specific — consider a configurable data directory.
if __name__ == "__main__":
    filepath = "/home/aricept094/mydata/sheets/conv/transformed/Sheet2_transformed.csv"
    results = analyze_with_windows(filepath)


    


In [7]:
import numpy as np
import pandas as pd

# Load the dataset into the module-level `df` used by the window comparison at the end of this cell.
# NOTE(review): absolute, machine-specific path.
data_path = "/home/aricept094/mydata/sheets/conv/transformed/Sheet2_transformed.csv"  # Update with your file path
df = pd.read_csv(data_path)

def apply_window(data, window_type, **kwargs):
    """
    Taper a 2D array with a separable window.

    ``window_type`` selects ``'hann'``, ``'kaiser'`` or ``'blackman'``;
    ``None`` returns ``data`` untouched. For Kaiser, the shape parameter is
    read from the ``beta`` keyword (default 8.6). The 2D window is the outer
    product of a row-length and a column-length 1D window.

    Raises ``ValueError`` for any other window name.
    """
    if window_type is None:
        return data

    n_rows, n_cols = data.shape

    # Pick a 1D window factory taking the window length.
    if window_type == 'hann':
        make_taper = np.hanning
    elif window_type == 'blackman':
        make_taper = np.blackman
    elif window_type == 'kaiser':
        kaiser_beta = kwargs.get('beta', 8.6)

        def make_taper(length):
            return np.kaiser(length, kaiser_beta)
    else:
        raise ValueError("Invalid window type specified.")

    taper_2d = np.outer(make_taper(n_rows), make_taper(n_cols))
    return data * taper_2d

def inverse_window(data, window_type, **kwargs):
    """
    Divide a 2D array by the separable window that ``apply_window`` multiplies by.

    ``window_type`` selects ``'hann'``, ``'kaiser'`` (``beta`` keyword,
    default 8.6) or ``'blackman'``; ``None`` returns ``data`` untouched.
    Window values below 1e-10 are raised to 1e-10 before dividing.

    Raises ``ValueError`` for any other window name.
    """
    if window_type is None:
        return data

    n_rows, n_cols = data.shape

    # Pick a 1D window factory taking the window length.
    if window_type == 'hann':
        make_taper = np.hanning
    elif window_type == 'blackman':
        make_taper = np.blackman
    elif window_type == 'kaiser':
        kaiser_beta = kwargs.get('beta', 8.6)

        def make_taper(length):
            return np.kaiser(length, kaiser_beta)
    else:
        raise ValueError("Invalid window type specified.")

    taper_2d = np.outer(make_taper(n_rows), make_taper(n_cols))

    # Floor the divisor so that (near-)zero window samples do not divide by zero.
    floor = 1e-10
    divisor = np.where(taper_2d < floor, floor, taper_2d)
    return data / divisor

def calculate_2d_fourier_coefficients(meridians, radial_indices, keratometry_values, window_type=None, **kwargs):
    """
    Project (optionally windowed) keratometry data onto meridional and radial
    harmonics 1..4, excluding cross-terms.

    Parameters
    ----------
    meridians : array-like
        Meridian index of each column of ``keratometry_values``.
    radial_indices : array-like
        Radial index of each row of ``keratometry_values``.
    keratometry_values : numpy.ndarray
        2D array of shape ``(len(radial_indices), len(meridians))``.
    window_type : str or None, optional
        Forwarded to ``apply_window`` together with ``**kwargs``.

    Returns
    -------
    tuple
        ``(coefficients, variance_explained, reconstructed_signal)``.
        ``coefficients`` is keyed ``'a0_0'``, ``'a{m}_0'``/``'b{m}_0'``
        (meridional) and ``'a0_{r}'``/``'b0_{r}'`` (radial).
        ``variance_explained`` holds each term's share of the total energy in
        percent; the shares sum to 100 (all zeros when the total energy is 0).
        ``reconstructed_signal`` is the sum of all fitted terms, in the
        windowed domain.
    """
    n_meridians = len(meridians)
    n_radial = len(radial_indices)
    n_samples = n_meridians * n_radial
    coefficients = {}
    energy = {}

    windowed_data = apply_window(keratometry_values, window_type, **kwargs)
    reconstructed_signal = np.zeros_like(windowed_data, dtype=float)

    # The index grids do not depend on the harmonic, so build them once.
    meridian_grid = np.tile(meridians, (n_radial, 1))
    radial_grid = np.tile(np.asarray(radial_indices).reshape(-1, 1), (1, n_meridians))

    # DC component
    coefficients['a0_0'] = np.sum(windowed_data) / n_samples
    energy['a0_0'] = coefficients['a0_0'] ** 2
    reconstructed_signal += coefficients['a0_0']

    # Meridional harmonics m = 1..4
    for m in range(1, 5):
        phase_m = 2 * np.pi * m * meridian_grid / n_meridians
        cos_term_m = np.cos(phase_m)
        sin_term_m = np.sin(phase_m)

        a_m0 = (2 / n_samples) * np.sum(windowed_data * cos_term_m)
        b_m0 = (2 / n_samples) * np.sum(windowed_data * sin_term_m)

        coefficients[f'a{m}_0'] = a_m0
        coefficients[f'b{m}_0'] = b_m0
        # Mean-square of A*cos (or A*sin) over whole periods is A**2 / 2.
        energy[f'a{m}_0'] = a_m0 ** 2 / 2
        energy[f'b{m}_0'] = b_m0 ** 2 / 2
        reconstructed_signal += a_m0 * cos_term_m + b_m0 * sin_term_m

    # Radial harmonics r = 1..4
    for r in range(1, 5):
        phase_r = 2 * np.pi * r * radial_grid / n_radial
        cos_term_r = np.cos(phase_r)
        sin_term_r = np.sin(phase_r)

        a_0r = (2 / n_samples) * np.sum(windowed_data * cos_term_r)
        b_0r = (2 / n_samples) * np.sum(windowed_data * sin_term_r)

        coefficients[f'a0_{r}'] = a_0r
        coefficients[f'b0_{r}'] = b_0r
        energy[f'a0_{r}'] = a_0r ** 2 / 2
        energy[f'b0_{r}'] = b_0r ** 2 / 2
        reconstructed_signal += a_0r * cos_term_r + b_0r * sin_term_r

    # Numerator and denominator now use the same per-term energy, so the
    # percentages add up to 100 instead of over-counting the non-DC terms.
    total_energy = sum(energy.values())
    if total_energy == 0:
        variance_explained = {key: 0.0 for key in energy}
    else:
        variance_explained = {
            key: (value / total_energy) * 100 for key, value in energy.items()
        }

    return coefficients, variance_explained, reconstructed_signal

def calculate_r2(original_signal, reconstructed_signal):
    """
    Compute the coefficient of determination (R²) of a reconstruction.

    Parameters
    ----------
    original_signal : array-like
        Reference values.
    reconstructed_signal : array-like
        Reconstructed values, broadcastable against ``original_signal``.

    Returns
    -------
    float
        ``1 - SS_res / SS_tot``. When ``original_signal`` is constant
        (``SS_tot == 0``) the ratio is undefined; ``1.0`` is returned for an
        exact reconstruction and ``0.0`` otherwise, instead of ``nan``/``-inf``.
    """
    original = np.asarray(original_signal, dtype=float)
    reconstructed = np.asarray(reconstructed_signal, dtype=float)

    ss_total = np.sum((original - np.mean(original)) ** 2)
    ss_residual = np.sum((original - reconstructed) ** 2)

    if ss_total == 0:
        return 1.0 if ss_residual == 0 else 0.0

    return 1 - (ss_residual / ss_total)

def perform_2d_fourier_analysis(df, window_type=None, **kwargs):
    """
    Run the windowed 2D Fourier analysis on one long-format data frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Meridian_Index``, ``Radial_Index`` and
        ``Keratometry_Value``.
    window_type : str or None, optional
        Window name forwarded (with ``**kwargs``) to the coefficient
        calculation and to ``inverse_window``.

    Returns
    -------
    tuple
        ``(r2, coefficients, variance_explained)``.
    """
    unique_meridians = np.sort(df['Meridian_Index'].unique())
    unique_radial_indices = np.sort(df['Radial_Index'].unique())

    # Rows are radial indices, columns are meridians; cells without data stay 0.
    keratometry_matrix = np.zeros((len(unique_radial_indices), len(unique_meridians)))

    # Fill the grid in one vectorised assignment instead of filtering the whole
    # frame once per cell. As before, the first row seen for a
    # (radial, meridian) pair wins.
    key_columns = ['Radial_Index', 'Meridian_Index']
    first_rows = df.dropna(subset=key_columns).drop_duplicates(subset=key_columns, keep='first')
    row_positions = np.searchsorted(unique_radial_indices, first_rows['Radial_Index'].values)
    col_positions = np.searchsorted(unique_meridians, first_rows['Meridian_Index'].values)
    keratometry_matrix[row_positions, col_positions] = first_rows['Keratometry_Value'].values

    coefficients, variance_explained, reconstructed_signal = calculate_2d_fourier_coefficients(
        unique_meridians, unique_radial_indices, keratometry_matrix, window_type, **kwargs
    )

    # Calculate R-squared using the inverse window
    # NOTE(review): inverse_window divides by window samples floored at 1e-10,
    # so edge samples of the reconstruction can be amplified enormously and
    # dominate this R². Consider scoring in the windowed domain instead.
    reconstructed_signal_unwindowed = inverse_window(reconstructed_signal, window_type, **kwargs)
    r2 = calculate_r2(keratometry_matrix, reconstructed_signal_unwindowed)

    return r2, coefficients, variance_explained

# Compare the R² obtained with no window and with each supported window.
# Runs at cell level on the module-level `df` loaded at the top of this cell.
window_types = [None, 'hann', 'kaiser', 'blackman']
results = {}

for window_type in window_types:
    # Kaiser is the only window that takes a shape parameter; 8.6 is passed explicitly.
    if window_type == 'kaiser':
        r2, coefficients, variance_explained = perform_2d_fourier_analysis(df, window_type, beta=8.6)
    else:
        r2, coefficients, variance_explained = perform_2d_fourier_analysis(df, window_type)

    # The unwindowed run is stored and reported under the label "no_window".
    results[window_type if window_type is not None else "no_window"] = r2
    print(f"Window: {window_type if window_type is not None else 'no_window'}, R2: {r2:.4f}")

# One row per window label with a single 'R2' column, written to the working directory.
results_df = pd.DataFrame.from_dict(results, orient='index', columns=['R2'])
output_path = "fourier_analysis_window_comparison_updated.csv"  # Choose a suitable file name
results_df.to_csv(output_path)
print(f"R2 results for different window functions saved to {output_path}")

In [8]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score

# Configuration parameters
# main() sweeps every (meridional, radial) component count from 1 up to these maxima.
MAX_MERIDIONAL_COMPONENTS = 5  # Maximum meridional components
MAX_RADIAL_COMPONENTS = 2      # Maximum radial components
# NOTE(review): absolute, machine-specific path.
DATA_PATH = "/home/aricept094/mydata/sheets/conv/transformed/Sheet2_transformed.csv"

def reconstruct_signal(coefficients, meridians, radial_indices, max_meridional_components, max_radial_components):
    """
    Rebuild the 2D signal from Fourier coefficients (no cross-terms).

    The result has one row per radial index and one column per meridian. It
    starts from ``coefficients['a0']`` and adds each meridional pair
    ``am{m}``/``bm{m}`` and radial pair ``ar{r}``/``br{r}`` up to the given
    maxima. A harmonic is skipped unless both of its keys are present.
    """
    n_meridians = len(meridians)
    n_radial = len(radial_indices)

    # A single meshgrid call yields both index grids, each of shape (n_radial, n_meridians).
    meridian_grid, radial_grid = np.meshgrid(meridians, radial_indices)

    # Start from the DC level.
    reconstructed = np.zeros((n_radial, n_meridians))
    reconstructed += coefficients['a0']

    # Harmonics that vary along the meridian axis.
    for m in range(1, max_meridional_components + 1):
        phase = 2 * np.pi * m * meridian_grid / n_meridians
        cos_key, sin_key = f'am{m}', f'bm{m}'
        if cos_key not in coefficients or sin_key not in coefficients:
            continue
        reconstructed += coefficients[cos_key] * np.cos(phase)
        reconstructed += coefficients[sin_key] * np.sin(phase)

    # Harmonics that vary along the radial axis.
    for r in range(1, max_radial_components + 1):
        phase = 2 * np.pi * r * radial_grid / n_radial
        cos_key, sin_key = f'ar{r}', f'br{r}'
        if cos_key not in coefficients or sin_key not in coefficients:
            continue
        reconstructed += coefficients[cos_key] * np.cos(phase)
        reconstructed += coefficients[sin_key] * np.sin(phase)

    return reconstructed

def calculate_2d_fourier_coefficients(meridians, radial_indices, keratometry_values, max_meridional_components, max_radial_components):
    """
    Project the keratometry grid onto meridional and radial harmonics
    (no cross-terms) and score the resulting reconstruction.

    Returns ``(coefficients, variance_explained, r2, reconstructed_values)``.
    ``coefficients`` is keyed ``'a0'``, ``'am{m}'``/``'bm{m}'`` and
    ``'ar{r}'``/``'br{r}'``. ``variance_explained`` is each squared
    coefficient as a percentage of the sum of all squared coefficients.
    """
    n_meridians = len(meridians)
    n_radial = len(radial_indices)

    def project(basis):
        # Inner product of the data with one basis image, scaled by 2 / N.
        return (2 / (n_meridians * n_radial)) * np.sum(keratometry_values * basis)

    # DC component
    dc_level = np.mean(keratometry_values)
    coefficients = {'a0': dc_level}
    energy = {'a0': dc_level**2}
    total_energy = energy['a0']

    # Both index grids come from the same meshgrid call.
    meridian_grid, radial_grid = np.meshgrid(meridians, radial_indices)

    # Meridional harmonics
    for m in range(1, max_meridional_components + 1):
        phase = 2 * np.pi * m * meridian_grid / n_meridians
        am = project(np.cos(phase))
        bm = project(np.sin(phase))

        coefficients[f'am{m}'], coefficients[f'bm{m}'] = am, bm
        energy[f'am{m}'], energy[f'bm{m}'] = am**2, bm**2
        total_energy += am**2 + bm**2

    # Radial harmonics
    for r in range(1, max_radial_components + 1):
        phase = 2 * np.pi * r * radial_grid / n_radial
        ar = project(np.cos(phase))
        br = project(np.sin(phase))

        coefficients[f'ar{r}'], coefficients[f'br{r}'] = ar, br
        energy[f'ar{r}'], energy[f'br{r}'] = ar**2, br**2
        total_energy += ar**2 + br**2

    # Share of the summed squared coefficients, in percent.
    variance_explained = {
        name: (squared / total_energy) * 100 for name, squared in energy.items()
    }

    # Rebuild the grid from the coefficients and score it against the input.
    reconstructed_values = reconstruct_signal(
        coefficients, meridians, radial_indices,
        max_meridional_components, max_radial_components,
    )
    r2 = r2_score(keratometry_values.flatten(), reconstructed_values.flatten())

    return coefficients, variance_explained, r2, reconstructed_values

def perform_2d_fourier_analysis(df, max_meridional_components, max_radial_components):
    """
    Run the 2D Fourier analysis on one long-format data frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Meridian_Index``, ``Radial_Index`` and
        ``Keratometry_Value``.
    max_meridional_components, max_radial_components : int
        Highest meridional / radial harmonic to fit.

    Returns
    -------
    tuple
        ``(coefficients, variance_explained, r2, reconstructed_values)`` as
        produced by ``calculate_2d_fourier_coefficients``.
    """
    unique_meridians = np.sort(df['Meridian_Index'].unique())
    unique_radial_indices = np.sort(df['Radial_Index'].unique())

    # Rows are radial indices, columns are meridians; cells without data stay 0.
    keratometry_matrix = np.zeros((len(unique_radial_indices), len(unique_meridians)))

    # Fill the grid in one vectorised assignment instead of filtering the whole
    # frame once per cell. As before, the first row seen for a
    # (radial, meridian) pair wins.
    key_columns = ['Radial_Index', 'Meridian_Index']
    first_rows = df.dropna(subset=key_columns).drop_duplicates(subset=key_columns, keep='first')
    row_positions = np.searchsorted(unique_radial_indices, first_rows['Radial_Index'].values)
    col_positions = np.searchsorted(unique_meridians, first_rows['Meridian_Index'].values)
    keratometry_matrix[row_positions, col_positions] = first_rows['Keratometry_Value'].values

    # Calculate coefficients, variance explained, and R²
    coefficients, variance_explained, r2, reconstructed_values = calculate_2d_fourier_coefficients(
        unique_meridians, unique_radial_indices, keratometry_matrix,
        max_meridional_components, max_radial_components
    )

    return coefficients, variance_explained, r2, reconstructed_values

def print_coefficients(coefficients, max_meridional_components, max_radial_components):
    """
    Print the DC term followed by the meridional and radial coefficient pairs.

    Every value is shown with six decimals. Keys ``am{m}``/``bm{m}`` and
    ``ar{r}``/``br{r}`` must exist for each index up to the given maxima.
    """
    print("\nCoefficients:")
    print(f"DC Component (a0): {coefficients['a0']:.6f}")

    # (section heading, axis letter used in the keys, number of harmonics)
    sections = (
        ("\nMeridional Components:", 'm', max_meridional_components),
        ("\nRadial Components:", 'r', max_radial_components),
    )

    for heading, axis, n_harmonics in sections:
        print(heading)
        for idx in range(1, n_harmonics + 1):
            print(f"{axis}={idx}:")
            for family in ('a', 'b'):
                key = f'{family}{axis}{idx}'
                print(f"  {key}: {coefficients[key]:.6f}")

def main():
    """
    Sweep all meridional/radial component counts on the configured data file,
    print the fit details for each combination and save two CSV summaries
    (``fourier_analysis_summary.csv`` and ``fourier_analysis_coefficients.csv``).
    """
    # Load the dataset
    df = pd.read_csv(DATA_PATH)

    # One record per tested combination
    coefficient_rows = []
    summary_rows = []
    banner = '=' * 50

    for meridional_components in range(1, MAX_MERIDIONAL_COMPONENTS + 1):
        for radial_components in range(1, MAX_RADIAL_COMPONENTS + 1):
            print(f"\n{banner}")
            print(f"Testing with {meridional_components} meridional and {radial_components} radial components...")
            print(banner)

            coefficients, variance_explained, r2, reconstructed_values = perform_2d_fourier_analysis(
                df, meridional_components, radial_components
            )

            print_coefficients(coefficients, meridional_components, radial_components)

            # Sum of the shares of every term except the DC component
            total_variance = sum(
                share for name, share in variance_explained.items() if name != 'a0'
            )

            summary_rows.append({
                'meridional_components': meridional_components,
                'radial_components': radial_components,
                'r2_score': r2,
                'total_variance_explained': total_variance,
            })

            coefficient_row = {
                'meridional_components': meridional_components,
                'radial_components': radial_components,
            }
            coefficient_row.update(coefficients)
            coefficient_rows.append(coefficient_row)

            print(f"\nR² Score: {r2:.4f}")
            print(f"Total Variance Explained: {total_variance:.2f}%")

            print("\nVariance Explained by Components:")
            for name, share in variance_explained.items():
                print(f"{name}: {share:.2f}%")

    # Summary table of every combination
    summary_df = pd.DataFrame(summary_rows)
    print("\nSummary of all component combinations tested:")
    print(summary_df.to_string(index=False))

    # Coefficient table (one row per combination)
    coeff_df = pd.DataFrame(coefficient_rows)

    # Persist both tables in the working directory
    summary_df.to_csv("fourier_analysis_summary.csv", index=False)
    coeff_df.to_csv("fourier_analysis_coefficients.csv", index=False)
    print("\nResults saved to:")
    print("- fourier_analysis_summary.csv")
    print("- fourier_analysis_coefficients.csv")

# Entry point: run the component sweep defined in main().
if __name__ == "__main__":
    main()

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
import os
from pathlib import Path
import concurrent.futures
from functools import lru_cache
import multiprocessing
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides numerical warnings (e.g. division by zero) — confirm that is intended.
warnings.filterwarnings('ignore')

MERIDIONAL_COMPONENTS = 2  # highest meridional harmonic fitted per file
RADIAL_COMPONENTS = 5  # highest radial harmonic fitted per file
# NOTE(review): absolute, machine-specific directory.
DATA_DIR = "/home/aricept094/mydata/sheets/conv/transformed/"
CHUNK_SIZE = 300  # number of CSV files handed to one worker task

# int() truncates, so for fewer than 100 CPUs this is cpu_count - 1, with a floor of 1.
MAX_WORKERS = max(1, int(multiprocessing.cpu_count() * 0.99))

@lru_cache(maxsize=None)
def calculate_trig_terms(m, n_points):
    """
    Return ``(cos, sin)`` of harmonic ``m`` sampled at ``n_points`` evenly
    spaced positions ``0 .. n_points - 1`` of one period.

    Results are memoised per ``(m, n_points)``, so every caller receives the
    same array objects. They are therefore marked read-only: an accidental
    in-place edit raises ``ValueError`` instead of silently corrupting the
    cache for all later callers.
    """
    angle = 2 * np.pi * m * np.arange(n_points) / n_points
    cos_term, sin_term = np.cos(angle), np.sin(angle)

    # Protect the shared, cached arrays against in-place modification.
    cos_term.setflags(write=False)
    sin_term.setflags(write=False)
    return cos_term, sin_term

def reconstruct_signal(coefficients, meridians, radial_indices, max_meridional, max_radial):
    """
    Rebuild the 2D signal from Fourier coefficients (no cross-terms).

    Parameters
    ----------
    coefficients : dict
        Must contain ``'a0'``; may contain ``'am{m}'``/``'bm{m}'`` and
        ``'ar{r}'``/``'br{r}'`` pairs. A harmonic is skipped unless both of
        its keys are present.
    meridians, radial_indices : sequence
        Only their lengths are used: harmonics are sampled at positions
        ``0 .. n - 1`` along each axis.
    max_meridional, max_radial : int
        Highest meridional / radial harmonic to add.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(radial_indices), len(meridians))``.
    """
    n_meridians = len(meridians)
    n_radial = len(radial_indices)

    # Explicit float dtype: np.full would otherwise take an integer dtype from
    # an integer 'a0', and the in-place float additions below would then fail.
    reconstructed = np.full((n_radial, n_meridians), coefficients['a0'], dtype=float)

    # Meridional harmonics vary along the columns.
    for m in range(1, max_meridional + 1):
        cos_m, sin_m = calculate_trig_terms(m, n_meridians)
        am_key, bm_key = f'am{m}', f'bm{m}'
        if am_key in coefficients and bm_key in coefficients:
            reconstructed += (coefficients[am_key] * cos_m[None, :] +
                            coefficients[bm_key] * sin_m[None, :])

    # Radial harmonics vary along the rows.
    for r in range(1, max_radial + 1):
        cos_r, sin_r = calculate_trig_terms(r, n_radial)
        ar_key, br_key = f'ar{r}', f'br{r}'
        if ar_key in coefficients and br_key in coefficients:
            reconstructed += (coefficients[ar_key] * cos_r[:, None] +
                            coefficients[br_key] * sin_r[:, None])

    return reconstructed

def calculate_2d_fourier_coefficients(meridians, radial_indices, keratometry_values, max_meridional, max_radial):
    """
    Project the keratometry grid onto meridional and radial harmonics
    (no cross-terms) and score the resulting reconstruction.

    Returns ``(coefficients, variance_explained, r2, reconstructed_values)``.
    ``coefficients`` is keyed ``'a0'``, ``'am{m}'``/``'bm{m}'`` and
    ``'ar{r}'``/``'br{r}'``. ``variance_explained`` is each squared
    coefficient as a percentage of the sum of all squared coefficients.
    """
    n_meridians = len(meridians)
    n_radial = len(radial_indices)
    norm_factor = 2 / (n_meridians * n_radial)

    # DC component
    dc_level = np.mean(keratometry_values)
    coefficients = {'a0': dc_level}
    energy = {'a0': dc_level**2}
    total_energy = energy['a0']

    # (key tag, number of harmonics, samples along the axis, 1D -> broadcastable 2D)
    axes = (
        ('m', max_meridional, n_meridians, lambda vec: vec[None, :]),
        ('r', max_radial, n_radial, lambda vec: vec[:, None]),
    )

    for tag, n_harmonics, n_points, orient in axes:
        for harmonic in range(1, n_harmonics + 1):
            cos_vals, sin_vals = calculate_trig_terms(harmonic, n_points)

            cos_coef = norm_factor * np.sum(keratometry_values * orient(cos_vals))
            sin_coef = norm_factor * np.sum(keratometry_values * orient(sin_vals))

            coefficients[f'a{tag}{harmonic}'] = cos_coef
            coefficients[f'b{tag}{harmonic}'] = sin_coef
            energy[f'a{tag}{harmonic}'] = cos_coef**2
            energy[f'b{tag}{harmonic}'] = sin_coef**2
            total_energy += cos_coef**2 + sin_coef**2

    variance_explained = {}
    for name, squared in energy.items():
        variance_explained[name] = (squared / total_energy) * 100

    # Rebuild the grid from the coefficients and score it against the input.
    reconstructed_values = reconstruct_signal(
        coefficients, meridians, radial_indices, max_meridional, max_radial
    )
    r2 = r2_score(keratometry_values.flatten(), reconstructed_values.flatten())

    return coefficients, variance_explained, r2, reconstructed_values

def perform_2d_fourier_analysis(df):
    """
    Arrange one file's ``KR_scaled`` values on a (radial x meridian) grid and
    run the 2D Fourier fit with the module-level component counts.

    Returns the tuple produced by ``calculate_2d_fourier_coefficients``.
    Grid cells without a data row stay 0; when a (radial, meridian) pair
    occurs more than once, the last row wins.
    """
    meridians = df['Meridian_Index'].unique()
    radial_indices = df['Radial_Index'].unique()
    meridians.sort()
    radial_indices.sort()

    # Map each index value to its column / row position in the grid.
    column_of = dict(zip(meridians, range(len(meridians))))
    row_of = dict(zip(radial_indices, range(len(radial_indices))))

    keratometry_matrix = np.zeros((len(radial_indices), len(meridians)))

    # Row-by-row fill of the grid from the long-format table.
    records = df[['Radial_Index', 'Meridian_Index', 'KR_scaled']].values
    for record in records:
        radial, meridian, value = record
        keratometry_matrix[row_of[radial], column_of[meridian]] = value

    return calculate_2d_fourier_coefficients(
        meridians,
        radial_indices,
        keratometry_matrix,
        MERIDIONAL_COMPONENTS,
        RADIAL_COMPONENTS,
    )

def process_file_chunk(file_chunk):
    """
    Analyse every CSV file in ``file_chunk`` and return one result dict per
    successfully processed file.

    Each dict holds the file name, R², the component counts, and the
    coefficients / variance shares under ``coef_*`` / ``variance_*`` keys.
    A file that raises is reported on stdout and skipped.
    """
    collected = []
    for csv_file in file_chunk:
        try:
            df = pd.read_csv(csv_file)
            coefficients, variance_explained, r2, _ = perform_2d_fourier_analysis(df)

            record = {
                'filename': csv_file.name,
                'r2_score': r2,
                'meridional_components': MERIDIONAL_COMPONENTS,
                'radial_components': RADIAL_COMPONENTS,
            }
            for name, value in coefficients.items():
                record[f'coef_{name}'] = value
            for name, share in variance_explained.items():
                record[f'variance_{name}'] = share

            collected.append(record)
            print(f"Processed {csv_file.name}: R² = {r2:.4f}")

        except Exception as e:
            # Best effort: report the failure and move on to the next file.
            print(f"Error processing {csv_file.name}: {str(e)}")

    return collected

def process_all_files():
    """
    Analyse every ``*.csv`` file in ``DATA_DIR`` in parallel worker processes.

    Files are split into chunks of ``CHUNK_SIZE`` and each chunk is handled by
    ``process_file_chunk``. The file list is sorted and chunk results are
    gathered in submission order, so the row order of the returned frame is
    reproducible from run to run.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed file.

    Raises
    ------
    ValueError
        If no file produced a result (including when no CSV file is found).
    """
    # Directory listing order is filesystem-dependent; sort for reproducibility.
    csv_files = sorted(Path(DATA_DIR).glob("*.csv"))
    chunks = [csv_files[i:i + CHUNK_SIZE] for i in range(0, len(csv_files), CHUNK_SIZE)]

    all_results = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [executor.submit(process_file_chunk, chunk) for chunk in chunks]

        # Chunks still run concurrently; only the collection order is fixed.
        for future in futures:
            all_results.extend(future.result())

    if not all_results:
        raise ValueError("No files were processed successfully")

    return pd.DataFrame(all_results)

def calculate_combined_coefficients(results_df):
    """
    Blend the per-file coefficients into one set using fit-quality weights.

    Each file is weighted by ``1 / ((1 - R² + 1e-10) * n_files)``, the weights
    are normalised to sum to 1, and every ``coef_*`` column is averaged with
    those weights. Returns a dict keyed by coefficient name without the
    ``coef_`` prefix.
    """
    prefix = 'coef_'
    coef_columns = [column for column in results_df.columns if column.startswith(prefix)]

    # 1 - R² serves as the error measure of each file's fit.
    n_files = len(results_df)
    fit_error = 1 - results_df['r2_score'].values

    # Small offset keeps a perfect fit (R² == 1) from dividing by zero.
    guard = 1e-10
    weights = 1 / ((fit_error + guard) * n_files)
    weights = weights / np.sum(weights)

    # Rows are files, columns are coefficients.
    per_file = np.array([results_df[column].values for column in coef_columns]).T
    blended = (per_file * weights[:, None]).sum(axis=0)

    combined = {}
    for column, value in zip(coef_columns, blended):
        combined[column[len(prefix):]] = value
    return combined

def test_combined_model(combined_coefficients):
    """
    Score the combined coefficient set against every CSV file in ``DATA_DIR``.

    For each file the ``KR_scaled`` grid is rebuilt, the combined model is
    evaluated on a grid of the same shape, and R², the two grid means and the
    mean absolute error are recorded. A file that raises is reported on
    stdout and skipped. Returns a DataFrame with one row per scored file.
    """
    csv_files = list(Path(DATA_DIR).glob("*.csv"))
    scored = []

    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file)

            meridians = df['Meridian_Index'].unique()
            radial_indices = df['Radial_Index'].unique()
            meridians.sort()
            radial_indices.sort()

            # Map each index value to its column / row position in the grid.
            column_of = dict(zip(meridians, range(len(meridians))))
            row_of = dict(zip(radial_indices, range(len(radial_indices))))

            observed = np.zeros((len(radial_indices), len(meridians)))
            records = df[['Radial_Index', 'Meridian_Index', 'KR_scaled']].values
            for record in records:
                radial, meridian, value = record
                observed[row_of[radial], column_of[meridian]] = value

            predicted = reconstruct_signal(
                combined_coefficients,
                meridians,
                radial_indices,
                MERIDIONAL_COMPONENTS,
                RADIAL_COMPONENTS,
            )

            r2 = r2_score(observed.flatten(), predicted.flatten())

            row = {
                'filename': csv_file.name,
                'r2_score': r2,
                'meridional_components': MERIDIONAL_COMPONENTS,
                'radial_components': RADIAL_COMPONENTS,
            }
            row['original_values_mean'] = np.mean(observed)
            row['reconstructed_values_mean'] = np.mean(predicted)
            row['absolute_error'] = np.mean(np.abs(observed - predicted))
            scored.append(row)

            print(f"Tested combined model on {csv_file.name}: R² = {r2:.4f}")

        except Exception as e:
            # Best effort: report the failure and move on to the next file.
            print(f"Error testing {csv_file.name}: {str(e)}")

    return pd.DataFrame(scored)

def main():
    """
    Run the full pipeline: fit every file, blend the coefficients into a
    combined model, save the per-file results (plus a ``COMBINED_MODEL`` row),
    score the combined model on every file, save those scores, and print
    summaries. Any exception is caught and reported on stdout.
    """
    print(f"Starting optimized Fourier analysis using {MAX_WORKERS} processes...")
    print(f"Meridional components: {MERIDIONAL_COMPONENTS}")
    print(f"Radial components: {RADIAL_COMPONENTS}")

    try:
        per_file_df = process_all_files()
        combined_coefs = calculate_combined_coefficients(per_file_df)

        # Extra row describing the combined model; it has no R² or variance
        # shares of its own, so those fields are NaN.
        combined_row = {
            'filename': 'COMBINED_MODEL',
            'r2_score': np.nan,
            'meridional_components': MERIDIONAL_COMPONENTS,
            'radial_components': RADIAL_COMPONENTS,
        }
        for name, value in combined_coefs.items():
            combined_row[f'coef_{name}'] = value
        for name in combined_coefs.keys():
            combined_row[f'variance_{name}'] = np.nan

        report_df = pd.concat([per_file_df, pd.DataFrame([combined_row])], ignore_index=True)

        report_df.to_csv("fourier_analysis_results.csv", index=False)
        print("\nInitial results saved to: fourier_analysis_results.csv")

        print("\nTesting combined model on all files...")
        test_results_df = test_combined_model(combined_coefs)

        test_results_df.to_csv("combined_model_test_results.csv", index=False)
        print("\nTest results saved to: combined_model_test_results.csv")

        # The last row of report_df is the combined model, so it is left out
        # of the count and the average.
        print("\nTraining Summary:")
        print(f"Total files processed: {len(report_df) - 1}")
        print(f"Average Training R² Score: {report_df['r2_score'].iloc[:-1].mean():.4f}")

        print("\nTesting Summary:")
        print(f"Total files tested: {len(test_results_df)}")
        print(f"Average Test R² Score: {test_results_df['r2_score'].mean():.4f}")
        print(f"Average Absolute Error: {test_results_df['absolute_error'].mean():.4f}")

    except Exception as e:
        print(f"Error during analysis: {str(e)}")

# Entry point: run the batch analysis defined in main().
if __name__ == "__main__":
    main()

In [3]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
import os
from pathlib import Path
import concurrent.futures
from functools import lru_cache
import multiprocessing
import warnings
warnings.filterwarnings('ignore')

# Number of harmonics (r = 1 .. RADIAL_COMPONENTS) fitted in addition to the DC term.
RADIAL_COMPONENTS = 9
# Directory that is globbed for "*.csv" input files.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
DATA_DIR = "/home/aricept094/mydata/sheets/conv/transformed/"
# Number of files bundled into one task submitted to the process pool.
CHUNK_SIZE = 300

# Process-pool size: 99% of the reported CPU count, truncated, but never below 1.
MAX_WORKERS = max(1, int(multiprocessing.cpu_count() * 0.99))

@lru_cache(maxsize=None)
def calculate_trig_terms(r, n_points):
    """Return the cosine and sine basis vectors of harmonic ``r``.

    Both vectors hold ``n_points`` samples evaluated at the angles
    ``2*pi*r*k/n_points`` for ``k = 0 .. n_points - 1``.

    The result is memoised per ``(r, n_points)`` pair, so repeated calls
    return the very same array objects; callers should not modify them
    in place.
    """
    sample_positions = np.arange(n_points)
    phase = 2 * np.pi * r * sample_positions / n_points
    cosine_terms = np.cos(phase)
    sine_terms = np.sin(phase)
    return cosine_terms, sine_terms

def reconstruct_signal(coefficients, radial_indices, max_radial):
    """Rebuild a sampled signal from a mapping of Fourier coefficients.

    Args:
        coefficients: mapping with the DC term under ``'a0'`` and, per
            harmonic ``r``, the cosine/sine coefficients under ``'ar{r}'``
            and ``'br{r}'``.
        radial_indices: sample positions; only their count is used.
        max_radial: highest harmonic to add.

    Returns:
        Array with one value per entry of ``radial_indices``.  A harmonic is
        added only when both of its coefficients are present in
        ``coefficients``.
    """
    sample_count = len(radial_indices)
    signal = np.full(sample_count, coefficients['a0'])

    for harmonic in range(1, max_radial + 1):
        cosine_terms, sine_terms = calculate_trig_terms(harmonic, sample_count)
        cos_key = f'ar{harmonic}'
        sin_key = f'br{harmonic}'
        # Skip harmonics for which the model does not provide a full pair.
        if cos_key not in coefficients or sin_key not in coefficients:
            continue
        contribution = (coefficients[cos_key] * cosine_terms +
                        coefficients[sin_key] * sine_terms)
        signal += contribution

    return signal

def calculate_1d_fourier_coefficients(radial_indices, keratometry_values, max_radial):
    """Project a sampled profile onto the first ``max_radial`` Fourier harmonics.

    Args:
        radial_indices: sample positions; only their count is used here and
            they are forwarded to ``reconstruct_signal``.
        keratometry_values: sample values, one per position.
        max_radial: number of harmonics to compute.

    Returns:
        Tuple ``(coefficients, variance_explained, r2, reconstructed_values)``:
        the coefficient mapping (``'a0'``, ``'ar{r}'``, ``'br{r}'``), each
        coefficient's squared value as a percentage of the sum of all squared
        coefficients, the R² of the reconstruction against the input, and the
        reconstruction itself.
    """
    sample_count = len(radial_indices)
    scale = 2 / sample_count

    # DC term: plain mean of the samples.
    dc_term = np.mean(keratometry_values)
    coefficients = {'a0': dc_term}
    energy = {'a0': coefficients['a0']**2}
    total_energy = energy['a0']

    for harmonic in range(1, max_radial + 1):
        cosine_terms, sine_terms = calculate_trig_terms(harmonic, sample_count)

        cos_coef = scale * np.sum(keratometry_values * cosine_terms)
        sin_coef = scale * np.sum(keratometry_values * sine_terms)

        cos_key, sin_key = f'ar{harmonic}', f'br{harmonic}'
        coefficients[cos_key] = cos_coef
        coefficients[sin_key] = sin_coef
        energy[cos_key] = cos_coef**2
        energy[sin_key] = sin_coef**2
        total_energy += cos_coef**2 + sin_coef**2

    # Share of each squared coefficient in the summed squared coefficients, in percent.
    variance_explained = {}
    for name, squared in energy.items():
        variance_explained[name] = (squared / total_energy) * 100

    reconstructed_values = reconstruct_signal(coefficients, radial_indices, max_radial)
    r2 = r2_score(keratometry_values, reconstructed_values)

    return coefficients, variance_explained, r2, reconstructed_values

def perform_1d_fourier_analysis(df):
    """Run the 1D Fourier fit on the per-index mean profile of one sheet.

    The rows of ``df`` are grouped by ``Meridian_Index`` and
    ``Axial_Anterior_Value`` is averaged within each group; the resulting
    profile, ordered by index, is passed to
    ``calculate_1d_fourier_coefficients`` with ``RADIAL_COMPONENTS`` harmonics.

    Returns:
        The tuple returned by ``calculate_1d_fourier_coefficients``.
    """
    # NOTE(review): the grouping key is 'Meridian_Index' although the names
    # used around this call speak of radial indices - confirm which axis is
    # meant to be averaged out.
    grouped_means = df.groupby('Meridian_Index')['Axial_Anterior_Value'].mean()
    ordered_index = np.array(sorted(grouped_means.index))
    profile = grouped_means[ordered_index].values

    return calculate_1d_fourier_coefficients(ordered_index, profile, RADIAL_COMPONENTS)

def process_file_chunk(file_chunk):
    """Analyse every CSV file of one chunk and collect one result row per file.

    Args:
        file_chunk: iterable of path objects (their ``.name`` is reported).

    Returns:
        List of dicts holding the file name, R², ``RADIAL_COMPONENTS`` and
        the ``coef_*`` / ``variance_*`` values.  A file that raises is
        reported on stdout and left out of the list.
    """
    chunk_rows = []
    for path in file_chunk:
        try:
            frame = pd.read_csv(path)
            coefficients, variance_explained, r2, _ = perform_1d_fourier_analysis(frame)

            row = {
                'filename': path.name,
                'r2_score': r2,
                'radial_components': RADIAL_COMPONENTS
            }
            for key, value in coefficients.items():
                row[f'coef_{key}'] = value
            for key, value in variance_explained.items():
                row[f'variance_{key}'] = value

            chunk_rows.append(row)
            print(f"Processed {path.name}: R² = {r2:.4f}")
        except Exception as e:
            # Best effort: one broken file must not abort the whole chunk.
            print(f"Error processing {path.name}: {str(e)}")

    return chunk_rows

def process_all_files():
    """Analyse every ``*.csv`` file under ``DATA_DIR`` using a process pool.

    The files are split into slices of ``CHUNK_SIZE`` and each slice is
    handled by ``process_file_chunk`` in a worker process; rows are gathered
    in the order in which the chunks finish.

    Returns:
        DataFrame with one row per successfully processed file.

    Raises:
        ValueError: if no file produced a result.
    """
    csv_paths = list(Path(DATA_DIR).glob("*.csv"))

    batches = []
    for start in range(0, len(csv_paths), CHUNK_SIZE):
        batches.append(csv_paths[start:start + CHUNK_SIZE])

    collected = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=MAX_WORKERS) as pool:
        pending = [pool.submit(process_file_chunk, batch) for batch in batches]
        for finished in concurrent.futures.as_completed(pending):
            collected += finished.result()

    if len(collected) == 0:
        raise ValueError("No files were processed successfully")

    return pd.DataFrame(collected)

def calculate_combined_coefficients(results_df):
    """Combine per-file coefficients into one weighted-average coefficient set.

    Every ``coef_*`` column of ``results_df`` is averaged over the rows with
    weights proportional to ``1 / (1 - r2_score + 1e-10)`` (called "Mallows
    weights" in this notebook), so better-fitting files count more.

    Rows whose ``r2_score`` or any coefficient is NaN/inf are left out: a
    single such row would otherwise turn every weight, and therefore every
    combined coefficient, into NaN.

    Args:
        results_df: DataFrame with an ``r2_score`` column and zero or more
            ``coef_*`` columns.

    Returns:
        Dict mapping each coefficient name (column name without the
        ``coef_`` prefix) to its weighted average.

    Raises:
        ValueError: if no row has a finite score and finite coefficients.
    """
    prefix = 'coef_'
    coef_names = [col for col in results_df.columns if col.startswith(prefix)]

    r2_values = results_df['r2_score'].to_numpy(dtype=float)
    coef_values = results_df[coef_names].to_numpy(dtype=float)

    # Keep only rows that can contribute a well-defined weight and value.
    usable = np.isfinite(r2_values) & np.isfinite(coef_values).all(axis=1)
    if not usable.any():
        raise ValueError("No rows with a finite r2_score and finite coefficients to combine")

    # Error proxy is (1 - R²); the epsilon prevents division by zero for a
    # perfect fit.  A constant factor such as the number of rows would cancel
    # in the normalisation below, so it is not applied.
    epsilon = 1e-10
    mse = 1 - r2_values[usable]
    weights = 1 / (mse + epsilon)
    weights = weights / np.sum(weights)

    # Weighted sum over rows, one value per coefficient column.
    combined_values = weights @ coef_values[usable]

    return {name[len(prefix):]: value for name, value in zip(coef_names, combined_values)}

def test_combined_model(combined_coefficients):
    """Score the combined coefficient set against every CSV file in ``DATA_DIR``.

    For each file the per-``Meridian_Index`` mean of ``Axial_Anterior_Value``
    is compared with the signal rebuilt from ``combined_coefficients``.

    Returns:
        DataFrame with one row per evaluated file: file name, R²,
        ``RADIAL_COMPONENTS``, mean of the original and of the reconstructed
        values, and the mean absolute error.  Files that raise are reported
        on stdout and omitted.
    """
    evaluations = []

    for path in list(Path(DATA_DIR).glob("*.csv")):
        try:
            frame = pd.read_csv(path)
            grouped_means = frame.groupby('Meridian_Index')['Axial_Anterior_Value'].mean()
            ordered_index = np.array(sorted(grouped_means.index))
            observed = grouped_means[ordered_index].values

            predicted = reconstruct_signal(combined_coefficients, ordered_index, RADIAL_COMPONENTS)
            r2 = r2_score(observed, predicted)

            row = {
                'filename': path.name,
                'r2_score': r2,
                'radial_components': RADIAL_COMPONENTS,
                'original_values_mean': np.mean(observed),
                'reconstructed_values_mean': np.mean(predicted),
                'absolute_error': np.mean(np.abs(observed - predicted))
            }
            evaluations.append(row)

            print(f"Tested combined model on {path.name}: R² = {r2:.4f}")
        except Exception as e:
            # Best effort: report the file and carry on with the next one.
            print(f"Error testing {path.name}: {str(e)}")

    return pd.DataFrame(evaluations)

def main():
    """Run the 1D analysis end to end and write both result CSV files.

    Steps: analyse all files, derive the combined coefficient set, append it
    as a ``COMBINED_MODEL`` row, save ``fourier_analysis_1d_results.csv``,
    score the combined model on every file, save
    ``combined_model_1d_test_results.csv`` and print summaries.  Any
    exception is reported on stdout instead of being propagated.
    """
    print(f"Starting optimized 1D Fourier analysis using {MAX_WORKERS} processes...")
    print(f"Radial components: {RADIAL_COMPONENTS}")

    try:
        per_file_df = process_all_files()
        combined_coefs = calculate_combined_coefficients(per_file_df)

        # Extra row carrying the combined model; score and variance shares
        # are not defined for it.
        summary_row = {
            'filename': 'COMBINED_MODEL',
            'r2_score': np.nan,
            'radial_components': RADIAL_COMPONENTS
        }
        for key, value in combined_coefs.items():
            summary_row[f'coef_{key}'] = value
        for key in combined_coefs.keys():
            summary_row[f'variance_{key}'] = np.nan

        results_df = pd.concat([per_file_df, pd.DataFrame([summary_row])], ignore_index=True)

        results_df.to_csv("fourier_analysis_1d_results.csv", index=False)
        print("\nInitial results saved to: fourier_analysis_1d_results.csv")

        print("\nTesting combined model on all files...")
        test_results_df = test_combined_model(combined_coefs)

        test_results_df.to_csv("combined_model_1d_test_results.csv", index=False)
        print("\nTest results saved to: combined_model_1d_test_results.csv")

        # The last row is the combined model, so it is excluded from the
        # training statistics.
        training_scores = results_df['r2_score'].iloc[:-1]
        print(f"\nTraining Summary:")
        print(f"Total files processed: {len(results_df) - 1}")
        print(f"Average Training R² Score: {training_scores.mean():.4f}")

        test_scores = test_results_df['r2_score']
        test_errors = test_results_df['absolute_error']
        print(f"\nTesting Summary:")
        print(f"Total files tested: {len(test_results_df)}")
        print(f"Average Test R² Score: {test_scores.mean():.4f}")
        print(f"Average Absolute Error: {test_errors.mean():.4f}")

    except Exception as e:
        print(f"Error during analysis: {str(e)}")

# Entry point: run main() only when this code is executed as the top-level
# program (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Starting optimized 1D Fourier analysis using 15 processes...
Radial components: 9
Processed Sheet421_transformed.csv: R² = 0.9591Processed Sheet50_transformed.csv: R² = 0.9701

Processed Sheet348_transformed.csv: R² = 0.9666Processed Sheet313_transformed.csv: R² = 0.9693

Processed Sheet210_transformed.csv: R² = 0.9640Processed Sheet4_transformed.csv: R² = 0.9688

Processed Sheet90_transformed.csv: R² = 0.9687Processed Sheet264_transformed.csv: R² = 0.9723

Processed Sheet340_transformed.csv: R² = 0.9687Processed Sheet451_transformed.csv: R² = 0.9674

Processed Sheet46_transformed.csv: R² = 0.9713Processed Sheet329_transformed.csv: R² = 0.9676

Processed Sheet182_transformed.csv: R² = 0.9633Processed Sheet372_transformed.csv: R² = 0.9701

Processed Sheet271_transformed.csv: R² = 0.9719Processed Sheet21_transformed.csv: R² = 0.9681

Processed Sheet393_transformed.csv: R² = 0.9672Processed Sheet66_transformed.csv: R² = 0.9719

Processed Sheet122_transformed.csv: R² = 0.9691Processed Shee

In [10]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from pathlib import Path
import concurrent.futures
from functools import lru_cache
import multiprocessing
import warnings
warnings.filterwarnings('ignore')

# Constants
# Upper bound on the number of harmonics tried per file.
MAX_MERIDIAN_COMPONENTS = 10  # Increased maximum components to test     
# Root directory that holds the "radial_<n>" input folders.
# NOTE(review): absolute, machine-specific paths - consider making them configurable.
BASE_DATA_DIR = Path("/home/aricept094/mydata/sheets/combined_data")
# Directory that receives one "analysis_results_radial_<n>.csv" per folder.
OUTPUT_DIR = Path("/home/aricept094/mydata/sheets/combined_data/radial_results/Height_Anterior_Value")
# Numeric suffixes of the "radial_<n>" folders to process, in this order.
RADIAL_FOLDERS = [1, 4, 8, 12, 16, 20, 24,]
# Percentage compared against the cumulative variance in the harmonic loop of
# calculate_fourier_components.
VARIANCE_THRESHOLD = 99.99  # Setting threshold close to 100% to account for numerical precision
# Process-pool size: 99% of the reported CPU count, truncated, but never below 1.
MAX_WORKERS = max(1, int(multiprocessing.cpu_count() * 0.99))

# Fourier helpers, per-file processing and per-folder orchestration
@lru_cache(maxsize=None)
def calculate_trig_terms(m, n_points):
    """Return the cosine and sine basis vectors of harmonic ``m``.

    Both vectors hold ``n_points`` samples evaluated at the angles
    ``2*pi*m*k/n_points`` for ``k = 0 .. n_points - 1``.

    The result is memoised per ``(m, n_points)`` pair, so repeated calls
    return the very same array objects; callers should not modify them
    in place.
    """
    sample_positions = np.arange(n_points)
    phase = 2 * np.pi * m * sample_positions / n_points
    cosine_terms = np.cos(phase)
    sine_terms = np.sin(phase)
    return cosine_terms, sine_terms

def calculate_fourier_components(meridian_indices, keratometry_values):
    """Fit a truncated Fourier series, adding harmonics until the variance target is met.

    Harmonics ``m = 1, 2, ...`` are added one at a time until the partial
    reconstruction explains at least ``VARIANCE_THRESHOLD`` percent of the
    variance of ``keratometry_values`` about its mean, or until
    ``MAX_MERIDIAN_COMPONENTS`` harmonics have been used.

    "Variance explained" is ``100 * (1 - SS_res / SS_tot)``, i.e. the reported
    R² expressed in percent.  The running value is updated after every
    harmonic; previously it stayed at 0 inside the loop, so the threshold was
    never consulted and the reported percentage was always 100.

    Args:
        meridian_indices: sample positions; only their count is used, the
            samples are treated as equally spaced over one period.
        keratometry_values: sample values, one per position.

    Returns:
        Dict with ``dc_component``, ``components_used``,
        ``total_variance_explained`` (percent), ``r2_score``,
        ``component_{1,2}_amplitude``, ``higher_order_amplitude_sum`` (sum of
        the amplitudes of the used harmonics above 2) and
        ``component_{1,2}_cos`` / ``component_{1,2}_sin``.  Entries of
        harmonic 1 or 2 are 0.0 when that harmonic was not needed, so every
        key is always present for downstream aggregation.
    """
    values = np.asarray(keratometry_values, dtype=float)
    n_meridian = len(meridian_indices)
    norm_factor = 2 / n_meridian

    dc_component = np.mean(values)
    # Spread about the mean: the denominator of "variance explained".
    total_variance = np.sum((values - dc_component) ** 2)

    def percent_explained(approximation):
        """Percentage of the signal variance captured by ``approximation``."""
        if total_variance == 0:
            # A constant signal is fully described by its DC term.
            return 100.0
        residual = np.sum((values - approximation) ** 2)
        return float((1 - residual / total_variance) * 100)

    # Pre-populated so that the keys exist even if the loop stops early.
    amplitudes = {
        'component_1_amplitude': 0.0,
        'component_2_amplitude': 0.0,
    }
    component_coeffs = {
        'component_1_cos': 0.0,
        'component_1_sin': 0.0,
        'component_2_cos': 0.0,
        'component_2_sin': 0.0,
    }
    higher_order_amplitude = 0

    reconstructed = np.full(n_meridian, dc_component, dtype=float)
    cumulative_variance = percent_explained(reconstructed)
    components_used = 0

    while cumulative_variance < VARIANCE_THRESHOLD and components_used < MAX_MERIDIAN_COMPONENTS:
        m = components_used + 1
        cos_m, sin_m = calculate_trig_terms(m, n_meridian)
        am = norm_factor * np.sum(values * cos_m)
        bm = norm_factor * np.sum(values * sin_m)

        current_amplitude = np.sqrt(am**2 + bm**2)

        # Harmonics 1 and 2 are reported individually, the rest as a sum.
        if m <= 2:
            amplitudes[f'component_{m}_amplitude'] = current_amplitude
            component_coeffs[f'component_{m}_cos'] = am
            component_coeffs[f'component_{m}_sin'] = bm
        else:
            higher_order_amplitude += current_amplitude

        # Extend the reconstruction and re-evaluate the stopping criterion.
        reconstructed += (am * cos_m + bm * sin_m)
        components_used = m
        cumulative_variance = percent_explained(reconstructed)

    amplitudes['higher_order_amplitude_sum'] = higher_order_amplitude

    r2 = r2_score(values, reconstructed)

    return {
        'dc_component': dc_component,
        'components_used': components_used,
        'total_variance_explained': cumulative_variance,
        'r2_score': r2,
        **amplitudes,
        **component_coeffs
    }

def process_file(file_path):
    """Analyse one CSV file and return its Fourier summary.

    The per-``Meridian_Index`` mean of ``Height_Anterior_Value`` is passed to
    ``calculate_fourier_components``; the file name is added to the result.

    Returns:
        The result dict, or ``None`` when anything raises (the error is
        reported on stdout).
    """
    try:
        frame = pd.read_csv(file_path)
        per_meridian = frame.groupby('Meridian_Index')['Height_Anterior_Value'].mean()
        ordered_index = np.array(sorted(per_meridian.index))
        profile = per_meridian[ordered_index].values

        summary = calculate_fourier_components(ordered_index, profile)
        summary['filename'] = file_path.name

        print(f"Processed {file_path.name}: R² = {summary['r2_score']:.4f}, Components = {summary['components_used']}")
    except Exception as e:
        print(f"Error processing {file_path.name}: {str(e)}")
        return None

    return summary

def calculate_combined_model(results_df):
    """Average the per-file model parameters into one combined model.

    Each parameter column is averaged over the rows with weights proportional
    to ``1 / (1 - r2_score + 1e-10)`` (called "Mallows weights" in this
    notebook), so better-fitting files count more.

    Rows whose ``r2_score`` or any parameter is NaN/inf are left out: a single
    such row would otherwise turn every weight, and therefore every combined
    value, into NaN.

    Args:
        results_df: DataFrame with ``r2_score`` and all parameter columns
            listed below.

    Returns:
        Dict mapping each parameter column to its weighted average.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if no row has a finite score and finite parameters.
    """
    result_columns = [
        'dc_component',
        'component_1_amplitude',
        'component_2_amplitude',
        'component_1_cos',
        'component_1_sin',
        'component_2_cos',
        'component_2_sin',
        'higher_order_amplitude_sum'
    ]

    r2_values = results_df['r2_score'].to_numpy(dtype=float)
    parameter_values = results_df[result_columns].to_numpy(dtype=float)

    # Keep only rows that can contribute a well-defined weight and value.
    usable = np.isfinite(r2_values) & np.isfinite(parameter_values).all(axis=1)
    if not usable.any():
        raise ValueError("No rows with a finite r2_score and finite parameters to combine")

    # Error proxy is (1 - R²); the epsilon prevents division by zero for a
    # perfect fit.
    mse = 1 - r2_values[usable]
    weights = 1 / (mse + 1e-10)
    weights /= weights.sum()

    usable_values = parameter_values[usable]
    combined_values = {col: np.average(usable_values[:, position], weights=weights)
                       for position, col in enumerate(result_columns)}

    return combined_values

def process_radial_folder(radial_number):
    """Analyse every CSV file of one ``radial_<n>`` folder and save the results.

    The files are processed in a process pool, a ``COMBINED_MODEL`` row is
    appended, and the table is written to
    ``OUTPUT_DIR / analysis_results_radial_<n>.csv``.  A folder without CSV
    files is reported and skipped; any exception is reported on stdout
    instead of being propagated.
    """
    source_dir = BASE_DATA_DIR / f"radial_{radial_number}"
    target_path = OUTPUT_DIR / f"analysis_results_radial_{radial_number}.csv"

    print(f"\nProcessing radial_{radial_number} folder...")

    try:
        input_paths = list(source_dir.glob("*.csv"))
        if len(input_paths) == 0:
            print(f"No CSV files found in {source_dir}")
            return

        # Failed files come back as None and are dropped here.
        with concurrent.futures.ProcessPoolExecutor(max_workers=MAX_WORKERS) as pool:
            per_file = [item for item in pool.map(process_file, input_paths) if item]

        results_df = pd.DataFrame(per_file)
        combined_model = calculate_combined_model(results_df)

        # Extra row carrying the combined model; per-file metrics do not
        # apply to it.
        summary_row = {
            'filename': 'COMBINED_MODEL',
            'r2_score': np.nan,
            'components_used': np.nan,
            'total_variance_explained': np.nan,
        }
        summary_row.update(combined_model)
        results_df = pd.concat([results_df, pd.DataFrame([summary_row])], ignore_index=True)

        target_path.parent.mkdir(parents=True, exist_ok=True)
        results_df.to_csv(target_path, index=False)

        # The last row is the combined model, so it is excluded from the averages.
        file_rows = results_df.iloc[:-1]
        print(f"Results saved to: {target_path}")
        print(f"Analysis Summary for radial_{radial_number}:")
        print(f"Files processed: {len(results_df) - 1}")
        print(f"Average R² Score: {file_rows['r2_score'].mean():.4f}")
        print(f"Average Components Used: {file_rows['components_used'].mean():.1f}")

    except Exception as e:
        print(f"Error processing radial_{radial_number}: {str(e)}")

def main():
    """Process every folder listed in ``RADIAL_FOLDERS``, one after another.

    ``OUTPUT_DIR`` is created first (including parents) if it is missing.
    """
    print(f"Starting Fourier analysis using {MAX_WORKERS} processes...")
    print(f"Will process the following radial folders: {RADIAL_FOLDERS}")

    # Make sure the destination exists before any folder is handled.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for folder_number in RADIAL_FOLDERS:
        process_radial_folder(folder_number)

    print("\nAll radial folders have been processed.")

# Entry point: run main() only when this code is executed as the top-level
# program (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Starting Fourier analysis using 15 processes...
Will process the following radial folders: [1, 4, 8, 12, 16, 20, 24]

Processing radial_1 folder...
Processed Sheet124_combined.csv: R² = 0.9960, Components = 10Processed Sheet480_combined.csv: R² = 0.9951, Components = 10Processed Sheet201_combined.csv: R² = 0.9925, Components = 10


Processed Sheet460_combined.csv: R² = 0.9731, Components = 10Processed Sheet295_combined.csv: R² = 0.9799, Components = 10Processed Sheet404_combined.csv: R² = 0.9757, Components = 10Processed Sheet23_combined.csv: R² = 0.9539, Components = 10Processed Sheet262_combined.csv: R² = 0.9553, Components = 10Processed Sheet91_combined.csv: R² = 0.9924, Components = 10Processed Sheet132_combined.csv: R² = 0.9758, Components = 10
Processed Sheet412_combined.csv: R² = 0.9724, Components = 10


Processed Sheet431_combined.csv: R² = 0.9812, Components = 10

Processed Sheet373_combined.csv: R² = 0.9813, Components = 10Processed Sheet125_combined.csv: R² = 0.9522, Compon

In [5]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from pathlib import Path
import concurrent.futures
from functools import lru_cache
import multiprocessing
import warnings
warnings.filterwarnings('ignore')

# Constants
# Upper bound on the number of harmonics tried per file.
MAX_MERIDIAN_COMPONENTS = 10     
# Directory that is globbed for "*.csv" input files.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
DATA_DIR = Path("/home/aricept094/mydata/sheets/combined_data")
# Percentage compared against the cumulative variance in the harmonic loop of
# calculate_fourier_components.
VARIANCE_THRESHOLD = 99.99
# Process-pool size: 99% of the reported CPU count, truncated, but never below 1.
MAX_WORKERS = max(1, int(multiprocessing.cpu_count() * 0.99))
# Destination of the per-file results table (relative to the working directory).
# NOTE(review): an earlier cell of this notebook writes a file with the same
# name, so running both overwrites it - confirm this is intended.
OUTPUT_FILE = "fourier_analysis_results.csv"

@lru_cache(maxsize=None)
def calculate_trig_terms(m, n_points):
    """Return the cosine and sine basis vectors of harmonic ``m``.

    Both vectors hold ``n_points`` samples evaluated at the angles
    ``2*pi*m*k/n_points`` for ``k = 0 .. n_points - 1``.

    The result is memoised per ``(m, n_points)`` pair, so repeated calls
    return the very same array objects; callers should not modify them
    in place.
    """
    sample_positions = np.arange(n_points)
    phase = 2 * np.pi * m * sample_positions / n_points
    cosine_terms = np.cos(phase)
    sine_terms = np.sin(phase)
    return cosine_terms, sine_terms

def calculate_phase_angle(am, bm):
    """Return the phase, in radians, of a harmonic given its two coefficients.

    Args:
        am: cosine coefficient.
        bm: sine coefficient.

    Returns:
        The four-quadrant arctangent of ``bm`` over ``am``.
    """
    sine_part, cosine_part = bm, am
    return np.arctan2(sine_part, cosine_part)

def calculate_fourier_components(meridian_indices, keratometry_values):
    """Fit a truncated amplitude/phase Fourier series until the variance target is met.

    Harmonics ``m = 1, 2, ...`` are added one at a time until the partial
    reconstruction explains at least ``VARIANCE_THRESHOLD`` percent of the
    variance of ``keratometry_values`` about its mean, or until
    ``MAX_MERIDIAN_COMPONENTS`` harmonics have been used.  Each harmonic is
    stored as an amplitude ``cn`` and a phase and contributes
    ``cn * cos(angle - phase)`` to the reconstruction.

    "Variance explained" is ``100 * (1 - SS_res / SS_tot)``, i.e. the reported
    R² expressed in percent.  The running value is updated after every
    harmonic; previously it stayed at 0 inside the loop, so the threshold was
    never consulted and the reported percentage was always 100.

    Args:
        meridian_indices: sample positions; only their count is used, the
            samples are treated as equally spaced over one period.
        keratometry_values: sample values, one per position.

    Returns:
        Dict with ``dc_component``, ``components_used``,
        ``total_variance_explained`` (percent), ``r2_score``, ``a0`` (same
        value as ``dc_component``), ``component_{1,2}_amplitude``,
        ``higher_order_amplitude_sum`` (sum of the amplitudes of the used
        harmonics above 2) and ``component_{1,2}_phase``.  Entries of
        harmonic 1 or 2 are 0.0 when that harmonic was not needed, so every
        key is always present for downstream aggregation.
    """
    values = np.asarray(keratometry_values, dtype=float)
    n_meridian = len(meridian_indices)
    norm_factor = 2 / n_meridian

    dc_component = np.mean(values)
    # Spread about the mean: the denominator of "variance explained".
    total_variance = np.sum((values - dc_component) ** 2)

    def percent_explained(approximation):
        """Percentage of the signal variance captured by ``approximation``."""
        if total_variance == 0:
            # A constant signal is fully described by its DC term.
            return 100.0
        residual = np.sum((values - approximation) ** 2)
        return float((1 - residual / total_variance) * 100)

    # Pre-populated so that the keys exist even if the loop stops early.
    amplitudes = {
        'a0': dc_component,
        'component_1_amplitude': 0.0,
        'component_2_amplitude': 0.0,
    }
    phases = {
        'component_1_phase': 0.0,
        'component_2_phase': 0.0,
    }
    higher_order_amplitude = 0

    reconstructed = np.full(n_meridian, dc_component, dtype=float)
    cumulative_variance = percent_explained(reconstructed)
    components_used = 0

    while cumulative_variance < VARIANCE_THRESHOLD and components_used < MAX_MERIDIAN_COMPONENTS:
        m = components_used + 1
        cos_m, sin_m = calculate_trig_terms(m, n_meridian)
        am = norm_factor * np.sum(values * cos_m)
        bm = norm_factor * np.sum(values * sin_m)

        # Amplitude/phase form of the (am, bm) pair.
        cn = np.sqrt(am**2 + bm**2)
        phase = calculate_phase_angle(am, bm)

        # Harmonics 1 and 2 are reported individually, the rest as a sum.
        if m <= 2:
            amplitudes[f'component_{m}_amplitude'] = cn
            phases[f'component_{m}_phase'] = phase
        else:
            higher_order_amplitude += cn

        # Extend the reconstruction and re-evaluate the stopping criterion.
        angle = 2 * np.pi * m * np.arange(n_meridian) / n_meridian
        reconstructed += cn * np.cos(angle - phase)
        components_used = m
        cumulative_variance = percent_explained(reconstructed)

    amplitudes['higher_order_amplitude_sum'] = higher_order_amplitude

    r2 = r2_score(values, reconstructed)

    return {
        'dc_component': dc_component,
        'components_used': components_used,
        'total_variance_explained': cumulative_variance,
        'r2_score': r2,
        **amplitudes,
        **phases
    }

def process_file(file_path):
    """Analyse one CSV file and return its Fourier summary.

    The per-``Meridian_Index`` mean of ``Axial_Anterior_Value`` is passed to
    ``calculate_fourier_components``; the file name is added to the result.

    Returns:
        The result dict, or ``None`` when anything raises (the error is
        reported on stdout).
    """
    try:
        frame = pd.read_csv(file_path)
        per_meridian = frame.groupby('Meridian_Index')['Axial_Anterior_Value'].mean()
        ordered_index = np.array(sorted(per_meridian.index))
        profile = per_meridian[ordered_index].values

        summary = calculate_fourier_components(ordered_index, profile)
        summary['filename'] = file_path.name

        print(f"Processed {file_path.name}: R² = {summary['r2_score']:.4f}, Components = {summary['components_used']}")
    except Exception as e:
        print(f"Error processing {file_path.name}: {str(e)}")
        return None

    return summary

def calculate_combined_model(results_df):
    """Average the per-file amplitude/phase parameters into one combined model.

    Rows are weighted proportionally to ``1 / (1 - r2_score + 1e-10)`` (called
    "Mallows weights" in this notebook), so better-fitting files count more.

    Amplitude-like columns use a weighted arithmetic mean.  The ``*_phase``
    columns are angles in radians and use a weighted circular mean
    (``arctan2`` of the weighted sine and cosine sums): an arithmetic mean of
    angles is wrong across the +/-pi wrap, e.g. phases just below +pi and
    just above -pi would average to about 0, the opposite direction.

    Rows whose ``r2_score`` or any parameter is NaN/inf are left out: a single
    such row would otherwise turn every weight, and therefore every combined
    value, into NaN.

    Args:
        results_df: DataFrame with ``r2_score`` and all parameter columns
            listed below.

    Returns:
        Dict mapping each parameter column to its combined value; phases are
        in ``[-pi, pi]``.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if no row has a finite score and finite parameters.
    """
    result_columns = [
        'dc_component',
        'component_1_amplitude',
        'component_2_amplitude',
        'component_1_phase',
        'component_2_phase',
        'higher_order_amplitude_sum'
    ]

    r2_values = results_df['r2_score'].to_numpy(dtype=float)
    parameter_values = results_df[result_columns].to_numpy(dtype=float)

    # Keep only rows that can contribute a well-defined weight and value.
    usable = np.isfinite(r2_values) & np.isfinite(parameter_values).all(axis=1)
    if not usable.any():
        raise ValueError("No rows with a finite r2_score and finite parameters to combine")

    # Error proxy is (1 - R²); the epsilon prevents division by zero for a
    # perfect fit.
    mse = 1 - r2_values[usable]
    weights = 1 / (mse + 1e-10)
    weights /= weights.sum()

    usable_values = parameter_values[usable]
    combined_values = {}
    for position, col in enumerate(result_columns):
        column = usable_values[:, position]
        if col.endswith('_phase'):
            # Average the unit vectors, then convert back to an angle.
            sine_sum = np.sum(weights * np.sin(column))
            cosine_sum = np.sum(weights * np.cos(column))
            combined_values[col] = np.arctan2(sine_sum, cosine_sum)
        else:
            combined_values[col] = np.average(column, weights=weights)

    return combined_values

def reconstruct_signal_from_model(model_params, meridian_indices):
    """Rebuild a signal from the DC term and the first two harmonics of a model.

    Args:
        model_params: mapping with ``dc_component`` and, for harmonics 1 and
            2, ``component_{m}_amplitude`` and ``component_{m}_phase``.
        meridian_indices: sample positions; only their count is used.

    Returns:
        Array with one value per entry of ``meridian_indices``, equal to the
        DC term plus ``amplitude * cos(angle - phase)`` for each harmonic.
    """
    sample_count = len(meridian_indices)
    signal = np.full(sample_count, model_params['dc_component'])

    for harmonic in (1, 2):
        theta = 2 * np.pi * harmonic * np.arange(sample_count) / sample_count
        amplitude = model_params[f'component_{harmonic}_amplitude']
        offset = model_params[f'component_{harmonic}_phase']
        signal += amplitude * np.cos(theta - offset)

    return signal

def evaluate_combined_model(combined_model, original_data_files):
    """Score the combined model against every input file and summarise the R² values.

    For each file the per-``Meridian_Index`` mean of ``Axial_Anterior_Value``
    is compared with the signal rebuilt by ``reconstruct_signal_from_model``.
    Per-file results are printed and written to
    ``combined_model_detailed_results.csv`` in the working directory.  Files
    that raise are reported on stdout and skipped.

    Args:
        combined_model: parameter mapping accepted by
            ``reconstruct_signal_from_model``.
        original_data_files: iterable of path objects to evaluate.

    Returns:
        Dict with ``mean_r2``, ``std_r2``, ``min_r2``, ``max_r2``,
        ``individual_scores`` and ``detailed_results``.  When no file could be
        evaluated the four statistics are NaN (previously ``min()`` raised
        ``ValueError`` on the empty list before the CSV was written).
    """
    r2_scores = []
    detailed_results = []

    print("\nEvaluating Combined Model Performance:")
    print("-" * 40)

    for file_path in original_data_files:
        try:
            # Per-index mean profile of the original data.
            df = pd.read_csv(file_path)
            meridian_means = df.groupby('Meridian_Index')['Axial_Anterior_Value'].mean()
            meridian_indices = np.array(sorted(meridian_means.index))
            original_values = meridian_means[meridian_indices].values

            reconstructed = reconstruct_signal_from_model(combined_model, meridian_indices)

            r2 = r2_score(original_values, reconstructed)
            r2_scores.append(r2)

            detailed_results.append({
                'filename': file_path.name,
                'r2_score': r2,
                'original_values': original_values.tolist(),
                'reconstructed_values': reconstructed.tolist()
            })

            print(f"File: {file_path.name:<30} R² = {r2:.4f}")

        except Exception as e:
            # Best effort: report the file and carry on with the next one.
            print(f"Error evaluating {file_path.name}: {str(e)}")

    if r2_scores:
        mean_r2 = np.mean(r2_scores)
        std_r2 = np.std(r2_scores)
        min_r2 = min(r2_scores)
        max_r2 = max(r2_scores)
    else:
        # Nothing was evaluated: the statistics are undefined rather than an error.
        mean_r2 = std_r2 = min_r2 = max_r2 = float('nan')
        print("\nNo files could be evaluated; summary statistics are undefined.")

    print("\nCombined Model Performance Summary:")
    print(f"Average R² Score: {mean_r2:.4f} ± {std_r2:.4f}")
    print(f"Min R² Score: {min_r2:.4f}")
    print(f"Max R² Score: {max_r2:.4f}")

    # Persist the per-file details (value lists are stored as list cells).
    detailed_df = pd.DataFrame(detailed_results)
    detailed_df.to_csv("combined_model_detailed_results.csv", index=False)

    return {
        'mean_r2': mean_r2,
        'std_r2': std_r2,
        'min_r2': min_r2,
        'max_r2': max_r2,
        'individual_scores': r2_scores,
        'detailed_results': detailed_results
    }

def main():
    """Run the amplitude/phase analysis end to end.

    Steps: analyse all CSV files under ``DATA_DIR`` in a process pool, derive
    the combined model, append it as a ``COMBINED_MODEL`` row and save the
    table to ``OUTPUT_FILE``, print a summary, evaluate the combined model on
    every file and save its metrics to ``combined_model_performance.csv``.
    Any exception is reported on stdout instead of being propagated.
    """
    print(f"Starting Fourier analysis using {MAX_WORKERS} processes...")

    try:
        input_paths = list(DATA_DIR.glob("*.csv"))

        # Failed files come back as None and are dropped here.
        with concurrent.futures.ProcessPoolExecutor(max_workers=MAX_WORKERS) as pool:
            per_file = [item for item in pool.map(process_file, input_paths) if item]

        results_df = pd.DataFrame(per_file)
        combined_model = calculate_combined_model(results_df)

        # Extra row carrying the combined model; per-file metrics do not
        # apply to it.
        summary_row = {
            'filename': 'COMBINED_MODEL',
            'r2_score': np.nan,
            'components_used': np.nan,
            'total_variance_explained': np.nan,
        }
        summary_row.update(combined_model)
        results_df = pd.concat([results_df, pd.DataFrame([summary_row])], ignore_index=True)

        results_df.to_csv(OUTPUT_FILE, index=False)
        print(f"\nResults saved to: {OUTPUT_FILE}")

        # The last row is the combined model, so it is excluded from the averages.
        file_rows = results_df.iloc[:-1]
        print(f"\nInitial Analysis Summary:")
        print(f"Files processed: {len(results_df) - 1}")
        print(f"Average R² Score: {file_rows['r2_score'].mean():.4f}")
        print(f"Average Components Used: {file_rows['components_used'].mean():.1f}")

        performance = evaluate_combined_model(combined_model, input_paths)

        metric_names = ['mean_r2', 'std_r2', 'min_r2', 'max_r2']
        performance_df = pd.DataFrame({
            'metric': metric_names,
            'value': [performance[name] for name in metric_names]
        })
        performance_df.to_csv("combined_model_performance.csv", index=False)
        print(f"\nCombined model performance metrics saved to: combined_model_performance.csv")
        print(f"Detailed results saved to: combined_model_detailed_results.csv")

    except Exception as e:
        print(f"Error during analysis: {str(e)}")

# Entry point: run main() only when this code is executed as the top-level
# program (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Starting Fourier analysis using 15 processes...
Processed Sheet312_combined.csv: R² = 0.9993, Components = 10Processed Sheet132_combined.csv: R² = 0.9906, Components = 10Processed Sheet423_combined.csv: R² = 0.9986, Components = 10Processed Sheet295_combined.csv: R² = 0.9943, Components = 10

Processed Sheet125_combined.csv: R² = 0.9937, Components = 10


Processed Sheet412_combined.csv: R² = 0.9989, Components = 10
Processed Sheet431_combined.csv: R² = 0.9988, Components = 10Processed Sheet480_combined.csv: R² = 0.9965, Components = 10
Processed Sheet466_combined.csv: R² = 0.9990, Components = 10

Processed Sheet404_combined.csv: R² = 0.9984, Components = 10Processed Sheet262_combined.csv: R² = 0.9976, Components = 10Processed Sheet124_combined.csv: R² = 0.9809, Components = 10


Processed Sheet373_combined.csv: R² = 0.9965, Components = 10Processed Sheet74_combined.csv: R² = 0.9946, Components = 10Processed Sheet201_combined.csv: R² = 0.9929, Components = 10
Processed Sheet23_combine