In [1]:
import pandas as pd
import os

# Metric names to summarise; each one is substituted into the workbook file name below.
metric_names = ['bloodbiomk1', 'bloodbiomk2', 'csfbiomk1', 'csfbiomk2','csfbiomk3']

# Feature whose rows are looked up in every sheet (matched against column 2).
feature = 'pcltot'

# Other feature names that can be substituted above:
# feature = 'abis_attention'  # Example value
# pcltot  # nbd_tot  # cnstot  # bistot  # bhstot  # BGLHA_Childhood_Total  # BGLHA_Adulthood_Total  # BGLHA_Adolescence_Total
# bditot  # bdhi_total  # baitot   # abis_nonplanning  # abis_motor  # abis_attention


def find_best_row_in_workbook(file_path, feature):
    """Return the largest column-5 value among rows tagged with ``feature``.

    Every sheet of the workbook at ``file_path`` is scanned. Sheets with fewer
    than five columns are skipped. Within a sheet, only rows whose second
    column equals ``feature`` and whose fifth column is not missing are
    considered.

    Parameters
    ----------
    file_path : str
        Path of an existing Excel workbook.
    feature : str
        Value to match against the second column of each sheet.

    Returns
    -------
    tuple
        ``(max_value, max_name)``, where ``max_name`` is the first-column entry
        of the first row holding the maximum. ``(None, None)`` when no row in
        any sheet qualifies.
    """
    max_value = None
    max_name = None

    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)

            # Positional access below needs at least five columns.
            if df.shape[1] < 5:
                continue

            matches = df[df.iloc[:, 1] == feature]

            # Drop missing values before looking for the maximum. Without this,
            # an all-NaN selection yields max() == NaN, the `== NaN` mask picks
            # no rows (so iloc[0, 0] raises IndexError), and a NaN stored as the
            # running maximum could never be replaced since `x > NaN` is False.
            scored = matches[matches.iloc[:, 4].notna()]
            if scored.empty:
                continue

            sheet_best = scored.iloc[:, 4].max()
            sheet_best_name = scored[scored.iloc[:, 4] == sheet_best].iloc[0, 0]

            # Strict '>' keeps the earliest sheet on ties.
            if max_value is None or sheet_best > max_value:
                max_value = sheet_best
                max_name = sheet_best_name

    return max_value, max_name


# One result record per metric whose workbook exists.
results = []

for metricName in metric_names:
    # Construct the file path using the metric name
    file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Neuropsychiatric_from_Biomarkers\\neuropsych_from_{metricName}.xlsx"

    # A missing workbook is reported and left out of the table.
    if not os.path.exists(file_path):
        print(f"File not found at {file_path}")
        continue

    max_value, max_name = find_best_row_in_workbook(file_path, feature)

    # Both entries stay None when no row in the workbook matched the feature.
    results.append({
        "Metric Name": metricName,
        "Max Value": max_value,
        "Corresponding Name": max_name
    })

# Convert results to a DataFrame and display
results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))  # Display in a table format suitable for copying and pasting

Metric Name  Max Value         Corresponding Name
bloodbiomk1   0.310183 Gradient Boosting Machines
bloodbiomk2   0.060678   Random Forest Regression
  csfbiomk1   0.150565   Random Forest Regression
  csfbiomk2   0.102489 Gradient Boosting Machines
  csfbiomk3   0.141500                    XGBoost


# Feature Importance Results

In [2]:
import pandas as pd
import os

# feature = 'abis_attention'  # Example value
# pcltot  # nbd_tot  # cnstot  # bistot  # bhstot  # BGLHA_Childhood_Total  # BGLHA_Adulthood_Total  # BGLHA_Adolescence_Total
# bditot  # bdhi_total  # baitot   # abis_nonplanning  # abis_motor  # abis_attention

# # Construct the file path using the variables
# file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Feature_importance_for_neuropsych_from_{metric}\\performance_{metricName}_w_top_features_{feature}.xlsx"


def find_max_in_excel(file_path, metric, metricName):
    """Print the largest column-6 value (from row 3 onward) across all sheets.

    Each sheet of the workbook is read with ``pd.read_excel``. Sheets with
    fewer than six columns or fewer than three rows are skipped. For the
    others, the maximum of the sixth column from the third data row down is
    compared across sheets, and the winning value and sheet name are printed.

    Parameters
    ----------
    file_path : str
        Path of the Excel workbook. A message is printed and the function
        returns early if the path does not exist.
    metric : str
        Not used in the body; kept so existing calls keep working.
    metricName : str
        Label included in the printed messages.

    Returns
    -------
    None
        Results are reported via ``print`` only.
    """
    if not os.path.exists(file_path):
        print(f"File not found at {file_path}")
        return

    max_value = None
    max_sheet = None

    # Load the Excel file with all sheets
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            try:
                df = pd.read_excel(xls, sheet_name=sheet_name)

                # Positional access below needs six columns and three rows.
                if df.shape[1] < 6 or df.shape[0] < 3:
                    continue

                # Sixth column (index 5), from the third row (index 2) down.
                sheet_max = df.iloc[2:, 5].max()

                # max() of an all-missing column is NaN. Storing it would
                # block every later sheet, because `x > NaN` is always False,
                # and NaN would be reported as the result.
                if pd.isna(sheet_max):
                    continue

                # Strict '>' keeps the earliest sheet on ties.
                if max_value is None or sheet_max > max_value:
                    max_value = sheet_max
                    max_sheet = sheet_name
            except Exception as e:
                # Best effort: a sheet that cannot be processed is reported
                # and the scan continues with the remaining sheets.
                print(f"Error processing sheet '{sheet_name}': {e}")

    if max_sheet is not None:
        print(f"Metric '{metricName}': The highest max value found in column 6 (from row 3 onward) is {max_value} in sheet: {max_sheet}")
    else:
        print(f"Metric '{metricName}': No valid data found in the specified column and row range.")

# Each entry pairs the key used in the folder name ('metric') with the key
# used in the workbook file name ('metricName').
metrics = [
    {'metric': folder_key, 'metricName': file_key}
    for folder_key, file_key in (
        ('biomk1', 'bloodbiomk1'),
        ('biomk2', 'bloodbiomk2'),
        ('csfbiomk1', 'csfbiomk1'),
        ('csfbiomk2', 'csfbiomk2'),
        ('csfbiomk3', 'csfbiomk3'),
    )
]

# Report the best value for every entry. `feature` is not assigned in this
# cell, so it must already be defined when the loop runs.
for metric_data in metrics:
    metric, metricName = metric_data['metric'], metric_data['metricName']
    file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Feature_Importance_for_neuropsych_from_{metric}\\performance_{metricName}_w_top_features_{feature}.xlsx"
    find_max_in_excel(file_path, metric, metricName)


Metric 'bloodbiomk1': The highest max value found in column 6 (from row 3 onward) is 0.1309130364715684 in sheet: Random Forest
Metric 'bloodbiomk2': The highest max value found in column 6 (from row 3 onward) is 0.1890441482463563 in sheet: XGBoost
Metric 'csfbiomk1': The highest max value found in column 6 (from row 3 onward) is 0.1776370695509103 in sheet: Random Forest
Metric 'csfbiomk2': The highest max value found in column 6 (from row 3 onward) is 0.06012105009762247 in sheet: KNeighbors Regressor
Metric 'csfbiomk3': The highest max value found in column 6 (from row 3 onward) is 0.1747980534339347 in sheet: Gradient Boosting Machines


In [3]:
# Echo the current value of `feature` as the cell output.
feature

'pcltot'

In [20]:
import pandas as pd
import os

# Keys substituted into the workbook's folder name (metric) and file name (metricName).
metric = 'BloodCSF_biomk'  # Example value
metricName = 'BloodCSF_biomk'

feature = 'pcltot'

# Other feature names that can be substituted above:
# pcltot  # nbd_tot  # cnstot  # bistot  # bhstot  # BGLHA_Childhood_Total  # BGLHA_Adulthood_Total  # BGLHA_Adolescence_Total
# bditot  # bdhi_total  # baitot   # abis_nonplanning  # abis_motor  # abis_attention

# Construct the file path using the variables
file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Feature_importance_for_neuropsych_from_{metric}\\performance_{metricName}_w_top_features_{feature}.xlsx"


if not os.path.exists(file_path):
    print(f"File not found at {file_path}")
else:
    max_value = None
    max_sheet = None

    # Load the Excel file with all sheets
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)

            # Positional access below needs six columns and three rows.
            if df.shape[1] < 6 or df.shape[0] < 3:
                continue

            # Sixth column (index 5), from the third row (index 2) down.
            sheet_max = df.iloc[2:, 5].max()

            # max() of an all-missing column is NaN. Storing it would block
            # every later sheet, because `x > NaN` is always False, and NaN
            # would be reported as the result.
            if pd.isna(sheet_max):
                continue

            # Strict '>' keeps the earliest sheet on ties.
            if max_value is None or sheet_max > max_value:
                max_value = sheet_max
                max_sheet = sheet_name

    if max_sheet is not None:
        print(f"The highest max value found in column 6 (from row 3 onward) is {max_value} in sheet: {max_sheet}")
    else:
        print("No valid data found in the specified column and row range.")


The highest max value found in column 6 (from row 3 onward) is 0.2699776592054767 in sheet: XGBoost


In [21]:
import pandas as pd
import os

# Feature whose rows are looked up in every sheet (matched against column 2);
# it is also part of the workbook file name.
feature = 'pcltot'

# Other feature names that can be substituted above:
# pcltot  # nbd_tot  # cnstot  # bistot  # bhstot  # BGLHA_Childhood_Total  # BGLHA_Adulthood_Total  # BGLHA_Adolescence_Total
# bditot  # bdhi_total  # baitot   # abis_nonplanning  # abis_motor  # abis_attention

# Construct the file path using the variables
file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Final_results\\neuropsych_from_all_biomarkers_afterFE_trial1_{feature}.xlsx"

# Check if the file exists
if not os.path.exists(file_path):
    print(f"File not found at {file_path}")
else:
    max_value = None
    max_name = None

    # Load the Excel file with all sheets
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)

            # Positional access below needs at least five columns.
            if df.shape[1] < 5:
                continue

            # Rows whose second column matches the feature.
            matches = df[df.iloc[:, 1] == feature]

            # Drop missing values before looking for the maximum. Without this,
            # an all-NaN selection yields max() == NaN, the `== NaN` mask picks
            # no rows (so iloc[0, 0] raises IndexError), and a NaN stored as the
            # running maximum could never be replaced since `x > NaN` is False.
            scored = matches[matches.iloc[:, 4].notna()]
            if scored.empty:
                continue

            # Largest fifth-column value and the first-column entry of the
            # first row that holds it.
            row_max_value = scored.iloc[:, 4].max()
            row_name = scored[scored.iloc[:, 4] == row_max_value].iloc[0, 0]

            # Strict '>' keeps the earliest sheet on ties.
            if max_value is None or row_max_value > max_value:
                max_value = row_max_value
                max_name = row_name

    # Compare with None so a falsy first-column entry (e.g. 0 or an empty
    # string) is still reported as a hit.
    if max_name is not None:
        print(f"The maximum value found in column 5 for feature '{feature}' is {max_value}, corresponding to the name '{max_name}' in column 1.")
    else:
        print(f"No entries found for feature '{feature}' in the specified column.")

The maximum value found in column 5 for feature 'workmem' is 0.2006906066833458, corresponding to the name 'Random Forest Regression' in column 1.


In [6]:
# filtered_rows
# # max_row