In [51]:
import pandas as pd
import os

# Define the list of metric names
metric_names = ['lh_aparc_area', 'rh_aparc_area', 'lh_aparc_meancurv', 'rh_aparc_meancurv',
                'lh_aparc_thickness', 'rh_aparc_thickness', 'lh_aparc_volume', 'rh_aparc_volume', 'subcortical_vol']

# Define the feature to look for (matched against the second column of every sheet)
feature = 'orgmat'


def best_row_for_feature(df, feature, feature_col=1, value_col=4, name_col=0):
    """Return ``(value, name)`` of the highest-valued row for ``feature`` in one sheet.

    Rows are selected where column ``feature_col`` equals ``feature``. Among
    them, the row with the largest numeric entry in column ``value_col`` wins
    (first occurrence on ties) and its ``name_col`` entry is returned with it.
    All column arguments are zero-based positions.

    Returns ``None`` when the sheet has too few columns, when no row matches
    ``feature``, or when none of the matching rows holds a numeric value.
    """
    if df.shape[1] <= max(feature_col, value_col, name_col):
        return None

    filtered_rows = df[df.iloc[:, feature_col] == feature]

    # Coerce to numbers so text/blank cells become NaN instead of breaking the
    # comparison; an all-NaN column previously led to an IndexError when the
    # row holding the "max" was looked up by equality (NaN != NaN).
    values = pd.to_numeric(filtered_rows.iloc[:, value_col], errors="coerce")
    values = values.reset_index(drop=True)  # labels now equal row positions
    if not values.notna().any():
        return None

    best_pos = values.idxmax()  # skips NaN, returns the first maximum
    return values.iloc[best_pos], filtered_rows.iloc[best_pos, name_col]


# Initialize a list to store results (one record per metric file that exists)
results = []

# Loop through each metric name
for metricName in metric_names:
    # Construct the file path using the metric name
    file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Neuropsychiatric_from_MRI_Metrics\\MRI_Metrics_to_briefa\\briefa_from_{metricName}.xlsx"

    # Missing workbooks are reported and skipped; they produce no result row
    if not os.path.exists(file_path):
        print(f"File not found at {file_path}")
        continue

    max_value = None
    max_name = None

    # Scan every sheet of the workbook and keep the overall best row
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)

            best = best_row_for_feature(df, feature)
            if best is None:
                # Sheet too narrow, feature absent, or no numeric values
                continue

            row_max_value, row_name = best
            if max_value is None or row_max_value > max_value:
                max_value = row_max_value
                max_name = row_name

    # Append the result for the current metric name (None/None if nothing matched)
    results.append({
        "Metric Name": metricName,
        "Max Value": max_value,
        "Corresponding Name": max_name
    })

# Convert results to a DataFrame and display
results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))  # Display in a table format suitable for copying and pasting


       Metric Name  Max Value        Corresponding Name
     lh_aparc_area   0.014523         Ensemble-Stacking
     rh_aparc_area   0.092767  Random Forest Regression
 lh_aparc_meancurv   0.179322                   XGBoost
 rh_aparc_meancurv   0.108423         Ensemble-Stacking
lh_aparc_thickness   0.097255 Baesian Linear Regression
rh_aparc_thickness   0.128874          Ridge Regression
   lh_aparc_volume   0.027836  Random Forest Regression
   rh_aparc_volume   0.091345  Random Forest Regression
   subcortical_vol   0.011915         Ensemble-Stacking


In [52]:
import pandas as pd
import os

# Set the variables that select the feature-importance workbook
metric = 'MRI_Metrics'  # Example value
metricName = 'MRI_Metrics'

feature = 'orgmat'  # Example value

# Construct the file path using the variables
file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Feature_importance_for_briefa_from_{metric}\\performance_{metricName}_w_top_features_{feature}.xlsx"


def column_max_from_row(df, col_idx=5, first_row=2):
    """Return the largest numeric value in column ``col_idx`` from ``first_row`` down.

    Both arguments are zero-based positions, so the defaults mean "sixth
    column, third data row onward". Returns ``None`` when the sheet is too
    small or the selected cells contain no numeric value.
    """
    if df.shape[1] <= col_idx or df.shape[0] <= first_row:
        return None

    # Coerce to numbers and drop NaN: a NaN maximum would otherwise be stored
    # as the running max and block every later `>` comparison.
    values = pd.to_numeric(df.iloc[first_row:, col_idx], errors="coerce").dropna()
    if values.empty:
        return None
    return values.max()


if not os.path.exists(file_path):
    print(f"File not found at {file_path}")
else:
    max_value = None
    max_sheet = None

    # Load the Excel file with all sheets
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            # Load the specific sheet
            df = pd.read_excel(xls, sheet_name=sheet_name)

            sheet_max = column_max_from_row(df)
            if sheet_max is None:
                # Sheet too small or no numeric data in the target range
                continue

            # Update max_value and max_sheet if this sheet has a new max
            if max_value is None or sheet_max > max_value:
                max_value = sheet_max
                max_sheet = sheet_name

    if max_sheet is not None:
        print(f"The highest max value found in column 6 (from row 3 onward) is {max_value} in sheet: {max_sheet}")
    else:
        print("No valid data found in the specified column and row range.")


The highest max value found in column 6 (from row 3 onward) is 0.1357147484260052 in sheet: Random Forest


In [55]:
import pandas as pd
import os

# Feature whose rows are searched for in the final-results workbook
feature = 'orgmat'  # Example value

# Construct the file path using the variables
file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Final_results\\briefa_from_MRI_Metrics_afterFE_trial1_{feature}.xlsx"


def best_row_for_feature(df, feature, feature_col=1, value_col=4, name_col=0):
    """Return ``(value, name)`` of the highest-valued row for ``feature`` in one sheet.

    Rows are selected where column ``feature_col`` equals ``feature``. Among
    them, the row with the largest numeric entry in column ``value_col`` wins
    (first occurrence on ties) and its ``name_col`` entry is returned with it.
    All column arguments are zero-based positions.

    Returns ``None`` when the sheet has too few columns, when no row matches
    ``feature``, or when none of the matching rows holds a numeric value.

    Defined in this cell so the cell can be run on its own.
    """
    if df.shape[1] <= max(feature_col, value_col, name_col):
        return None

    filtered_rows = df[df.iloc[:, feature_col] == feature]

    # Coerce to numbers so text/blank cells become NaN; an all-NaN column
    # previously caused an IndexError when the max row was looked up by
    # equality (NaN never equals itself).
    values = pd.to_numeric(filtered_rows.iloc[:, value_col], errors="coerce")
    values = values.reset_index(drop=True)  # labels now equal row positions
    if not values.notna().any():
        return None

    best_pos = values.idxmax()  # skips NaN, returns the first maximum
    return values.iloc[best_pos], filtered_rows.iloc[best_pos, name_col]


# Check if the file exists
if not os.path.exists(file_path):
    print(f"File not found at {file_path}")
else:
    max_value = None
    max_name = None

    # Load the Excel file with all sheets
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            # Load the specific sheet
            df = pd.read_excel(xls, sheet_name=sheet_name)

            best = best_row_for_feature(df, feature)
            if best is None:
                # Sheet too narrow, feature absent, or no numeric values
                continue

            # Update the overall max if this is the highest found
            row_max_value, row_name = best
            if max_value is None or row_max_value > max_value:
                max_value = row_max_value
                max_name = row_name

    # Print the result; test against None so a falsy name (0, "") still counts
    if max_name is not None:
        print(f"The maximum value found in column 5 for feature '{feature}' is {max_value}, corresponding to the name '{max_name}' in column 1.")
    else:
        print(f"No entries found for feature '{feature}' in the specified column.")

The maximum value found in column 5 for feature 'orgmat' is 0.1492148191032945, corresponding to the name 'XGBoost' in column 1.


In [42]:
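# Debugging leftovers (intentionally commented out): uncomment to inspect these
# intermediate frames, if they are still defined in the kernel namespace.
# filtered_rows
# # max_row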

In [None]:
## NOTE: Feature importance was not run on each individual MRI metric, so this cell is not needed; it is kept commented out for reference only.
# import pandas as pd
# import os

# # Set the variables for biomk1 and feature
# # metric = 'biomk1'  # Example value
# # metric = 'biomk2'  # Example value
# # metric = 'csfbiomk1'  # Example value
# # metric = 'csfbiomk2'  # Example value
# metric = 'csfbiomk3'  # Example value

# # metricName = 'bloodbiomk1'  # Example value
# # metricName = 'bloodbiomk2'  # Example value
# # metricName = 'csfbiomk1'  # Example value
# # metricName = 'csfbiomk2'  # Example value
# metricName = 'csfbiomk3'  # Example value

# feature = 'bri'  # Example value

# # Construct the file path using the variables
# file_path = f"Z:\\Active-Diagnose_CTE\\Fargol_Analysis\\Feature_importance_for_briefa_from_{metric}\\performance_{metricName}_w_top_features_{feature}.xlsx"


# if not os.path.exists(file_path):
#     print(f"File not found at {file_path}")
# else:
#     max_value = None
#     max_sheet = None

#     # Load the Excel file with all sheets
#     with pd.ExcelFile(file_path) as xls:
#         for sheet_name in xls.sheet_names:
#             # Load the specific sheet
#             df = pd.read_excel(xls, sheet_name=sheet_name)

#             # Check if the sheet has enough columns and rows
#             if df.shape[1] >= 6 and df.shape[0] >= 3:
#                 # Get the sixth column starting from row 3 (adjust index)
#                 col_values = df.iloc[2:, 5]  # Zero-indexed: sixth column is 5

#                 # Find the max value in this column
#                 sheet_max = col_values.max()

#                 # Update max_value and max_sheet if this sheet has a new max
#                 if max_value is None or sheet_max > max_value:
#                     max_value = sheet_max
#                     max_sheet = sheet_name

#     if max_sheet:
#         print(f"The highest max value found in column 6 (from row 3 onward) is {max_value} in sheet: {max_sheet}")
#     else:
#         print("No valid data found in the specified column and row range.")
