In [None]:
import pandas as pd
import os
import glob
import numpy as np
from scipy import stats
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import ttest_ind
import itertools
from scipy.stats import f_oneway
import seaborn as sns
import matplotlib.pyplot as plt
import re

from sympy.abc import alpha

In [None]:
# def get_latest_csv(directory):
#     list_of_files = glob.glob(os.path.join(directory, '*.csv'))
#     if not list_of_files:
#         raise FileNotFoundError("No CSV files found in the directory.")
#     latest_file = max(list_of_files, key=os.path.getmtime)
#     print(f"Using file: {latest_file}")
#     return latest_file

In [None]:
# latest_csv_path = get_latest_csv("llm_results")
# df = pd.read_csv(latest_csv_path)

In [None]:
def get_all_csv(directory):
    """Collect the LIFT evaluation CSV files found directly inside ``directory``.

    A file is kept when its base name starts with ``evaluations_lift_`` and
    does not contain ``gpt4o``.

    Args:
        directory: Folder to scan (not recursive).

    Returns:
        List of matching file paths, in the order ``glob`` yields them.

    Raises:
        FileNotFoundError: If no file passes the filter.
    """
    pattern = os.path.join(directory, '*.csv')
    selected = []
    for path in glob.glob(pattern):
        name = os.path.basename(path)
        # Exclude files containing 'gpt4o' in their name
        if 'gpt4o' in name:
            continue
        if not name.startswith("evaluations_lift_"):
            continue
        selected.append(path)
    if len(selected) == 0:
        raise FileNotFoundError("No CSV files found in the directory (excluding gpt4o).")
    return selected

def get_avg_result_accross_csv(list_of_files):
    """Stack several evaluation CSVs and attach per-(hypothesis, model) statistics.

    Every file is read, tagged with a ``timestamp`` column taken from its file
    name, and concatenated. The mean and (sample) standard deviation of
    ``novelty`` and ``plausibility`` are then computed per
    ``(hypothesis_id, model)`` over all runs and merged back onto every row.

    Args:
        list_of_files: Paths of CSV files; each must contain the columns
            ``hypothesis_id``, ``model``, ``novelty`` and ``plausibility``.

    Returns:
        DataFrame with all original rows and columns plus ``timestamp``,
        ``average_novelty``, ``average_plausibility``, ``std_novelty`` and
        ``std_plausibility`` (in that order).

    Raises:
        ValueError: If ``list_of_files`` is empty.
    """
    if not list_of_files:
        raise ValueError("No CSV files were given, nothing to aggregate.")

    df_list = []
    for file in list_of_files:
        df_temp = pd.read_csv(file)
        # Third "_"-separated token of the base name, e.g. "20250528" for
        # "evaluations_lift_20250528_144638.csv" (date only, the time part is
        # not included). os.path.basename is used instead of splitting on "/"
        # so that paths built with the platform separator also work.
        df_temp['timestamp'] = os.path.basename(file).split("_")[2]
        df_list.append(df_temp)
    combined_df = pd.concat(df_list, ignore_index=True)

    # Mean and std per hypothesis_id and model, computed in a single pass
    stats_df = (
        combined_df.groupby(['hypothesis_id', 'model'])
        .agg(
            average_novelty=('novelty', 'mean'),
            average_plausibility=('plausibility', 'mean'),
            std_novelty=('novelty', 'std'),
            std_plausibility=('plausibility', 'std'),
        )
        .reset_index()
    )

    # Left merge keeps every original row (and its order) and appends the statistics
    return combined_df.merge(stats_df, on=['hypothesis_id', 'model'], how='left')

# Usage: load every run, parse the file-name timestamp, order the rows and
# drop rows that are exact duplicates of an earlier row.
list_of_files = get_all_csv("llm_results")
df = (
    get_avg_result_accross_csv(list_of_files)
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .sort_values(["hypothesis_id", "timestamp", "model"])
    .drop_duplicates()
)
# df = df[["hypothesis_id", "model", "average_novelty", "average_plausibility",]]
df

In [None]:
# Raw provider identifiers -> display names used in tables and figures.
model_name_mapping = {
    'openai:o4-mini': 'OpenAI o4-mini',
    'openai:o3-mini': 'OpenAI o3-mini',
    'anthropic:claude-3-7-sonnet-latest': 'Claude 3.7 Sonnet',
}
# `replace` (rather than `map`) leaves values that are not keys of the mapping
# untouched. This keeps the cell idempotent: with `map`, re-running the cell
# would look up the already-renamed values, find no key and turn every model
# into NaN; unknown model identifiers would silently become NaN as well.
df['model'] = df['model'].replace(model_name_mapping)

In [None]:
import numpy as np
import pandas as pd

# Random baseline "model": one uniformly drawn novelty and plausibility score
# in [0, 10) for every row of df, used as a reference for the consistency plot.
# NOTE(review): the [0, 10) range presumably mirrors the LLM score scale - confirm.
# A fixed seed makes the baseline (and every figure built on it) reproducible
# under Restart & Run All.
rng = np.random.default_rng(42)

# Use the same number of rows as your original df
random_df = df[['hypothesis_id']].copy()
random_df['model'] = 'Random'
random_df['novelty'] = rng.uniform(0, 10, size=len(random_df))
random_df['plausibility'] = rng.uniform(0, 10, size=len(random_df))

# Concatenate with original df
df_with_random = pd.concat([df[['hypothesis_id', 'model', 'novelty', 'plausibility']], random_df], ignore_index=True)

In [None]:
# Standard deviation of the raw novelty / plausibility scores within each
# (hypothesis_id, model) group of df_with_random, melted to long format:
# one row per hypothesis, model and score type, with the value in 'std'.
stds = (
    df_with_random.groupby(['hypothesis_id', 'model'])[['novelty', 'plausibility']]
    .std()
    .reset_index()
    .melt(id_vars=['hypothesis_id', 'model'], value_vars=['novelty', 'plausibility'],
          var_name='score_type', value_name='std')
)

# Plot: one box per model and score type; outliers are hidden (showfliers=False)
plt.figure(figsize=(12, 6))
sns.boxplot(data=stds, x='model', y='std', hue='score_type', palette='pastel', showfliers=False)
# plt.title("Model Consistency Across Runs (Lower STD = More Consistent)")
plt.ylabel("Standard Deviation Across Runs", fontsize=14)
plt.xlabel("Model", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
# Fixed y-range; values above 10 would be clipped from view
plt.ylim(0, 10)
plt.legend(fontsize=14, title_fontsize='15')
plt.tight_layout()
plt.show()

In [None]:
# Mean per-hypothesis standard deviation for each model, rounded to 2 decimals
std_columns = ['std_novelty', 'std_plausibility']
avg_std = df.groupby('model')[std_columns].mean().reset_index()
# avg_std['avg_std'] = avg_std[['std_novelty', 'std_plausibility']].mean(axis=1)

for std_column in std_columns:
    avg_std[std_column] = avg_std[std_column].round(2)
print(avg_std)

In [None]:
# Hypothesis descriptions; the id column is renamed to match the evaluation frame
combine_hyp = (
    pd.read_csv('models_hypotheses/combined_hypotheses_lift_only.csv')
    .rename(columns={'hypo_id': 'hypothesis_id'})
)

In [None]:
# Make sure both columns are the same type (e.g., convert both to string or both to int)
df['hypothesis_id'] = df['hypothesis_id'].astype(str)
combine_hyp['hypothesis_id'] = combine_hyp['hypothesis_id'].astype(str)

# Now you can safely join
# NOTE(review): this cell is not idempotent - running it a second time joins
# the same columns onto df again, which pandas rejects because of the
# overlapping column names. Re-run from the loading cell instead.
df = df.join(combine_hyp.set_index('hypothesis_id'), on='hypothesis_id')


In [None]:
# Create dummies from Consequence: split the comma-separated string, build one
# indicator column per token and sum the indicators back per original row.
# NOTE(review): because of the sum, a token listed twice in one row yields 2
# rather than 1 - confirm that tokens are unique within a row.
dummy_vars = df['Consequence'].str.split(',').explode().str.get_dummies().groupby(level=0).sum()

for col in dummy_vars.columns:
    if col in df.columns:
        # Update only where the new dummy is 1
        df.loc[dummy_vars[col] == 1, col] = 1
    else:
        # Column does not exist yet: take the dummy counts as they are
        df[col] = dummy_vars[col]

# The raw string column is no longer needed (this makes the cell single-use)
df.drop('Consequence', axis=1, inplace=True)

In [None]:
# Replace the per-run scores with the per-(hypothesis, model) averages:
# sort by the raw scores, drop the raw score and timestamp columns, give the
# average columns the plain names 'novelty' / 'plausibility', then drop the
# rows that have become exact duplicates.
# From here on df['novelty'] and df['plausibility'] are averages across runs.
# The cell is single-use: the dropped columns no longer exist on a second run.
df.sort_values(by=['novelty', 'plausibility'], ascending=[False, False], inplace=True)
df.drop(columns=['novelty', 'plausibility', "timestamp"], inplace=True)
df.rename(columns={'average_novelty': 'novelty', 'average_plausibility': 'plausibility'}, inplace=True)
df.drop_duplicates(inplace=True)
df

In [None]:
# Wide tables with one column per model. The novelty table is indexed by
# hypothesis id, factors and cancer type; the plausibility table by id only.
# DataFrame.pivot raises if an (index, model) combination occurs more than once.
novelty_matrix = df.pivot(index=['hypothesis_id', 'hypo_factors', 'cancer_type'], columns='model', values='novelty')
plausibility_matrix = df.pivot(index='hypothesis_id', columns='model', values='plausibility')

In [None]:
# Display the novelty table (one column per model)
novelty_matrix

In [None]:
# Mean and standard deviation of the scores over all rows sharing a
# hypothesis_id (i.e. across the models that rated that hypothesis)
df.groupby(['hypothesis_id']).agg({
    'novelty': ['mean', 'std'],
    'plausibility': ['mean', 'std']
}).reset_index()

In [None]:
# Pairwise disagreement (MAE and RMSE) between the three models, first on
# novelty and then on plausibility.

def compare_models(col1, col2):
    """Return ``(mae, rmse)`` between two model columns of the global ``df_pivot``.

    The function reads ``df_pivot`` at call time, so it always works on the
    pivot table that was built most recently.
    """
    mae = mean_absolute_error(df_pivot[col1], df_pivot[col2])
    rmse = np.sqrt(mean_squared_error(df_pivot[col1], df_pivot[col2]))
    return mae, rmse


for score_col, mae_header, rmse_header in [
    ('novelty', "Mean Absolute Errors - Novelty:", "\nRMSE - Novelty:"),
    ('plausibility', "\nMean Absolute Errors - Plausibility:", "\nRMSE - Plausibility:"),
]:
    df_pivot = df.pivot(index='hypothesis_id', columns='model', values=score_col)

    # Variation between models per hypothesis. Both statistics are computed
    # from the model columns only: previously 'range' was derived after the
    # 'std' column had been added, so 'std' took part in the max/min and
    # could distort the range.
    model_cols = list(df_pivot.columns)
    df_pivot['std'] = df_pivot[model_cols].std(axis=1)
    df_pivot['range'] = df_pivot[model_cols].max(axis=1) - df_pivot[model_cols].min(axis=1)

    # Compute pairwise MAE and RMSE between models
    mae_ab, rmse_ab = compare_models('OpenAI o4-mini', 'Claude 3.7 Sonnet')
    mae_ac, rmse_ac = compare_models('OpenAI o4-mini', 'OpenAI o3-mini')
    mae_bc, rmse_bc = compare_models('Claude 3.7 Sonnet', 'OpenAI o3-mini')

    # Output results
    print(mae_header)
    print(f"Model o4-mini vs anthropic: {mae_ab:.2f}")
    print(f"Model o4-mini vs o3-mini: {mae_ac:.2f}")
    print(f"Model anthropic vs o3-mini: {mae_bc:.2f}")

    print(rmse_header)
    print(f"Model o4-mini vs anthropic: {rmse_ab:.2f}")
    print(f"Model o4-mini vs o3-mini: {rmse_ac:.2f}")
    print(f"Model anthropic vs o3-mini: {rmse_bc:.2f}")

In [None]:
# One-way ANOVA on the novelty scores, one sample per model
novelty_by_model = df.groupby("model")["novelty"]
grouped = novelty_by_model.apply(list)

anova_result = f_oneway(*grouped.tolist())
f_stat, p_val = anova_result
print(f"F-statistic: {f_stat:.4f}, p-value: {p_val:.4e}")

In [None]:
# One-way ANOVA on the plausibility scores, one sample per model
plausibility_by_model = df.groupby("model")["plausibility"]
grouped = plausibility_by_model.apply(list)

anova_result = f_oneway(*grouped.tolist())
f_stat, p_val = anova_result
print(f"F-statistic: {f_stat:.4f}, p-value: {p_val:.4e}")

In [None]:
# computing Z score
df[['novelty_norm', 'plausibility_norm']] = df.groupby('model')[['novelty', 'plausibility']].transform(
    lambda x: (x - x.mean()) / x.std()
)

In [None]:
# Distribution of the scores before normalization, one density curve per model
# df = merged_df.copy()
df['model'] = df['model'].astype(str)

# Set plot style
sns.set(style="whitegrid")

# Novelty distribution (common_norm=False: each model's density integrates to 1 on its own)
plt.figure(figsize=(10, 5))
plot1 = sns.kdeplot(data=df,x="novelty", hue="model", common_norm=False, fill=True, linewidth=2, legend=False)
plt.xlabel("Novelty Score", fontsize=22)
plt.ylabel("Density", fontsize=22)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.tight_layout()
plt.show()

# Plausibility distribution
plt.figure(figsize=(10, 5))
plot2 = sns.kdeplot(data=df,x="plausibility", hue="model", common_norm=False, fill=True, linewidth=2, legend=False)
plt.xlabel("Plausibility Score", fontsize=22)
plt.ylabel("Density", fontsize=22)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.tight_layout()
plt.show()


In [None]:
# Distribution of the z-scored columns (novelty_norm / plausibility_norm),
# one density curve per model. The legend is switched off and the axis labels
# do not mention that the values are normalized.
import matplotlib.patches as mpatches  # not used in this cell

# Ensure 'model' is treated as a categorical variable
# df = merged_df.copy()
df['model'] = df['model'].astype(str)

# Set plot style
sns.set(style="whitegrid")

# Novelty distribution
plt.figure(figsize=(10, 5))
plot1 = sns.kdeplot(data=df,x="novelty_norm", hue="model", common_norm=False, fill=True, linewidth=2, legend=False)
plt.xlabel("Novelty Score", fontsize=22)
plt.ylabel("Density", fontsize=22)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.tight_layout()
plt.show()

# Plausibility distribution
plt.figure(figsize=(10, 5))
plot2 = sns.kdeplot(data=df,x="plausibility_norm", hue="model", common_norm=False, fill=True, linewidth=2, legend=False)
plt.xlabel("Plausibility Score" ,fontsize=22)
plt.ylabel("Density", fontsize=22)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.tight_layout()
plt.show()



In [None]:
# Calculate the thresholds at the 0.708 quantile (70.8th percentile, not the
# 90th) of each normalized score
novelty_thresh = df['novelty_norm'].quantile(0.708)
plausibility_thresh = df['plausibility_norm'].quantile(0.708)

# Keep rows that reach or exceed both thresholds
data = df[(df['novelty_norm'] >= novelty_thresh) & (df['plausibility_norm'] >= plausibility_thresh)]
data.head()

In [None]:
# Overwrites `data` from the previous cell with a fixed-threshold selection on
# the z-scores, removes helper columns and shows how many distinct hypotheses remain.
# The drop raises KeyError if 'Unnamed: 0' or 'rank' is not present in df.
data = df[(df["novelty_norm"] > 0.8) & (df["plausibility_norm"] > 0.1)]
data = data.drop(columns=['novelty_norm', 'plausibility_norm', 'Unnamed: 0', 'rank'])
data['hypothesis_id'].unique().shape

In [None]:
# Preview `data` without the 'model' column (the result is displayed, not stored).
# `columns=` is required: a bare list is interpreted as row labels (axis=0),
# which raised KeyError because no row is labelled "model".
data.drop(columns=["model"])

In [None]:
# FIXME: the path is empty, so this cell cannot run as written; fill in the
# intended CSV file. `data_for_model` is not used by any later cell shown here.
data_for_model = pd.read_csv("")

In [None]:
# Write the current selection to Excel without the index column
data.to_excel("llm_results/normalized_new_results.xlsx", index=False)

In [None]:
# Scores from the 'michal' input sheet; the following cells read the columns
# hypothesis_id, hypo_factors, novelty and plausibility from it.
df_michal = pd.read_excel("llm_results/michal_input.xlsx")
# df_michal = df.copy()

In [None]:
# computing Z score over the whole sheet (no grouping: all rows share one
# mean and one standard deviation per score column)
df_michal[['novelty_norm', 'plausibility_norm']] = df_michal[['novelty', 'plausibility']].transform(
    lambda x: (x - x.mean()) / x.std()
)
df_michal

In [None]:
# Side-by-side table: every model row of df paired with the 'michal' row of
# the same hypothesis. Columns present in both inputs get the pandas default
# suffixes: _x comes from df_small (models), _y from df_michal.
# df['hypothesis_id'] was cast to str earlier in the notebook.
# NOTE(review): confirm the Excel ids are strings too, otherwise the inner
# merge may fail or match nothing.
df_small = df[["hypothesis_id", "model", "hypo_factors", "novelty_norm", "plausibility_norm"]]
df_michal.loc[:, "model"] = "michal"
df_michal = df_michal[["hypothesis_id", "model", "hypo_factors", "novelty_norm", "plausibility_norm"]]
df_combined = (pd.merge(df_small, df_michal, on='hypothesis_id', how='inner'))
df_combined

In [None]:
# Keep only the model rows whose hypothesis also appears in df_michal
df_small = df_small[df_small["hypothesis_id"].isin(df_michal.hypothesis_id)]

In [None]:
# Long format: the 'michal' rows are appended as if they were a fourth model
df_concat = pd.concat([df_small, df_michal], ignore_index=True)
df_concat

In [None]:
# Agreement between the model scores (_x columns) and the 'michal' scores
# (_y columns) of df_combined, pooled over all models.

# Novelty: Pearson correlation, then mean absolute error
model_novelty = df_combined['novelty_norm_x']
michal_novelty = df_combined['novelty_norm_y']
correlation = model_novelty.corr(michal_novelty)
print(f"Correlation - Novelty: {correlation:.2f}")
mae = mean_absolute_error(model_novelty, michal_novelty)
print(f"\nMean Absolute Error - Novelty: {mae:.2f}")

# Plausibility: Pearson correlation, then mean absolute error
model_plausibility = df_combined['plausibility_norm_x']
michal_plausibility = df_combined['plausibility_norm_y']
correlation = model_plausibility.corr(michal_plausibility)
print(f"\nCorrelation - plausibility: {correlation:.2f}")
mae = mean_absolute_error(model_plausibility, michal_plausibility)
print(f"\nMean Absolute Error - plausibility: {mae:.2f}")



In [None]:
# Pairwise disagreement (MAE and RMSE) on the normalized scores between the
# three models and the 'michal' ratings, first on novelty, then on plausibility.

def compare_models(col1, col2):
    """Return ``(mae, rmse)`` between two columns of the global ``df_pivot``.

    The function reads ``df_pivot`` at call time, so it always works on the
    pivot table that was built most recently.
    """
    mae = mean_absolute_error(df_pivot[col1], df_pivot[col2])
    rmse = np.sqrt(mean_squared_error(df_pivot[col1], df_pivot[col2]))
    return mae, rmse


for score_col, mae_header, rmse_header in [
    ('novelty_norm', "Mean Absolute Errors - novelty:", "\nRMSE - novelty:"),
    ('plausibility_norm', "\nMean Absolute Errors - plausibility:", "\nRMSE - plausibility:"),
]:
    df_pivot = df_concat.pivot(index='hypothesis_id', columns='model', values=score_col)

    # Variation between raters per hypothesis, computed from the rater columns
    # only (previously 'range' was derived after the 'std' column had been
    # added, so 'std' took part in the max/min).
    rater_cols = list(df_pivot.columns)
    df_pivot['std'] = df_pivot[rater_cols].std(axis=1)
    df_pivot['range'] = df_pivot[rater_cols].max(axis=1) - df_pivot[rater_cols].min(axis=1)

    # Compute pairwise MAE and RMSE between raters
    mae_ab, rmse_ab = compare_models('OpenAI o4-mini', 'Claude 3.7 Sonnet')
    mae_ac, rmse_ac = compare_models('OpenAI o4-mini', 'OpenAI o3-mini')
    mae_bc, rmse_bc = compare_models('Claude 3.7 Sonnet', 'OpenAI o3-mini')
    mae_db, rmse_db = compare_models('michal', 'Claude 3.7 Sonnet')
    mae_dc, rmse_dc = compare_models('michal', 'OpenAI o3-mini')
    mae_da, rmse_da = compare_models('michal', 'OpenAI o4-mini')

    # Output results. The 'michal' lines print the michal-vs-model values
    # (*_db, *_dc, *_da); they used to repeat the model-vs-model numbers.
    print(mae_header)
    print(f"Model o4-mini vs anthropic: {mae_ab:.2f}")
    print(f"Model o4-mini vs o3-mini: {mae_ac:.2f}")
    print(f"Model anthropic vs o3-mini: {mae_bc:.2f}")
    print(f"Model michal vs anthropic: {mae_db:.2f}")
    print(f"Model michal vs o3-mini: {mae_dc:.2f}")
    print(f"Model michal vs o4-mini: {mae_da:.2f}")

    print(rmse_header)
    print(f"Model o4-mini vs anthropic: {rmse_ab:.2f}")
    print(f"Model o4-mini vs o3-mini: {rmse_ac:.2f}")
    print(f"Model anthropic vs o3-mini: {rmse_bc:.2f}")
    print(f"Model michal vs anthropic: {rmse_db:.2f}")
    print(f"Model michal vs o3-mini: {rmse_dc:.2f}")
    print(f"Model michal vs o4-mini: {rmse_da:.2f}")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Define model order for consistent axes
model_order = ['OpenAI o4-mini', 'Claude 3.7 Sonnet', 'OpenAI o3-mini', 'michal']

# Function to compute pairwise metric matrix
def pairwise_metric(df_pivot, metric_func, models):
    """Build an ``n x n`` matrix of ``metric_func`` for every ordered pair of models.

    Args:
        df_pivot: Table with one column per entry of ``models``.
        metric_func: Callable taking two columns and returning a number.
        models: Column names; their order defines the row/column order.

    Returns:
        NumPy array whose cell ``[i, j]`` is ``metric_func(models[i], models[j])``
        for ``i != j``; the diagonal is NaN.
    """
    n = len(models)
    matrix = np.zeros((n, n))
    for i, m1 in enumerate(models):
        for j, m2 in enumerate(models):
            if i != j:
                matrix[i, j] = metric_func(df_pivot[m1], df_pivot[m2])
            else:
                # A rater compared with itself carries no information
                matrix[i, j] = np.nan
    return matrix

# Compute MAE and RMSE matrices for novelty
df_nov = df_concat.pivot(index='hypothesis_id', columns='model', values='novelty_norm')[model_order]
mae_matrix_nov = pairwise_metric(df_nov, mean_absolute_error, model_order)
rmse_matrix_nov = pairwise_metric(df_nov, lambda x, y: np.sqrt(mean_squared_error(x, y)), model_order)

# Compute MAE and RMSE matrices for plausibility
df_plau = df_concat.pivot(index='hypothesis_id', columns='model', values='plausibility_norm')[model_order]
mae_matrix_plau = pairwise_metric(df_plau, mean_absolute_error, model_order)
rmse_matrix_plau = pairwise_metric(df_plau, lambda x, y: np.sqrt(mean_squared_error(x, y)), model_order)

# Plot heatmaps: rows = score (novelty, plausibility), columns = metric (MAE, RMSE)
fig, axes = plt.subplots(2, 2, figsize=(14, 12))
sns.heatmap(mae_matrix_nov, annot=True, fmt=".2f", xticklabels=model_order, yticklabels=model_order, ax=axes[0,0], cmap="Blues")
axes[0,0].set_title("MAE - Novelty")
sns.heatmap(rmse_matrix_nov, annot=True, fmt=".2f", xticklabels=model_order, yticklabels=model_order, ax=axes[0,1], cmap="Greens")
axes[0,1].set_title("RMSE - Novelty")
sns.heatmap(mae_matrix_plau, annot=True, fmt=".2f", xticklabels=model_order, yticklabels=model_order, ax=axes[1,0], cmap="Blues")
axes[1,0].set_title("MAE - Plausibility")
sns.heatmap(rmse_matrix_plau, annot=True, fmt=".2f", xticklabels=model_order, yticklabels=model_order, ax=axes[1,1], cmap="Greens")
axes[1,1].set_title("RMSE - Plausibility")
for ax in axes.flat:
    ax.set_xlabel("Model")
    ax.set_ylabel("Model")
plt.tight_layout()
plt.show()

In [None]:
# Patient-level table; later cells read its 'Cancer Type' and 'PATIENT_ID'
# columns and match the hypothesis features against it.
full_data = pd.read_csv("narrowed_cancers_data.csv")
# full_data = full_data.drop('_tmpkey', axis=1)

In [None]:
# Mapping from df_michal to full_data column names
rename_dict = {
    'cancer_type': 'Cancer Type',
    'Diagnosis Age': 'Diagnosis Age',
    'Smoke Status': 'Smoke Status',
    'TMB (nonsynonymous)': 'TMB (nonsynonymous)',
    'Hugo_Symbol': 'Hugo_Symbol',
    'Chromosome': 'Chromosome',
    'Start_Position': 'Start_Position',
    'End_Position': 'End_Position',
    'Variant_Type': 'Variant_Type',
    'SNP_event': 'SNP_event',
    'Protein_position': 'Protein_position',
    'Codons': 'Codons',
    'Exon_Number': 'Exon_Number',
    'VAR_TYPE_SX': 'VAR_TYPE_SX',
    'Site1_Hugo_Symbol': 'Site1_Hugo_Symbol',
    'Site2_Hugo_Symbol': 'Site2_Hugo_Symbol',
    'Event_Info': 'Event_Info',
    'missense_variant': 'missense_variant',
    'Sex': 'Sex',
    'splice_acceptor_variant': 'splice_acceptor_variant',
    'upstream_gene_variant': 'upstream_gene_variant'
    # Add more mappings if needed
}

# Rename columns in df_michal
data = df.copy()
data_renamed = data.rename(columns=rename_dict)
data_renamed

In [None]:
# Column names of the hypothesis frame after renaming
data_renamed.columns

In [None]:
# Column names of the patient table, for comparison with data_renamed
full_data.columns

In [None]:
# Cast the columns used for matching to str in both frames so the values are
# compared as text.
# NOTE(review): astype(str) turns missing values into the text "nan", which a
# later dropna() no longer recognises as missing - confirm this is intended.
for column in (
    'Start_Position',
    'End_Position',
    'upstream_gene_variant',
    'TMB (nonsynonymous)',
    'missense_variant',
    'Protein_position',
    'splice_acceptor_variant',
):
    for frame in (data_renamed, full_data):
        frame[column] = frame[column].astype(str)

In [None]:
# Column names again, after the string casts
data_renamed.columns

In [None]:
# Columns of a hypothesis row that describe the hypothesis or its scores and
# must therefore not be used as patient filters.
_HYPOTHESIS_METADATA_COLUMNS = [
    'Cancer Type', 'hypothesis_id', 'model', 'novelty', 'plausibility',
    'std_novelty', 'std_plausibility', 'hypo_factors', 'support', 'method',
    'Unnamed: 0', 'Position', 'novelty_norm', 'rank', 'plausibility_norm',
]


def _patient_ids_matching(patients, filters):
    """Return the unique non-null PATIENT_IDs of ``patients`` whose columns equal ``filters``.

    Filter entries whose column does not exist in ``patients`` are ignored;
    with no applicable filter every patient id is returned.
    """
    mask = pd.Series(True, index=patients.index)
    for col, val in filters.items():
        if col in patients.columns:
            mask &= patients[col] == val
    return patients.loc[mask, 'PATIENT_ID'].dropna().unique()


def find_matching_patients(df_transformed, data_for_lift):
    """Count, for every hypothesis row, the patients that carry its features.

    The non-null, non-metadata cells of a row act as equality filters on
    ``data_for_lift``. They are applied once within the row's cancer type and
    once across all cancer types.

    Args:
        df_transformed: Hypothesis table with a ``Cancer Type`` column. The
            metadata columns (ids, scores, ...) are skipped; any of them may
            be absent.
        data_for_lift: Patient table with ``Cancer Type`` and ``PATIENT_ID``.

    Returns:
        Copy of ``df_transformed`` with three added columns:
        ``Matched PATIENT_IDs`` (comma-joined ids within the cancer type),
        ``Matched_Count`` (their number) and ``Total_Matched_Feature_Count``
        (number of matching ids over all cancer types). The inputs are not
        modified.
    """
    df_transformed = df_transformed.copy()
    data_for_lift = data_for_lift.copy()
    cancer_type_groups = data_for_lift.groupby('Cancer Type')
    matched_ids = []
    matched_counts = []
    total_matched_feature_counts = []

    for _, row in df_transformed.iterrows():
        cancer_type = row['Cancer Type']
        # errors='ignore': a metadata column missing from the frame used to
        # abort the whole run with a KeyError.
        filter_columns = row.drop(labels=_HYPOTHESIS_METADATA_COLUMNS, errors='ignore').dropna()

        # Patients of this cancer type that satisfy every filter
        if cancer_type in cancer_type_groups.groups:
            matched = _patient_ids_matching(cancer_type_groups.get_group(cancer_type), filter_columns)
        else:
            matched = []
        matched_ids.append(','.join(map(str, matched)))
        matched_counts.append(len(matched))

        # Patients with these features across all cancer types
        total_matched = _patient_ids_matching(data_for_lift, filter_columns)
        total_matched_feature_counts.append(len(total_matched))

    df_transformed['Matched PATIENT_IDs'] = matched_ids
    df_transformed['Matched_Count'] = matched_counts
    df_transformed['Total_Matched_Feature_Count'] = total_matched_feature_counts
    return df_transformed

In [None]:
# Attach the matched patient ids and counts to every hypothesis row
data = find_matching_patients(data_renamed, full_data)
data

In [None]:
# Keep hypotheses matched by at least 15 patients of their own cancer type
data = data[data["Matched_Count"] >= 15]

In [None]:
# Manual spot check of one hypothesis: number of distinct colorectal patients
# with a KRAS C>T event at protein position 12.
f = full_data[full_data["Cancer Type"] == "Colorectal Carcinoma"]
f = f[f["Hugo_Symbol"] == "KRAS"]
# full_data['Protein_position'] was cast to str in an earlier cell, so a direct
# comparison with the float 12.0 never matches. Converting back to numbers
# works for both "12" and "12.0"; non-numeric entries become NaN and are excluded.
f = f[pd.to_numeric(f["Protein_position"], errors="coerce") == 12.0]
f = f[f["SNP_event"] == "C>T"]
f["PATIENT_ID"].unique().shape

# Protein position value is 12.0 AND Hugo Symbol value is KRAS AND SNP event value is C>T

In [None]:
# NOTE(review): `result` and `df_michal_renamed` are not defined anywhere in
# the visible part of this notebook; on a fresh kernel this cell raises
# NameError unless they are created elsewhere.

# Compute unique PATIENT_ID count per hypothesis_id
patient_counts = result.groupby('hypothesis_id')['PATIENT_ID'].nunique().reset_index()
patient_counts.rename(columns={'PATIENT_ID': 'unique_patient_count'}, inplace=True)

# Merge into df_michal_renamed
df_michal_renamed = df_michal_renamed.merge(patient_counts, on='hypothesis_id', how='left')

In [None]:
# Rank by novelty, then patient count, then plausibility (all descending).
# Depends on `df_michal_renamed` from the previous cell.
df_michal_renamed.sort_values(by=["novelty", 'unique_patient_count', "plausibility"], ascending=[False, False, False], inplace=True)
df_michal_renamed

In [None]:
# Hypotheses with more than 3 distinct patients (`dd` is reassigned by a later cell)
dd = df_michal_renamed[df_michal_renamed["unique_patient_count"] > 3]
dd

In [None]:
# Rows whose hypothesis id starts with "LIFT"
df_michal_renamed[df_michal_renamed["hypothesis_id"].str.startswith("LIFT")]

In [None]:
# Reload the 'michal' sheet (df_michal was reduced to five columns earlier) and
# put it next to the matched hypotheses. Columns present in both frames get the
# suffixes _x (from `data`) and _y (from the sheet).
# NOTE(review): 'cancer_type', 'unique_patient_count' and 'comments' must come
# from the Excel sheet, since `data` does not carry them in the cells shown
# here - confirm they exist, otherwise the column selection raises KeyError.
df_michal = pd.read_excel("llm_results/michal_input.xlsx")
merged = data.merge(df_michal, on='hypothesis_id', how='left')
merged[["hypothesis_id", "model", "hypo_factors_x", "cancer_type","novelty_x", "plausibility_x", "novelty_y", "plausibility_y", "unique_patient_count", "comments"]].drop_duplicates()

In [None]:
# Load one specific evaluation file (path hard-coded)
results_lift = pd.read_csv("llm_results/evaluations_lift_20250528_144638.csv")

In [None]:
# Attach the hypothesis description columns to that single run.
# Like the earlier join, this cell fails on a second run because the joined
# columns would then overlap.
combine_hyp_lift = pd.read_csv('models_hypotheses/combined_hypotheses_lift_only.csv')
combine_hyp_lift.rename(columns={'hypo_id': 'hypothesis_id'}, inplace=True)
results_lift = results_lift.join(combine_hyp_lift.set_index('hypothesis_id'), on='hypothesis_id')

In [None]:
# How often each rounded (averaged) plausibility score occurs
df["plausibility"].round().value_counts()

In [None]:
# How often each rounded (averaged) novelty score occurs
df["novelty"].round().value_counts()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Create a pivot table of counts: rows = rounded normalized novelty,
# columns = rounded normalized plausibility, cells = number of rows in `data`
pivot = data.groupby([data['novelty_norm'].round(), data['plausibility_norm'].round()]).size().unstack(fill_value=0)

plt.figure(figsize=(8, 6))
sns.heatmap(pivot, annot=True, fmt='d', cmap='Blues', cbar=True)
plt.xlabel("Plausibility")
plt.ylabel("Novelty")
plt.title("Count of (Novelty, Plausibility) Pairs")
plt.tight_layout()
plt.show()

In [None]:
# Remove the hypotheses whose id starts with "LIFT" from `data`
data = data[~data["hypothesis_id"].str.startswith("LIFT")]

In [None]:
# Candidate list from the non-LIFT hypotheses: normalized novelty >= 0.5 and
# normalized plausibility >= -0.2, one row per hypothesis (first occurrence kept).
# Matched_Count is exported under the name 'support' and 'Cancer Type' is
# renamed back to 'cancer_type'.
data2 = data[["hypothesis_id", "novelty_norm", "plausibility_norm", "Cancer Type", "hypo_factors", "Matched_Count"]].drop_duplicates()
dd = data2[(data2["novelty_norm"] >= 0.5) & (data2["plausibility_norm"] >= -0.2)].copy()
dd.rename(columns={'Matched_Count': 'support', 'Cancer Type': 'cancer_type'}, inplace=True)
dd = dd.drop_duplicates(subset=['hypothesis_id'], keep='first')
dd  # ["hypothesis_id"].unique().shape

In [None]:
def get_max_from_str(val):
    """Return the largest number in a comma-separated value, or None.

    Args:
        val: A scalar; it is converted with ``str`` and split on commas.
            Blank pieces are skipped, every other piece is parsed with ``float``.

    Returns:
        The maximum as a float, or None when ``val`` is missing or contains
        no non-blank piece.
    """
    if pd.isna(val):
        return None
    largest = None
    for piece in str(val).split(','):
        if not piece.strip():
            continue
        number = float(piece)
        if largest is None or number > largest:
            largest = number
    return largest

# Collapse comma-separated support values to their maximum (None when missing)
df['support'] = df['support'].apply(get_max_from_str)

In [None]:
# Candidate list from all hypotheses in df: normalized novelty > 0.6 and
# normalized plausibility > 0, one row per hypothesis (first occurrence kept)
df2 = df[["hypothesis_id", "novelty_norm", "plausibility_norm", "cancer_type", "hypo_factors", "max_lift", "support"]].copy()
df2.drop_duplicates(inplace=True)
d = df2[(df2["novelty_norm"] > 0.6) & (df2["plausibility_norm"] > 0)]
# d['support'] = d['support'].astype(float).astype(int)
# The next expression is not the last one in the cell, so its value is not displayed
d["hypothesis_id"].unique().shape
d = d.drop_duplicates(subset=['hypothesis_id'], keep='first')
d

In [None]:
# Union of the two candidate lists. With an outer merge on all shared columns,
# rows that differ in any key (including 'support') stay as separate rows;
# 'max_lift' exists only in `d` and is NaN for rows that come from `dd` alone.
case_stady = d.merge(dd, on=['hypothesis_id', 'novelty_norm', 'plausibility_norm', 'cancer_type', 'hypo_factors', 'support'], how='outer')

In [None]:
# Write the combined candidate list to Excel without the index column
case_stady.to_excel("llm_results/case_study_candidates.xlsx", index=False)

In [None]:
# Inspect the rows of one specific hypothesis in the single-run table
results_lift[results_lift["hypothesis_id"] == "LIFT.GASTRIC_CANCER.901"]

In [None]:
# Scatter plot with a fitted regression line: LLM score against max lift.
# The title and x-label are derived from the plotted column so they cannot
# disagree with the data (the figure used to plot 'novelty' while being
# labelled "Plausibility"). Set score_col to 'plausibility' to plot that instead.
score_col = 'novelty'
score_label = score_col.capitalize()

plt.figure(figsize=(8, 6))
sns.regplot(
    data=results_lift,
    x=score_col,
    y='max_lift',
    scatter_kws={'alpha':0.6},
    line_kws={'color':'red'}
)
plt.title(f"Correlation between {score_label} and Max Lift")
plt.xlabel(score_label)
plt.ylabel("Max Lift")
plt.tight_layout()
plt.show()

In [None]:
# Display the factor description and cancer type of every row
df[["hypo_factors", "cancer_type"]]