In [None]:
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# Per-video LLM outputs: one row per video, one group of columns per LLM.
file_path = "../../LLM_output_generation/extracted_video_anomalies_all_models.xlsx"
df = pd.read_excel(file_path)

# Sentence-embedding model used to vectorise the free-text columns.
model = SentenceTransformer("all-MiniLM-L6-v2")

# LLMs whose columns are looked up in the input workbook.
model_names = ["claude-3-5-sonnet", "flash", "gpt-4o-mini", "gpt4o", "pro"]

# Source-column suffix -> output-column suffix for every text field to embed.
# Description embedding is switched off (the output file only carries the
# reasoning embedding and the anomaly label). Previously the description
# column name was commented out while the code using it was left in place,
# which raised NameError. Add `"description": "desc_emb"` to re-enable it.
text_fields = {"reasoning": "reas_emb"}


def embed_column(texts):
    """Encode every non-missing entry of `texts`; missing entries become NaN.

    Returns a list with one element per input entry, in the same order, so it
    can be assigned directly as a DataFrame column.
    """
    return [np.nan if pd.isna(text) else model.encode(text) for text in texts]


# Output frame: video name, then per LLM the embeddings and the anomaly label.
final_data = pd.DataFrame()
final_data["video_name"] = df["video_name"]

for model_name in model_names:
    for source_suffix, output_suffix in text_fields.items():
        source_col = f"{model_name}-{source_suffix}"
        # Columns absent from the workbook are skipped rather than failing.
        if source_col in df.columns:
            final_data[f"{model_name}-{output_suffix}"] = embed_column(df[source_col])

    anomaly_col = f"{model_name}-anomaly"
    if anomaly_col in df.columns:
        final_data[anomaly_col] = df[anomaly_col]

# Embedding arrays are written to the workbook as their string form.
output_path = "embedded_only_video_anomalies_reas_label.xlsx"
final_data.to_excel(output_path, index=False)

print(f"✅ Done! File saved as: {output_path}")


In [None]:
import json
import os  # needed for os.path.exists below; it was used without being imported

import pandas as pd

model_names = [
    "claude-3-5-sonnet",
    "flash",
    "gpt-4o-mini",
    "gpt4o",
    "pro"
]

# video_name -> {"<model>-anomaly": value}; filled across all model files.
video_dict = {}

for name in model_names:
    file_path = f"../../LLM_output_generation/handcraft_rule10_{name}.json"
    if not os.path.exists(file_path):
        # A missing file leaves this model without a column in the output.
        print(f"Skipping (file not found): {file_path}")
        continue

    print(f"Processing: {file_path}")

    with open(file_path, 'r') as file:
        data = json.load(file)  # Mapping of video name -> raw response text

    for video_name, response_text in data.items():
        # Each response is itself a JSON document; anything that is missing,
        # not valid JSON, or not a JSON object is recorded as "NAN".
        try:
            if response_text is None:
                anomaly_value = "NAN"
            else:
                anomaly_value = json.loads(response_text).get('updated_anomaly', 'NAN')
        except (TypeError, ValueError, AttributeError):
            anomaly_value = "NAN"

        video_dict.setdefault(video_name, {})[f"{name}-anomaly"] = anomaly_value

# One row per video, one "<model>-anomaly" column per processed model.
df = pd.DataFrame.from_dict(video_dict, orient='index')
df.index.name = "video_name"
df = df.reset_index()

# Save to Excel
df.to_excel("extracted_video_anomalies_all_models_updated_all.xlsx", index=False)


In [None]:
import pandas as pd

# Inputs: original anomaly labels (alongside the embeddings) and the labels
# re-extracted after applying the handcrafted rule.
file1 = "embedded_only_video_anomalies_reas_label.xlsx"
file2 = "extracted_video_anomalies_all_models_updated_all.xlsx"

models = ['claude-3-5-sonnet', 'flash', 'gpt-4o-mini', 'gpt4o', 'pro']
anomaly_cols = [f"{model}-anomaly" for model in models]

# Keep only the key and the label columns. Selecting them explicitly means
# every label column is known to exist in both frames, so the merge suffixes
# below are always applied.
df1 = pd.read_excel(file1)[['video_name'] + anomaly_cols]
df2 = pd.read_excel(file2)[['video_name'] + anomaly_cols]

# Bring every label column to a numeric dtype on both sides so that, e.g.,
# "1" and 1 compare equal. Unparseable entries such as "NAN" become NaN.
# Previously only three of the five updated-label columns were converted.
for col in anomaly_cols:
    df1[col] = pd.to_numeric(df1[col], errors='coerce')
    df2[col] = pd.to_numeric(df2[col], errors='coerce')

# Only videos present in both files are kept.
merged_df = pd.merge(df1, df2, on='video_name', how='inner', suffixes=('_df1', '_df2'))

# y = 1 when the label changed, 0 otherwise. NaN never compares equal, so a
# missing or unparseable label on either side counts as a change.
for model in models:
    original = merged_df[f"{model}-anomaly_df1"]
    updated = merged_df[f"{model}-anomaly_df2"]
    merged_df[f"{model}_y"] = original.ne(updated).astype(int)

# Keep only relevant columns and save to Excel
output_cols = ['video_name'] + [f"{model}_y" for model in models]
result_df = merged_df[output_cols]

result_df.to_excel("anomaly_label_comparison_y.xlsx", index=False)
print("✅ Done! File saved as anomaly_label_comparison_y.xlsx")


In [None]:
import ast
import re

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

model_list = ['claude-3-5-sonnet', 'flash', 'gpt-4o-mini', 'gpt4o', 'pro']

# Whitespace gap between two numbers in an array stored as text, e.g. the
# "0.12 -0.3" in "[0.12 -0.3]"; used to insert the missing commas.
_NUMBER_GAP = re.compile(r'([0-9e\.\+\-])\s+([\-0-9])')


def parse_embedding(text):
    """Turn an embedding stored as text in a workbook cell into a NumPy array.

    Commas are inserted between whitespace-separated numbers and the result is
    parsed with `ast.literal_eval`, which only accepts Python literals. Unlike
    `eval`, it cannot execute code embedded in the workbook.
    """
    return np.array(ast.literal_eval(_NUMBER_GAP.sub(r'\1, \2', text)))


def build_model_dataset(table, model, rule_embedding):
    """Assemble the feature matrix, labels and video names for one LLM.

    Each feature vector is the reasoning embedding, the anomaly label and the
    rule embedding concatenated. Rows whose embedding cell is not text (e.g.
    missing) are skipped.

    Returns a tuple `(X, y, video_names)` of NumPy arrays with matching length.
    """
    features, labels, names = [], [], []
    rows = zip(
        table[f"{model}-reas_emb"],
        table[f"{model}-anomaly"],
        table[f"{model}_y"],
        table['video_name'],
    )
    for emb_str, anom, label, vid in rows:
        if not isinstance(emb_str, str):
            continue
        features.append(np.concatenate([parse_embedding(emb_str), [anom], rule_embedding]))
        labels.append(label)
        names.append(vid)
    return np.vstack(features), np.array(labels), np.array(names)


# ---- Fold-independent inputs: loaded and parsed once, not once per fold ----
y_df = pd.read_excel('anomaly_label_comparison_y.xlsx')
features_df = pd.read_excel('embedded_only_video_anomalies_reas_label.xlsx')
rule_df = pd.read_excel('embedded_combined_handcraft_rule.xlsx')
test_videos_df = pd.read_excel('../reas/test_dataset_reas.xlsx')

# The test workbook may name its key column 'test video' or 'test_video'.
for alias in ('test video', 'test_video'):
    if alias in test_videos_df.columns:
        test_videos_df = test_videos_df.rename(columns={alias: 'video_name'})
        break

# Attach labels to features by video name. The two files are produced by
# different steps and are not guaranteed to have the same rows in the same
# order, so pairing them by position could assign labels to the wrong video.
label_cols = ['video_name'] + [f"{model}_y" for model in model_list]
labelled_df = features_df.merge(y_df[label_cols], on='video_name', how='inner')
if len(labelled_df) != len(features_df):
    print(f"[Warning] {len(features_df)} feature rows but {len(labelled_df)} rows "
          f"after joining labels on video_name.")

rule_embedding = parse_embedding(rule_df['embedding'].iloc[0])

# Test names carry a '.mp4' suffix to match the names in the feature file.
test_video_names = set(test_videos_df['video_name'].astype(str) + ".mp4")

datasets = {model: build_model_dataset(labelled_df, model, rule_embedding)
            for model in model_list}

# ---- Per fold: fit one classifier per LLM and score the test videos ----
for fold in range(5):
    suffix = f"fold{fold}_reas"

    train_videos_df = pd.read_excel(f'../reas/train_dataset_{suffix}.xlsx')
    train_video_names = set(train_videos_df['video_name'].astype(str) + ".mp4")

    # model -> {video_name: probability of class 1}
    prob_dict = {model: {} for model in model_list}

    for model in model_list:
        X, y, video_names = datasets[model]

        train_idx = np.isin(video_names, list(train_video_names))
        test_idx = np.isin(video_names, list(test_video_names))

        model_lr = LogisticRegression(max_iter=1000)
        model_lr.fit(X[train_idx], y[train_idx])

        # Column 1 of predict_proba is the probability of class 1.
        probs = model_lr.predict_proba(X[test_idx])[:, 1]
        prob_dict[model].update(zip(video_names[test_idx], probs))

    # Union of scored videos across all LLMs; an LLM without a score for a
    # video contributes NaN, which the average ignores.
    all_video_names = set()
    for model_probs in prob_dict.values():
        all_video_names.update(model_probs.keys())

    prob_output = []
    for vid in sorted(all_video_names):
        row = {'video_name': vid}
        for model in model_list:
            row[f"{model}_prob"] = prob_dict[model].get(vid, np.nan)
        row['average_prob'] = np.nanmean([row[f"{model}_prob"] for model in model_list])
        prob_output.append(row)

    # Save to Excel
    output_path = f"model_prediction_probabilities_{suffix}.xlsx"
    pd.DataFrame(prob_output).to_excel(output_path, index=False)

    print(f"✅ Probability file saved as: {output_path}")


In [None]:
import ast
import re

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

model_list = ['claude-3-5-sonnet', 'flash', 'gpt-4o-mini', 'gpt4o', 'pro']

# Whitespace gap between two numbers in an array stored as text, e.g. the
# "0.12 -0.3" in "[0.12 -0.3]"; used to insert the missing commas.
_NUMBER_GAP = re.compile(r'([0-9e\.\+\-])\s+([\-0-9])')


def parse_embedding(text):
    """Turn an embedding stored as text in a workbook cell into a NumPy array.

    Commas are inserted between whitespace-separated numbers and the result is
    parsed with `ast.literal_eval`, which only accepts Python literals. Unlike
    `eval`, it cannot execute code embedded in the workbook.
    """
    return np.array(ast.literal_eval(_NUMBER_GAP.sub(r'\1, \2', text)))


def build_model_dataset(table, model, rule_embedding):
    """Assemble the feature matrix, labels and video names for one LLM.

    Each feature vector is the reasoning embedding, the anomaly label and the
    rule embedding concatenated. Rows whose embedding cell is not text (e.g.
    missing) are skipped.

    Returns a tuple `(X, y, video_names)` of NumPy arrays with matching length.
    """
    features, labels, names = [], [], []
    rows = zip(
        table[f"{model}-reas_emb"],
        table[f"{model}-anomaly"],
        table[f"{model}_y"],
        table['video_name'],
    )
    for emb_str, anom, label, vid in rows:
        if not isinstance(emb_str, str):
            continue
        features.append(np.concatenate([parse_embedding(emb_str), [anom], rule_embedding]))
        labels.append(label)
        names.append(vid)
    return np.vstack(features), np.array(labels), np.array(names)


# ---- Fold-independent inputs: loaded and parsed once, not once per fold ----
y_df = pd.read_excel('anomaly_label_comparison_y.xlsx')
features_df = pd.read_excel('embedded_only_video_anomalies_reas_label.xlsx')
rule_df = pd.read_excel('embedded_combined_handcraft_rule.xlsx')

# Attach labels to features by video name. The two files are produced by
# different steps and are not guaranteed to have the same rows in the same
# order, so pairing them by position could assign labels to the wrong video.
label_cols = ['video_name'] + [f"{model}_y" for model in model_list]
labelled_df = features_df.merge(y_df[label_cols], on='video_name', how='inner')
if len(labelled_df) != len(features_df):
    print(f"[Warning] {len(features_df)} feature rows but {len(labelled_df)} rows "
          f"after joining labels on video_name.")

rule_embedding = parse_embedding(rule_df['embedding'].iloc[0])

datasets = {model: build_model_dataset(labelled_df, model, rule_embedding)
            for model in model_list}

# ---- Per fold: fit one classifier per LLM and score the SAME training videos ----
# This cell deliberately predicts on the fold's own training set; the output
# files carry a `_train` suffix to keep them apart from the test-set scores.
for fold in range(5):
    suffix = f"fold{fold}_reas"

    train_videos_df = pd.read_excel(f'../reas/train_dataset_{suffix}.xlsx')
    # Names carry a '.mp4' suffix to match the names in the feature file.
    train_video_names = set(train_videos_df['video_name'].astype(str) + ".mp4")

    # model -> {video_name: probability of class 1}
    prob_dict = {model: {} for model in model_list}

    for model in model_list:
        X, y, video_names = datasets[model]

        # The evaluated videos are the training videos, so one mask serves both.
        train_idx = np.isin(video_names, list(train_video_names))

        model_lr = LogisticRegression(max_iter=1000)
        model_lr.fit(X[train_idx], y[train_idx])

        # Column 1 of predict_proba is the probability of class 1.
        probs = model_lr.predict_proba(X[train_idx])[:, 1]
        prob_dict[model].update(zip(video_names[train_idx], probs))

    # Union of scored videos across all LLMs; an LLM without a score for a
    # video contributes NaN, which the average ignores.
    all_video_names = set()
    for model_probs in prob_dict.values():
        all_video_names.update(model_probs.keys())

    prob_output = []
    for vid in sorted(all_video_names):
        row = {'video_name': vid}
        for model in model_list:
            row[f"{model}_prob"] = prob_dict[model].get(vid, np.nan)
        row['average_prob'] = np.nanmean([row[f"{model}_prob"] for model in model_list])
        prob_output.append(row)

    # Save to Excel
    output_path = f"model_prediction_probabilities_{suffix}_train.xlsx"
    pd.DataFrame(prob_output).to_excel(output_path, index=False)

    print(f"✅ Probability file saved as: {output_path}")


In [None]:
import os

import numpy as np
import pandas as pd

# Percentages of the highest-scoring videos to cut off.
P_set = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

# The output workbooks go into this folder; create it so the first write on a
# fresh checkout does not fail.
os.makedirs("low list", exist_ok=True)

for fold in range(5):
    suffix = f"fold{fold}_reas"

    prob_df = pd.read_excel(f'model_prediction_probabilities_{suffix}.xlsx')

    # Plain NumPy arrays so the boolean mask below indexes both by position.
    video_names = prob_df['video_name'].astype(str).to_numpy()
    uncertainty_scores = prob_df['average_prob'].to_numpy()

    for P in P_set:
        # tau is the (100 - P)-th percentile of the scores; videos at or below
        # it form the low-uncertainty set.
        tau = np.percentile(uncertainty_scores, 100 - P)
        mask_low = uncertainty_scores <= tau
        S_low_videos = video_names[mask_low]

        out_df = pd.DataFrame({
            "video_name":  S_low_videos,
            "uncertainty": uncertainty_scores[mask_low]
        })
        out_df.to_excel(f"low list/S_low_videos_{P}_{suffix}_ref.xlsx", index=False)

        print(f"Threshold τ = {tau:.4f}")
        print(f"Kept {len(S_low_videos)}/{len(video_names)} videos in S_low.")


In [None]:
import os

import numpy as np
import pandas as pd

# Percentages of the highest-scoring videos to cut off.
P_set = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

# The output workbooks go into this folder; create it so the first write on a
# fresh checkout does not fail.
os.makedirs("low list", exist_ok=True)

for fold in range(5):
    suffix = f"fold{fold}_reas"

    # Scores computed on the fold's training videos (the `_train` files).
    prob_df = pd.read_excel(f'model_prediction_probabilities_{suffix}_train.xlsx')

    # Plain NumPy arrays so the boolean mask below indexes both by position.
    video_names = prob_df['video_name'].astype(str).to_numpy()
    uncertainty_scores = prob_df['average_prob'].to_numpy()

    for P in P_set:
        # tau is the (100 - P)-th percentile of the scores; videos at or below
        # it form the low-uncertainty set.
        tau = np.percentile(uncertainty_scores, 100 - P)
        mask_low = uncertainty_scores <= tau
        S_low_videos = video_names[mask_low]

        out_df = pd.DataFrame({
            "video_name":  S_low_videos,
            "uncertainty": uncertainty_scores[mask_low]
        })
        out_df.to_excel(f"low list/S_low_videos_{P}_{suffix}_ref_train.xlsx", index=False)

        print(f"Threshold τ = {tau:.4f}")
        print(f"Kept {len(S_low_videos)}/{len(video_names)} videos in S_low.")


In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Model names
model_names = ["claude-3-5-sonnet", "flash", "gpt-4o-mini", "gpt4o", "pro"]

# P values representing percent of videos excluded as high-uncertainty
P_set = [5, 10, 15, 20, 25, 30, 35, 40]


def load_vad_results(model):
    """Read one model's VAD results CSV, indexed by video name without '.mp4'."""
    results = pd.read_csv(f'../../LLM_output_generation/vad_results_{model}.csv')
    results['Video Name'] = results['Video Name'].str.replace('.mp4', '', regex=False)
    return results.set_index('Video Name')


# The result CSVs do not depend on the fold or on P, so they are read once
# here instead of once per (fold, P) combination.
model_preds = {}
for model in model_names:
    try:
        model_preds[model] = load_vad_results(model)['Predicted Label']
    except Exception as e:
        # Best effort: a model that cannot be loaded gets no predictions.
        print(f"[Error] Failed to process {model}: {e}")

# Ground-truth labels are taken from the claude-3-5-sonnet results file.
ground_truth = load_vad_results('claude-3-5-sonnet')['True Label']

for fold in range(5):
    suffix = f"fold{fold}_reas"

    # Store results for each P
    all_results = []

    for P in P_set:
        low_file = f'low list/S_low_videos_{P}_{suffix}_ref.xlsx'
        try:
            low = pd.read_excel(low_file)
        except Exception as e:
            print(f"[Error] Failed to load {low_file}: {e}")
            continue

        low_list = low['video_name'].str.replace('.mp4', '', regex=False)

        # One column of predictions per model, one row per low-uncertainty video.
        merged = pd.DataFrame(index=low_list)
        for model in model_names:
            preds = model_preds.get(model)
            if preds is None:
                # Model failed to load: its column is entirely missing, so
                # the dropna below removes every row. Looking up the column
                # on an empty frame, as before, raised KeyError instead.
                merged[model] = float('nan')
            else:
                merged[model] = preds[preds.index.isin(low_list)].reindex(low_list)

        # Drop rows with missing predictions
        merged = merged.dropna()

        if merged.empty:
            print(f"[Warning] No valid data for P={P}, skipping.")
            continue

        # Majority voting: most frequent label across the models in each row.
        majority_vote = merged.mode(axis=1)[0]

        # Align ground truth
        y_true = ground_truth.loc[merged.index]

        # Compute metrics
        acc = accuracy_score(y_true, majority_vote)
        prec = precision_score(y_true, majority_vote, zero_division=0)
        rec = recall_score(y_true, majority_vote, zero_division=0)
        f1 = f1_score(y_true, majority_vote, zero_division=0)

        all_results.append({
            'P': P,
            'Num Videos': len(merged),
            'Accuracy': acc,
            'Precision': prec,
            'Recall': rec,
            'F1-Score': f1
        })

        print(f"[P={P}] Processed {len(merged)} videos | Accuracy: {acc:.4f}, F1-Score: {f1:.4f}")

    # Save final results to one Excel file per fold
    summary_path = f'low list/VAD_Majority_Voting_Summary_{suffix}_ref.xlsx'
    pd.DataFrame(all_results).to_excel(summary_path, index=False)
    print(f"[Saved] {summary_path}")


In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Model names
model_names = ["claude-3-5-sonnet", "flash", "gpt-4o-mini", "gpt4o", "pro"]

# P values representing percent of videos excluded as high-uncertainty
P_set = [5, 10, 15, 20, 25, 30, 35, 40]


def load_vad_results(model):
    """Read one model's VAD results CSV, indexed by video name without '.mp4'."""
    results = pd.read_csv(f'../../LLM_output_generation/vad_results_{model}.csv')
    results['Video Name'] = results['Video Name'].str.replace('.mp4', '', regex=False)
    return results.set_index('Video Name')


# The result CSVs do not depend on the fold or on P, so they are read once
# here instead of once per (fold, P) combination.
model_preds = {}
for model in model_names:
    try:
        model_preds[model] = load_vad_results(model)['Predicted Label']
    except Exception as e:
        # Best effort: a model that cannot be loaded gets no predictions.
        print(f"[Error] Failed to process {model}: {e}")

# Ground-truth labels are taken from the claude-3-5-sonnet results file.
ground_truth = load_vad_results('claude-3-5-sonnet')['True Label']

for fold in range(5):
    suffix = f"fold{fold}_reas"

    # Store results for each P
    all_results = []

    for P in P_set:
        # Low-uncertainty lists derived from the fold's training videos.
        low_file = f'low list/S_low_videos_{P}_{suffix}_ref_train.xlsx'
        try:
            low = pd.read_excel(low_file)
        except Exception as e:
            print(f"[Error] Failed to load {low_file}: {e}")
            continue

        low_list = low['video_name'].str.replace('.mp4', '', regex=False)

        # One column of predictions per model, one row per low-uncertainty video.
        merged = pd.DataFrame(index=low_list)
        for model in model_names:
            preds = model_preds.get(model)
            if preds is None:
                # Model failed to load: its column is entirely missing, so
                # the dropna below removes every row. Looking up the column
                # on an empty frame, as before, raised KeyError instead.
                merged[model] = float('nan')
            else:
                merged[model] = preds[preds.index.isin(low_list)].reindex(low_list)

        # Drop rows with missing predictions
        merged = merged.dropna()

        if merged.empty:
            print(f"[Warning] No valid data for P={P}, skipping.")
            continue

        # Majority voting: most frequent label across the models in each row.
        majority_vote = merged.mode(axis=1)[0]

        # Align ground truth
        y_true = ground_truth.loc[merged.index]

        # Compute metrics
        acc = accuracy_score(y_true, majority_vote)
        prec = precision_score(y_true, majority_vote, zero_division=0)
        rec = recall_score(y_true, majority_vote, zero_division=0)
        f1 = f1_score(y_true, majority_vote, zero_division=0)

        all_results.append({
            'P': P,
            'Num Videos': len(merged),
            'Accuracy': acc,
            'Precision': prec,
            'Recall': rec,
            'F1-Score': f1
        })

        print(f"[P={P}] Processed {len(merged)} videos | Accuracy: {acc:.4f}, F1-Score: {f1:.4f}")

    # Save final results to one Excel file per fold
    summary_path = f'low list/VAD_Majority_Voting_Summary_{suffix}_ref_train.xlsx'
    pd.DataFrame(all_results).to_excel(summary_path, index=False)
    print(f"[Saved] {summary_path}")


In [None]:
import pandas as pd

# For every fold, reduce the majority-voting summary to the accuracy-by-P
# table and write it to its own workbook.
for fold in range(5):
    suffix = f"fold{fold}_reas"

    majority_df = pd.read_excel(f'low list/VAD_Majority_Voting_Summary_{suffix}_ref.xlsx')

    # Keep the P column and the accuracy, exported as 'Overall Accuracy'.
    # Every row of the summary is kept; no P values are dropped here.
    out_df = majority_df[['P', 'Accuracy']].rename(columns={'Accuracy': 'Overall Accuracy'})

    out_df.to_excel(f'low list/results_P_{suffix}_ref.xlsx', index=False)
