# In [None]:
import pandas as pd

# Rank the predictions of a single model run: load the raw output table,
# order the rows from the highest to the lowest 'Active_Prob', and write the
# ranked table back to disk under the AO-<model>-<run>.csv naming scheme.

# Read the CSV file
df = pd.read_csv('SVM/SVM-run9-new.csv')  # Path to the output file from each run (VSBS) of each ML model saved as .csv in its respective folder

# Sort by the 'Active_Prob' column in descending order.
# A stable algorithm is requested explicitly: the default (quicksort) gives no
# guarantee about the relative order of rows with identical probabilities,
# whereas mergesort keeps tied rows in their original file order, so the
# ranking written below is reproducible.
df_sorted = df.sort_values(by='Active_Prob', ascending=False, kind='mergesort')

# Save the sorted file (optional)
df_sorted.to_csv('SVM/AO-SVM-9.csv', index=False)  # Sorted output file for each ML model and run in its corresponding folder

import pandas as pd
import numpy as np
import os

# Summarise the ranked prediction files (AO-<model>-<run>.csv) of every model:
# compute EF1%, its maximum attainable value and the normalised NEF1% for each
# run, save the per-run table, then save the per-model averages.

# Columns of the per-run table, in output order
RESULT_COLUMNS = [
    'Model',
    'Run',
    'EF1%',
    'NEF1%',
    'EF1% max',
    'Total Active Correct',
    'Total Active in Top 1%',
]

# Metric columns that are averaged per model (everything but the identifiers)
METRIC_COLUMNS = RESULT_COLUMNS[2:]


def compute_ef1_metrics(real_classes, predicted_classes):
    """Compute the top-1% metrics for one ranked run.

    The rows are taken in the order given; the "top 1%" is simply the first
    ``max(1, n // 100)`` entries, so the inputs are expected to be already
    ranked (the AO-* files are read as-is, without re-sorting).

    NOTE(review): "EF1%" as computed here is the percentage of all correctly
    predicted actives that fall inside the top 1% of rows. This is not the
    hit-rate ratio commonly called "enrichment factor" -- confirm that this
    is the intended definition.

    Args:
        real_classes: sequence of true labels; 'Active' marks an active.
        predicted_classes: sequence of predicted labels, same length/order.

    Returns:
        dict with the keys 'EF1%', 'NEF1%', 'EF1% max',
        'Total Active Correct' and 'Total Active in Top 1%'.
    """
    real_classes = np.asarray(real_classes)
    predicted_classes = np.asarray(predicted_classes)

    # Rows where Real_Class and Predicted_Class are both "Active" (whole file)
    correct_active_count = np.sum((real_classes == 'Active') & (predicted_classes == 'Active'))

    # Size of the top 1% slice; at least one row even for files under 100 rows
    n_total = len(real_classes)
    n_top_1_percent = max(1, n_total // 100)

    # Same count, restricted to the first n_top_1_percent rows
    top_1_real_classes = real_classes[:n_top_1_percent]
    top_1_predicted_classes = predicted_classes[:n_top_1_percent]
    correct_active_top_1_percent = np.sum(
        (top_1_real_classes == 'Active') & (top_1_predicted_classes == 'Active')
    )

    # EF1%: share (in %) of the correct actives that sit in the top 1%;
    # defined as 0 when there is no correct active at all
    if correct_active_count > 0:
        ef1_percent = (correct_active_top_1_percent / correct_active_count) * 100
    else:
        ef1_percent = 0

    # EF1% max: best value EF1% could reach -- 100 when every correct active
    # fits inside the top 1% slice, otherwise limited by the slice size
    if correct_active_count <= n_top_1_percent:
        ef1_max = 100
    else:
        ef1_max = (n_top_1_percent / correct_active_count) * 100

    # NEF1%: EF1% normalised by its maximum
    nef1_percent = ef1_percent / ef1_max if ef1_max > 0 else 0

    return {
        'EF1%': ef1_percent,
        'NEF1%': nef1_percent,
        'EF1% max': ef1_max,
        'Total Active Correct': correct_active_count,
        'Total Active in Top 1%': correct_active_top_1_percent,
    }


# Initialize a list to store the results of each model
all_results = []

# Define the models and their respective folders
models = {
    'XGB': 'XGB',
    'SVM': 'SVM',
    'DNN': 'DNN',
    'ANN': 'ANN',
    'RF': 'RF'
}

# Read and process the files of each model in its corresponding folder
for model, folder in models.items():
    for i in range(1, 11):
        # File path of this run: <folder>/AO-<model>-<run>.csv
        # (AO stands for "ordered file", i is the run number)
        file_name = f'AO-{model}-{i}.csv'
        file_path = os.path.join(folder, file_name)

        # Report and skip runs whose file is missing
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue

        df_sorted = pd.read_csv(file_path)

        run_metrics = compute_ef1_metrics(
            df_sorted['Real_Class'].values,
            df_sorted['Predicted_Class'].values,
        )
        all_results.append({'Model': model, 'Run': i, **run_metrics})

# Convert the list of results into a DataFrame. The columns are given
# explicitly so the table keeps its header even when no file was found.
df_results = pd.DataFrame(all_results, columns=RESULT_COLUMNS)

# Save the per-run results to a CSV file
df_results.to_csv('model_results.csv', index=False)

# Calculate the average of each metric by model. With no rows there is
# nothing to aggregate, so an empty table with the same header is produced
# instead of calling groupby on an empty frame.
if df_results.empty:
    print("No result files were found; the summary tables are empty.")
    df_avg_results = pd.DataFrame(columns=['Model'] + METRIC_COLUMNS)
else:
    df_avg_results = df_results.groupby('Model')[METRIC_COLUMNS].mean().reset_index()

# Save the averages to a CSV file
df_avg_results.to_csv('model_avg_results.csv', index=False)  # File containing the per-model averages: Model, EF1%, NEF1%, EF1% max, Total Active Correct, and Total Active in Top 1%

# Show the generated DataFrames
print("Results per model and run:")
print(df_results)
print("\nAverage results per model:")
print(df_avg_results)
