In [43]:
import pandas as pd
import numpy as np
from collections import Counter

from io import StringIO
import zipfile
import os

from utils import score_aggregates

In [50]:
# Input locations used by the cells below.
pred_csv = "/saved_models/experiment_1/predictions.csv" # Path to the predictions CSV file
val_csv = "/data_2/df_task2_val_challenge.csv" # Validation set without labels, provided during the training phase
val_gt = "/data_2/task2_groundtruth.csv" # Validation set with labels, provided after the end of the development phase

In [51]:
# Read the three input tables in one pass: the unlabeled validation cases,
# the predictions, and the labeled validation set (same order as the paths).
val_df, pred_df, gt_df = (
    pd.read_csv(csv_path) for csv_path in (val_csv, pred_csv, val_gt)
)

In [52]:
# Join the predictions onto the validation cases through the shared 'case'
# column, score the result against the labeled set, and print the individual
# metrics followed by their plain average.
merged_df = val_df.merge(pred_df, on='case')
scores = score_aggregates(gt_df, merged_df)
print(scores, np.mean(list(scores.values())))

{'F1_score': 0.7203035060177917, 'Rk-correlation': 0.004134345683976752, 'Quadratic-weighted_Kappa': 0.013398849618260322, 'Specificity': 0.6666915643009497} 0.35113206640524464


In [28]:
# Define the custom function
def determine_new_column(row, rng=None):
    """Collapse one row of class predictions into a single label.

    The decision is keyed on the set of distinct values found in ``row``:

    * only 1s             -> 1
    * 0, 1 and 2 present  -> 0 or 2, drawn at random
    * only 0s and 1s      -> 0
    * only 1s and 2s      -> 2
    * anything else       -> the most frequent value in the row

    Parameters
    ----------
    row : pandas.Series
        The predictions to combine (any object exposing ``to_list()``).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness for the three-way disagreement case. When
        omitted, NumPy's global random state is used, so the draw can be
        made reproducible with ``np.random.seed``.

    Returns
    -------
    The combined label for the row.

    Raises
    ------
    IndexError
        If ``row`` is empty (there is no most frequent value to return).
    """
    values = row.to_list()
    unique_values = set(values)
    if unique_values == {1}:
        return 1
    if unique_values == {0, 1, 2}:
        # Full disagreement: pick one of the two non-1 labels at random.
        chooser = np.random if rng is None else rng
        return chooser.choice([0, 2])
    if unique_values == {0, 1}:
        # Any 0 alongside 1s wins, regardless of how many 1s there are.
        return 0
    if unique_values == {1, 2}:
        # Any 2 alongside 1s wins, regardless of how many 1s there are.
        return 2
    # Remaining cases contain no 1 at all; only now is the tally needed.
    return Counter(values).most_common(1)[0][0]
        
# Seed NumPy's global random state first: determine_new_column resolves rows
# containing 0, 1 and 2 with np.random.choice, so without a seed the scores
# printed below can change from one run to the next.
np.random.seed(42)
# Apply the custom function to the specific columns of interest
merged_df['prediction'] = merged_df[['prediction_0', 'prediction_1', 'prediction_2']].apply(determine_new_column, axis=1)
# Re-score with the combined per-row prediction and print the metrics with their mean.
scores = score_aggregates(gt_df,merged_df)
print(scores, np.array(list(scores.values())).mean())

{'F1_score': 0.6904761904761905, 'Rk-correlation': 0.0366900197207601, 'Quadratic-weighted_Kappa': 0.0658894556052877, 'Specificity': 0.6726727252179326} 0.3664320977550427


In [29]:
# Define a function to calculate the majority score based on the new rules
def calculate_majority_score(scores, threshold=0.80, rng=None):
    """Reduce a group of scores to one score, favouring 1 only when dominant.

    If the fraction of entries equal to 1 is at least ``threshold`` the result
    is 1. Otherwise the 1s are ignored and the most frequent remaining value
    is returned. If nothing remains (empty input, or a ``threshold`` that even
    an all-1 group cannot reach) the result is 0 or 2, drawn at random.

    Parameters
    ----------
    scores : iterable
        The scores of one group, e.g. a pandas Series, NumPy array or list.
    threshold : float, optional
        Minimum fraction of 1s required to return 1. Defaults to 0.80.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness for the fallback draw. When omitted, NumPy's
        global random state is used.

    Returns
    -------
    The score chosen for the group.
    """
    count = Counter(scores)
    total_scores = sum(count.values())

    # Checking total_scores first avoids a division by zero on empty input.
    if total_scores and count[1] / total_scores >= threshold:
        return 1

    # Calculate the majority among the remaining values: drop the 1s first.
    count.pop(1, None)
    if count:
        return count.most_common(1)[0][0]

    # Nothing but 1s (or nothing at all) was available to vote on.
    chooser = np.random if rng is None else rng
    return chooser.choice([0, 2])
# Reload the inputs and rebuild merged_df from scratch.
val_df = pd.read_csv(val_csv)
# pred_csv is already the path of the predictions file, so it is read as-is
# (appending a file name to it would point inside a non-existent directory).
pred_df = pd.read_csv(pred_csv)
merged_df = pd.merge(val_df, pred_df, on='case')
# Seed NumPy's global random state so the random tie-breaks in
# determine_new_column / calculate_majority_score repeat across runs.
np.random.seed(42)
# Combine the three per-row predictions into a single 'prediction' column.
merged_df['prediction'] = merged_df[['prediction_0', 'prediction_1', 'prediction_2']].apply(determine_new_column, axis=1)
# Group by 'LOCALIZER' and calculate the majority score for each group
majority_scores = merged_df.groupby('LOCALIZER')['prediction'].apply(calculate_majority_score).reset_index()

# Rename the column to 'Majority_Score'
majority_scores.columns = ['LOCALIZER', 'Majority_Score']

# Merge the majority scores back to the original DataFrame
merged_df = pd.merge(merged_df, majority_scores, on='LOCALIZER')

# Replace the per-row 'prediction' with the group's 'Majority_Score'
merged_df['prediction'] = merged_df['Majority_Score']

# Drop the 'Majority_Score' column as it's no longer needed
merged_df = merged_df.drop(columns=['Majority_Score'])
# Score the group-level predictions and print the metrics with their mean.
scores = score_aggregates(gt_df,merged_df)
print(scores, np.array(list(scores.values())).mean())

{'F1_score': 0.7051282051282052, 'Rk-correlation': 0.20336175332438666, 'Quadratic-weighted_Kappa': 0.2159489269530026, 'Specificity': 0.7219411078684427} 0.46159499831850925


In [88]:
# Save merged_df as a CSV and pack that same CSV into a zip archive.
# pred_csv is the path of a file, so the outputs go into its parent directory.
output_dir = os.path.dirname(pred_csv)
# One base name for both artifacts, so the CSV that is written is always the
# CSV that gets archived.
export_name = 'predv7'
export_csv = os.path.join(output_dir, export_name + '.csv')
merged_df.to_csv(export_csv, index=False)
# The context manager closes the archive even if write() raises, and the
# handle no longer shadows the built-in zip().
with zipfile.ZipFile(os.path.join(output_dir, export_name + '.zip'), "w", zipfile.ZIP_DEFLATED) as archive:
    # Store the CSV at the root of the archive under its bare file name.
    archive.write(export_csv, arcname=export_name + '.csv')