In [61]:
from pathlib import Path
import numpy as np
import pandas as pd

In [62]:
def post_process_w_exp_decay(df: pd.DataFrame, a: float, b: np.ndarray, t: int) -> None:
    """Smooth per-frame probabilities with a weighted window over the previous frames.

    Rows are processed in their current order in ``df``. For the row at position
    ``p`` (0-based), with ``x = df['proba_avg']``:

    * if ``p >= t``: ``post_proba = a * x[p] + dot(x[p - t:p], b[:t])``
    * otherwise (not enough history yet): ``post_proba = x[p]``

    ``post_pred`` is ``1`` where ``post_proba > 0.5`` and ``0`` elsewhere (stored as
    floats).

    The history window is addressed by row *position*, not by index label, so the
    result does not depend on how ``df`` is indexed (e.g. after ``sort_values``
    without ``reset_index``).

    Args:
        df: Frame with a ``proba_avg`` column. Modified in place: the columns
            ``post_proba`` and ``post_pred`` are added or overwritten.
        a: Weight applied to the current frame's probability.
        b: Weights for the ``t`` previous frames, oldest first. Only the first
            ``t`` entries are used.
        t: Number of previous frames in the window.

    Raises:
        ValueError: If ``t`` is negative, or (from ``np.dot``) if ``b`` holds fewer
            than ``t`` weights and a full window has to be evaluated.
    """
    if t < 0:
        raise ValueError(f't must be non-negative, got {t}')

    proba = df['proba_avg'].to_numpy(dtype=float)
    weights = np.asarray(b, dtype=float)[:t]

    # Rows without a full window of history keep their raw probability.
    post_proba = proba.copy()
    for pos in range(t, len(proba)):
        post_proba[pos] = a * proba[pos] + np.dot(proba[pos - t:pos], weights)

    # Assign whole arrays: this is positional and independent of the index labels.
    df['post_proba'] = post_proba
    df['post_pred'] = np.where(post_proba > 0.5, 1.0, 0.0)

In [63]:
def compute_rate(target_rate: float, t: int, verbose: bool = False) -> np.ndarray:
    """Build ``t`` halving weights that sum to ``target_rate``, smallest first.

    Starting from ``target_rate``, each of the first ``t - 1`` weights is half of
    the previous value (``target_rate / 2``, ``target_rate / 4``, ...). The final
    weight is whatever remains so that the total equals ``target_rate``. The array
    is then reversed so the largest weights sit at the end.

    Args:
        target_rate: Total that the returned weights sum to.
        t: Number of weights to produce. ``0`` yields an empty array.
        verbose: When true, print the intermediate rates and the final result.

    Returns:
        Array of shape ``(t,)`` in ascending order of weight.
    """
    result = np.zeros(t)

    # With t == 0 there is no "last" slot to fill; return the empty array as is.
    if t > 0:
        current_rate = target_rate
        for i in range(t - 1):
            current_rate = current_rate / 2
            if verbose:
                print(f'Current rate: {current_rate}')
            result[i] = current_rate

        # The last rate is the remainder between the target and what is already allotted.
        result[-1] = target_rate - np.sum(result)
        if verbose:
            print(f'Last rate: {result[-1]}')

        # Reverse the array so that higher rates come near the end.
        result = np.flip(result)

    if verbose:
        print(f'Result: \n{result}')
        print(f'Total = {np.sum(result)}')
    return result

In [64]:
def run_post_processing(df, nb_periods, target_rate):
    """Return a frame-ordered copy of ``df`` with smoothed probabilities added.

    Args:
        df: Frame containing at least the columns ``frame``, ``proba_avg`` and
            ``label``. It is not modified.
        nb_periods: Number of previous frames used by the smoothing window.
        target_rate: Weight of the current frame, and also the total weight
            spread over the previous ``nb_periods`` frames.

    Returns:
        A new DataFrame with the three input columns sorted by ``frame``, a fresh
        0..n-1 index, and the added columns ``post_proba`` and ``post_pred``.
    """
    # Only keep these columns from df, ordered by frame. Renumber the rows
    # 0..n-1 after sorting so index labels coincide with frame order for the
    # position-based smoothing step below.
    frame_df = (
        df[['frame', 'proba_avg', 'label']]
        .sort_values(by='frame')
        .reset_index(drop=True)
    )
    # Setup post-processing params
    b = compute_rate(target_rate, nb_periods)
    # Add post-processed probas (mutates frame_df in place)
    post_process_w_exp_decay(frame_df, a=target_rate, b=b, t=nb_periods)

    return frame_df

### Params

In [65]:
# Number of previous frames in the smoothing window (passed to run_post_processing).
nb_periods = 10
# Weight of the current frame; the weights of the previous frames also sum to this value.
target_rate = 0.5

In [66]:
# Input: one sub-directory per model, each holding per-video CSV files.
root_sources = Path('../output/benchmarking/sources')
# Output: same <model>/<file>.csv layout under a separate root.
output_dir = Path('../output/benchmarking/post_pro_sources')
output_dir.mkdir(parents=True, exist_ok=True)

for model_dir in root_sources.iterdir():
    # Skip stray files sitting next to the model directories.
    if not model_dir.is_dir():
        continue

    for file_path in model_dir.glob('*.csv'):
        dataframe = pd.read_csv(file_path)
        post_processed_df = run_post_processing(
            dataframe,
            nb_periods=nb_periods,
            target_rate=target_rate,
        )

        # Mirror the source layout; the model folder is created on first write.
        output_file = output_dir / model_dir.name / file_path.name
        output_file.parent.mkdir(parents=True, exist_ok=True)
        post_processed_df.to_csv(output_file, index=False)
        

df -->
   frame  proba_avg  label
0      0   0.385331      0
1      1   0.336946      0
2      2   0.287288      0
3      3   0.250492      0
4      4   0.208195      0
rates: [0.00097656 0.00097656 0.00195312 0.00390625 0.0078125  0.015625
 0.03125    0.0625     0.125      0.25      ]
df -->
   frame  proba_avg  label
0      0   0.356864      0
1      1   0.342509      0
2      2   0.329155      0
3      3   0.308142      0
4      4   0.284622      0
rates: [0.00097656 0.00097656 0.00195312 0.00390625 0.0078125  0.015625
 0.03125    0.0625     0.125      0.25      ]
df -->
   frame  proba_avg  label
0      0   0.408699      0
1      1   0.412212      0
2      2   0.419161      0
3      3   0.425551      0
4      4   0.417779      0
rates: [0.00097656 0.00097656 0.00195312 0.00390625 0.0078125  0.015625
 0.03125    0.0625     0.125      0.25      ]
df -->
   frame  proba_avg  label
0      0   0.076617      0
1      1   0.076671      0
2      2   0.078661      0
3      3   0.077949     

In [67]:
# NOTE(review): `q` is not defined in any visible cell, so this cell raises NameError
# (see the recorded output below). Presumably a deliberate barrier so that "Run All"
# stops before the single-video cells that follow - TODO confirm, then either delete
# this cell or replace it with an explicit, documented stop.
q

NameError: name 'q' is not defined

In [None]:
# Locate a single model/video CSV for manual inspection.
root_sources = Path('../output/benchmarking/sources')
model = 'nnunet'
video_id = 'id_x1.csv'

# Joining with `/` already yields Path objects.
model_source = root_sources / model
video_source = model_source / video_id

In [None]:
# Load the selected video's CSV (path built in the previous cell).
dataframe = pd.read_csv(video_source)

In [None]:
# Show the loaded frame as this cell's output.
dataframe

In [None]:
# NOTE: rebinds the notebook-level nb_periods / target_rate used by the batch cell above;
# from here on the smoothing window is 5 frames instead of 10.
nb_periods = 5
target_rate = 0.5

In [None]:
# Post-process the single loaded video with the parameters set in the previous cell.
post_processed_df = run_post_processing(dataframe, nb_periods=nb_periods, target_rate=target_rate)

In [None]:
# Show the post-processed frame as this cell's output.
post_processed_df