In [None]:
import pandas as pd

def convert_csv_format(input_csv_path, output_csv_path, chunk_length=3, reaction_time=0.4):
    """Convert a rater's dysfluency marks into per-chunk binary labels.

    The input CSV must have a ``Timestamp`` column whose values look like
    ``"<mark_time>/<total_duration>"``. The total duration is taken from the
    first row. The recording is split into ``chunk_length``-sized chunks named
    ``chunk_<i>.wav``; every chunk starts with label 1 and is set to 0 when a
    (reaction-time corrected) mark falls inside it.

    Args:
        input_csv_path: Path of the raw marks CSV to read.
        output_csv_path: Path of the ``ChunkName``/``Prediction`` CSV to write.
        chunk_length: Chunk size, in the same unit as the timestamps.
        reaction_time: Amount subtracted from each mark to account for the
            rater's reaction time.

    Raises:
        ValueError: If ``chunk_length`` is not positive or the input has no rows.
    """
    if chunk_length <= 0:
        raise ValueError("chunk_length must be positive")

    df = pd.read_csv(input_csv_path)
    if df.empty:
        raise ValueError(f"No timestamp rows found in {input_csv_path!r}")

    total_duration = float(df['Timestamp'].iloc[0].split('/')[1])

    # Number of chunks = ceil(total_duration / chunk_length).
    num_chunks = int(total_duration // chunk_length) + (1 if total_duration % chunk_length > 0 else 0)

    predictions = {f"chunk_{i}.wav": 1 for i in range(num_chunks)}

    for stamp in df['Timestamp']:
        # Subtract the reaction time, but never let the mark go below zero:
        # a negative time would floor-divide to index -1 and create a
        # non-existent "chunk_-1.wav" row.
        dysfluent_time = max(0.0, float(stamp.split('/')[0]) - reaction_time)

        # Likewise keep marks at or past the end inside the last real chunk.
        chunk_index = min(int(dysfluent_time // chunk_length), num_chunks - 1)
        if chunk_index >= 0:  # num_chunks == 0 leaves nothing to label
            predictions[f"chunk_{chunk_index}.wav"] = 0

    predictions_df = pd.DataFrame(list(predictions.items()), columns=['ChunkName', 'Prediction'])

    predictions_df.to_csv(output_csv_path, index=False)

# Convert one rater's raw marks file. Raw f-strings keep the Windows
# backslashes literal; without the r prefix, "\R", "\P" and "\{" are
# unrecognised escape sequences that newer Python versions warn about.
filename = 'MSLP - Andrea.csv'
convert_csv_format(rf'Marks\Raw\{filename}', rf'Marks\Processed+ReactionTime\{filename}')


In [None]:
import os
import pandas as pd

def merge_csv_files(directory):
    """Merge every per-rater predictions CSV in ``directory`` into one table.

    Each input CSV must have ``ChunkName`` and ``Prediction`` columns. The
    result has one row per chunk name and one column per input file (named
    after the file), and is written to ``merged_predictions.csv`` inside
    ``directory``.

    Files whose name starts with ``merged_predictions`` are skipped: they are
    this function's own output (or files derived from it), and reading them
    back on a re-run would fail because they have no ``Prediction`` column.

    Args:
        directory: Folder containing the per-rater CSV files.
    """
    output_stem = 'merged_predictions'

    # Sorted so the column order does not depend on os.listdir's ordering.
    csv_files = sorted(
        name for name in os.listdir(directory)
        if name.endswith('.csv') and not name.startswith(output_stem)
    )

    predictions_dict = {}
    for name in csv_files:
        rater_df = pd.read_csv(os.path.join(directory, name))
        predictions_dict[name] = rater_df.set_index('ChunkName')['Prediction']

    # Building a frame from a dict of Series aligns them on chunk name.
    merged_df = pd.DataFrame(predictions_dict).rename_axis('ChunkName').reset_index()

    merged_csv_path = os.path.join(directory, f'{output_stem}.csv')
    merged_df.to_csv(merged_csv_path, index=False)

    print(f"Merged predictions saved to: {merged_csv_path}")

# Raw string: "\P" is not a recognised escape sequence, so keep the backslash literal.
merge_csv_files(r'Marks\Processed+ReactionTime')

In [5]:
import pandas as pd
# Raw string keeps the Windows backslashes literal ("\P" and "\m" are not
# recognised escape sequences and trigger warnings in a normal string).
csv_file_path = r'Marks\Processed+ReactionTime\merged_predictions.csv'
df = pd.read_csv(csv_file_path)

# Every column after the first is treated as one rater's predictions.
prediction_cols = df.columns[1:]

# Consensus label per chunk: row-wise mean rounded to an integer.
# NOTE(review): pandas/NumPy rounding sends exact .5 ties to the nearest even
# integer, so an even split between raters yields 0 - confirm that is the
# intended tie-break.
df['Average'] = df[prediction_cols].mean(axis=1).round().astype(int)

output_csv_path = csv_file_path.replace('.csv', '_with_average.csv')
df.to_csv(output_csv_path, index=False)

print(f"New CSV file with average column saved to: {output_csv_path}")

New CSV file with average column saved to: Marks\Processed+ReactionTime\merged_predictions_with_average.csv


In [9]:
import pandas as pd

def merge_csv_files(merged_csv_path, new_csv_path):
    """Join an extra predictions CSV onto an existing merged table.

    Both files must have a ``ChunkName`` column, which is used as the join
    key. The join is an outer join, so a chunk present in either file is kept.
    Columns that exist in both files get the ``_merged`` / ``_new`` suffixes.
    The result is written next to ``merged_csv_path`` with ``_merged``
    inserted before the file extension.

    Args:
        merged_csv_path: Path of the existing merged CSV.
        new_csv_path: Path of the CSV whose columns should be added.
    """
    import os  # local import: the enclosing cell only imports pandas

    merged_df = pd.read_csv(merged_csv_path).set_index('ChunkName')
    new_df = pd.read_csv(new_csv_path).set_index('ChunkName')

    merged_df = merged_df.join(new_df, how='outer', lsuffix='_merged', rsuffix='_new')
    # Keep only the first of any columns that still share a name after suffixing.
    merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()]
    merged_df = merged_df.reset_index()

    # Split off the real extension instead of str.replace('.csv', ...), which
    # would also rewrite ".csv" inside directory names and, when the extension
    # is not exactly ".csv", would return the input path and overwrite it.
    root, extension = os.path.splitext(merged_csv_path)
    output_csv_path = f"{root}_merged{extension}"
    merged_df.to_csv(output_csv_path, index=False)

    print(f"Merged predictions saved to: {output_csv_path}")

# Attach the model's per-chunk predictions to the averaged rater table.
# NOTE(review): the second path is an absolute path on one user's machine;
# it has to be adjusted before this cell can run anywhere else.
merge_csv_files(
    merged_csv_path=r'Marks\Processed+ReactionTime\merged_predictions_with_average.csv',
    new_csv_path=r'C:\Users\ojmar\Documents\Uni\Synoptic Project\StammerScore\ML Models\combined-and-filtered-strict-Binary-RandF-gpu-optimised\My Stuttering Life Podcast Presents - My Journey From PWS To PWSS\chunk_predictions.csv',
)


Merged predictions saved to: Marks\Processed+ReactionTime\merged_predictions_with_average_merged.csv
