In [3]:
import pandas as pd

# Build a "top performers per training session" report from the staged
# users and training-session CSV exports.

# Score columns that are added together to form each participant's total.
SCORE_COLUMNS = [
    'participant_hackerRankScore',
    'participant_assessmentScore',
    'participant_performance',
    'participant_communication',
]

# Columns written to the report, in output order.
REPORT_COLUMNS = [
    'training_code',
    'trainer_username',
    'participant_username',
    'participant_hackerRankScore',
    'participant_performance',
    'participant_assessmentScore',
    'participant_communication',
    'Max_Marks[10]',
]

# How many participants to keep per training session.
TOP_N = 5

# Single source of truth for the output file, so the success message can
# never drift away from the file that is actually written.
REPORT_PATH = 'top_5_performers_by_session_report.csv'


def _attach_username(frame, users, id_column, username_column):
    """Return a copy of `frame` with `username_column` looked up from `users`.

    `frame[id_column]` is matched against `users['_id']` with a left join, so
    every row of `frame` is kept and ids without a matching user get NaN.
    """
    lookup = (
        users[['_id', 'username']]
        # pandas matches NaN keys with each other; never let a user row
        # without an id be attached to sessions that are missing an id.
        .dropna(subset=['_id'])
        # A repeated user id would otherwise duplicate session rows and let
        # the same participant take several top-N slots.
        .drop_duplicates(subset='_id')
        # Joining on a shared key name avoids a leftover '_id' column (and a
        # suffix clash if `frame` happens to carry its own '_id').
        .rename(columns={'_id': id_column, 'username': username_column})
    )
    return frame.merge(lookup, how='left', on=id_column)


def rank_participants(sessions, users):
    """Attach usernames and scores to `sessions` and rank the rows.

    Args:
        sessions: DataFrame with 'training_code', 'participant_user',
            'trainer' and the four columns listed in SCORE_COLUMNS.
        users: DataFrame with '_id' and 'username' columns.

    Returns:
        A new DataFrame with 'participant_username', 'trainer_username',
        'total_score' and 'Max_Marks[10]' added. Rows are sorted in
        descending order by training code, then by 'Max_Marks[10]', with
        HackerRank score and performance as tie-breakers.
    """
    ranked = _attach_username(
        sessions, users, 'participant_user', 'participant_username'
    )
    ranked = _attach_username(ranked, users, 'trainer', 'trainer_username')

    # DataFrame.sum skips NaN by default, so a missing score counts as 0.
    ranked['total_score'] = ranked[SCORE_COLUMNS].sum(axis=1)
    # Average of the score columns (the original hard-coded "/ 4").
    ranked['Max_Marks[10]'] = ranked['total_score'] / len(SCORE_COLUMNS)

    # The tie-breakers make the top-N cut deterministic when several
    # participants share the same average.
    return ranked.sort_values(
        by=[
            'training_code',
            'Max_Marks[10]',
            'participant_hackerRankScore',
            'participant_performance',
        ],
        ascending=False,
    )


def top_performers_by_session(ranked, top_n=TOP_N):
    """Return the first `top_n` rows of each training session.

    Args:
        ranked: DataFrame as returned by `rank_participants` (already sorted
            best-first within each 'training_code').
        top_n: Number of rows to keep per training session.

    Returns:
        A DataFrame restricted to REPORT_COLUMNS, in the order of `ranked`.
    """
    return ranked.groupby('training_code').head(top_n)[REPORT_COLUMNS]


# True when run as a notebook cell or as a script, so the report is still
# generated and the same module-level names are still defined; importing the
# definitions above from elsewhere no longer triggers file I/O.
if __name__ == "__main__":
    # Load the data
    users_df = pd.read_csv('../STAGING/staging_users.csv')  # Update the path if necessary
    training_sessions_df = pd.read_csv('../STAGING/staging_training_sessions.csv')  # Update the path if necessary

    merged_df = rank_participants(training_sessions_df, users_df)
    top_performers_df = top_performers_by_session(merged_df)

    # Save to CSV
    top_performers_df.to_csv(REPORT_PATH, index=False)

    print(f"Top {TOP_N} performers report generated successfully as '{REPORT_PATH}'")


Top 5 performers report generated successfully as 'top_5_performers_by_session.csv'
