In [1]:
import pandas as pd

# Report: for each training field, count participants whose mean score is
# above 4 versus at-or-below 4, then save and print the result.

# Score columns that are summed into `total_score` and averaged into
# `mean_score`.
score_columns = [
    'participant_hackerRankScore', 'participant_assessmentScore',
    'participant_performance', 'participant_communication',
]

# Mean-score cut-off separating the two reported buckets.
score_threshold = 4

# Define mapping of fields to the training-code prefix that identifies them
fields_mapping = {
    'Full Stack Development': 'FS',
    'Data Science': 'DS',
    'Data Engineering': 'DE',
    'UI/UX': 'UI',
    'Software Testing': 'ST'
}


def add_score_columns(df):
    """Return a copy of *df* with `total_score` and `mean_score` appended.

    `total_score` is the row-wise sum of the columns in `score_columns`;
    `mean_score` is that total divided by the number of score columns.

    NOTE: `DataFrame.sum` skips NaN by default, so a missing score
    contributes 0 to the total and therefore lowers `mean_score` instead of
    propagating NaN. This matches the report's existing behaviour.
    """
    total = df[score_columns].sum(axis=1)
    return df.assign(total_score=total, mean_score=total / len(score_columns))


def count_participants_by_field(df, mapping):
    """Count rows per field, split by whether `mean_score` exceeds 4.

    Args:
        df: DataFrame with a `training_code` column and a `mean_score`
            column.
        mapping: dict of field name -> training-code prefix. A row belongs
            to the first field (in dict order) whose prefix its
            `training_code` starts with.

    Returns:
        dict of field name -> {'greater_than_4': int,
        'less_than_equal_4': int}. Every field in *mapping* is present,
        with zero counts when nothing matched.

    Rows with a missing `training_code`, or a code that matches no prefix,
    are not counted in any field.
    """
    counts = {
        field: {'greater_than_4': 0, 'less_than_equal_4': 0}
        for field in mapping
    }

    codes = df['training_code']
    # Missing codes would stringify to 'nan'/'None'; mask them out explicitly
    # so they can never match a prefix.
    has_code = codes.notna().to_numpy(dtype=bool)
    code_text = codes.astype(str)
    # NaN > 4 is False, so rows without a usable mean land in the "<= 4"
    # bucket, same as a plain `if mean > 4 ... else ...` would do.
    above = (df['mean_score'] > score_threshold).to_numpy(dtype=bool)

    # Rows not yet claimed by an earlier prefix (first match wins).
    unassigned = has_code.copy()
    for field, prefix in mapping.items():
        starts = code_text.str.startswith(prefix, na=False).to_numpy(dtype=bool)
        in_field = unassigned & starts
        counts[field]['greater_than_4'] = int((in_field & above).sum())
        counts[field]['less_than_equal_4'] = int((in_field & ~above).sum())
        unassigned &= ~in_field

    return counts


# `__name__` is "__main__" both when run as a script and inside a notebook
# kernel, so the report still executes there while the helpers above stay
# importable without touching the filesystem.
if __name__ == "__main__":
    # Load the data
    users_df = pd.read_csv('../STAGING/staging_users.csv')  # Adjust path if necessary
    training_sessions_df = pd.read_csv('../STAGING/staging_training_sessions.csv')  # Adjust path if necessary

    # Merge the two DataFrames to include user details in training sessions
    merged_df = training_sessions_df.merge(
        users_df, how='left', left_on='participant_user', right_on='_id'
    )

    # Calculate Total Score and Mean Score
    merged_df = add_score_columns(merged_df)

    # Count participants by field and mean score
    field_counts = count_participants_by_field(merged_df, fields_mapping)

    # Convert the counts to a DataFrame for better readability
    counts_df = pd.DataFrame(field_counts).T.reset_index()
    counts_df.columns = ['Field', 'Count > 4', 'Count ≤ 4']

    # Save the report to a CSV file
    counts_df.to_csv('field_participant_report.csv', index=False)

    # Display the report
    print(counts_df)


                    Field  Count > 4  Count ≤ 4
0  Full Stack Development        215         80
1            Data Science        283        114
2        Data Engineering        234        101
3                   UI/UX        245         92
4        Software Testing        209        107
