In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os
from collections import defaultdict # For easier subject counts
import operator
from tqdm.notebook import tqdm

# Optional: for better display in Jupyter
from IPython.display import display

Generate each student's propensity to socialise and to go clubbing (two separate scores, each on a 1–10 scale)

In [2]:
# Read the two input tables from the working directory:
#   - the per-student table (friendship-group assignments)
#   - the per-subject propensity values used to derive the modifiers
students_df = pd.read_csv(filepath_or_buffer='students_groups_final.csv')
subject_effects_df = pd.read_csv(filepath_or_buffer='Subjecteffects.csv')

In [4]:
# --- Step 1: Prepare the Subject Modifiers ---

# Give the two verbose propensity columns of Subjecteffects.csv short names.
subject_effects_df.rename(
    columns={
        'Social Propensity Value 5-9 (baseline 7)': 'Social_Propensity_Base',
        'Club Propensity Value (baseline 7)': 'Club_Propensity_Base',
    },
    inplace=True,
)

# A modifier is the signed distance of a subject's base value from the
# overall baseline of 7 (positive = above baseline, negative = below).
subject_effects_df['Social_Modifier'] = subject_effects_df['Social_Propensity_Base'].sub(7)
subject_effects_df['Club_Modifier'] = subject_effects_df['Club_Propensity_Base'].sub(7)

# Only the join key and the two modifiers are carried into the merge.
subject_modifiers_to_merge = subject_effects_df.loc[
    :, ['Subject', 'Social_Modifier', 'Club_Modifier']
]

# --- Step 2: Merge data ---
# Left join: every student row is kept; a subject with no match in the
# effects table ends up with NaN modifiers.
merged_df = students_df.merge(subject_modifiers_to_merge, how='left', on='Subject')


# --- Step 3: Define new randomized calculation functions ---

def calculate_random_social_propensity(row):
    """Return a randomised social propensity score (integer 1-10) for one student.

    The score starts from a random integer base in 3..8, adds the row's
    subject modifier, applies a year adjustment (+2 / +1 / -1 for years
    1 / 2 / 3, and -2 for any other value, which is treated as postgraduate)
    and a group-size adjustment (+1 for more than 5 members, -1 for fewer
    than 3), and is finally clamped to the 1..10 range.

    Args:
        row: Mapping-like record (e.g. a DataFrame row from ``apply(axis=1)``)
            providing 'Social_Modifier', 'Year' and 'Group Size'.

    Returns:
        int: The clamped score. A fractional total is truncated by ``int()``.
    """
    # Start with a random integer base for each person
    propensity = np.random.randint(3, 9)  # Random integer from 3 to 8

    # Add the pre-calculated subject modifier. A subject that had no match in
    # the left merge carries a NaN modifier; adding it would turn the total
    # into NaN, and min(10, nan) evaluates to 10, silently giving the student
    # the maximum score. A missing modifier is therefore treated as 0.
    subject_modifier = row['Social_Modifier']
    if not pd.isna(subject_modifier):
        propensity += subject_modifier

    # Adjust for year group
    if row['Year'] == 1:
        propensity += 2
    elif row['Year'] == 2:
        propensity += 1
    elif row['Year'] == 3:
        propensity -= 1
    else:  # Postgraduate
        propensity -= 2

    # Adjust for group size
    if row['Group Size'] > 5:
        propensity += 1
    elif row['Group Size'] < 3:
        propensity -= 1

    # Ensure propensity is within 1-10 range
    return int(max(1, min(10, propensity)))

def calculate_random_club_propensity(row):
    """Return a randomised club propensity score (integer 1-10) for one student.

    The score starts from a random integer base in 3..8, adds the row's
    subject modifier, applies a year adjustment (+3 / +1 / -2 for years
    1 / 2 / 3, and -4 for any other value, which is treated as postgraduate)
    and a group-size adjustment (+1 for more than 4 members, -1 for fewer
    than 3), and is finally clamped to the 1..10 range.

    Args:
        row: Mapping-like record (e.g. a DataFrame row from ``apply(axis=1)``)
            providing 'Club_Modifier', 'Year' and 'Group Size'.

    Returns:
        int: The clamped score. A fractional total is truncated by ``int()``.
    """
    # Start with a random integer base for each person
    propensity = np.random.randint(3, 9)  # Random integer from 3 to 8

    # Add the pre-calculated subject modifier. A subject that had no match in
    # the left merge carries a NaN modifier; adding it would turn the total
    # into NaN, and min(10, nan) evaluates to 10, silently giving the student
    # the maximum score. A missing modifier is therefore treated as 0.
    subject_modifier = row['Club_Modifier']
    if not pd.isna(subject_modifier):
        propensity += subject_modifier

    # Adjust for year group
    if row['Year'] == 1:
        propensity += 3
    elif row['Year'] == 2:
        propensity += 1
    elif row['Year'] == 3:
        propensity -= 2
    else:  # Postgraduate
        propensity -= 4

    # Adjust for group size
    if row['Group Size'] > 4:
        propensity += 1
    elif row['Group Size'] < 3:
        propensity -= 1

    # Ensure propensity is within 1-10 range
    return int(max(1, min(10, propensity)))

# --- Step 4: Apply the functions and save ---
# Every run draws fresh random bases, so the scores change from run to run.
# Social scores are computed and stored first, then club scores.
social_scores = merged_df.apply(calculate_random_social_propensity, axis=1)
merged_df['Social Propensity'] = social_scores
club_scores = merged_df.apply(calculate_random_club_propensity, axis=1)
merged_df['Club Propensity'] = club_scores

# Sequential zero-based identifier, one per row.
merged_df['StudentID'] = range(len(merged_df))

# The modifier columns were only needed while scoring; leave them out of the result.
final_df = merged_df.drop(['Social_Modifier', 'Club_Modifier'], axis=1)

# Write the result without the index column.
final_df.to_csv('students_groups_social.csv', index=False)

print(
    "Propensities successfully updated with a degree of randomness.",
    "The final data has been saved to 'students_groups_social.csv'",
    sep='\n',
)
print(final_df.head())

Propensities successfully updated with a degree of randomness.
The final data has been saved to 'students_groups_social.csv'
    College              Subject  Year  Social Propensity  Club Propensity  \
0  Christ's  Architecture Tripos     1                 10                7   
1  Christ's  Architecture Tripos     1                 10               10   
2  Christ's  Architecture Tripos     2                  8                6   
3  Christ's  Architecture Tripos     2                  9                9   
4  Christ's  Architecture Tripos     3                  3                4   

   Disease State  Adapted Group Size  Gender  Assigned  Group ID  Group Size  \
0              0                   7  Female      True       1.0           7   
1              0                   6  Female      True       2.0           6   
2              0                   5  Female      True       3.0           5   
3              0                   6  Female      True      32.0           8   
4     