In [6]:
# Import necessary libraries 
import os
import random
from typing import List

import numpy as np
import pandas as pd

# Initialize random seeds
# One SEED constant drives both the NumPy and the stdlib generators so they
# cannot drift apart; change it here for a different (still reproducible) run.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)


In [7]:
# Simulation settings: population size, the categorical choices users are
# drawn from, and the mean number of weekly interactions per user.
CONFIG = dict(
    initial_num_users=500,
    goal_options=[
        "weight_loss",
        "muscle_gain",
        "rehabilitation",
        "flexibility",
        "general_fitness",
    ],
    fitness_levels=["beginner", "intermediate", "advanced"],
    average_interactions_per_user=5,
)

In [8]:
# Load trainer data
# The workbook location can be overridden through the TRAINER_DATA_PATH
# environment variable so the notebook also runs on machines other than the
# author's; the original absolute path remains the default.
trainer_data_path = os.environ.get(
    "TRAINER_DATA_PATH",
    "/Users/nidhidudeja/Desktop/Ranking_Algorithm_Code/Trainers_ Data.xlsx",
)
trainers_df = pd.read_excel(trainer_data_path)
# Trainer ids are the row index shifted by one (1-based when the frame has
# read_excel's default 0-based index).
trainers_df["trainer_id"] = trainers_df.index + 1

In [9]:
# -----------------------
# OOP design: synthetic users and the weekly interaction simulator
# -----------------------

class User:
    """A synthetic user whose attributes are drawn at random on construction.

    Values are sampled with the stdlib ``random`` module, so call
    ``random.seed`` beforehand to get reproducible users.
    """

    # Fallbacks used when the config does not provide the optional keys.
    DEFAULT_AGE_RANGE = (18, 60)
    DEFAULT_GENDER_OPTIONS = ("male", "female")

    def __init__(self, user_id: int, config: dict):
        """Create a user with random age, gender, goal and fitness level.

        Args:
            user_id: Identifier stored on the user as-is.
            config: Must contain ``"goal_options"`` and ``"fitness_levels"``
                (non-empty sequences to choose from). May contain
                ``"age_range"`` (inclusive ``(min, max)`` pair, default
                ``(18, 60)``) and ``"gender_options"`` (default
                ``("male", "female")``).

        Raises:
            KeyError: If a required config key is missing.
        """
        min_age, max_age = config.get("age_range", self.DEFAULT_AGE_RANGE)
        gender_options = config.get("gender_options", self.DEFAULT_GENDER_OPTIONS)

        self.user_id = user_id
        # The draw order (age, gender, goal, fitness level) is fixed so that a
        # given seed always produces the same sequence of users.
        self.age = random.randint(min_age, max_age)
        self.gender = random.choice(gender_options)
        self.goal = random.choice(config["goal_options"])
        self.fitness_level = random.choice(config["fitness_levels"])

    def to_dict(self):
        """Return the user's attributes as a flat dict (one DataFrame row)."""
        return {
            "user_id": self.user_id,
            "age": self.age,
            "gender": self.gender,
            "goal": self.goal,
            "fitness_level": self.fitness_level
        }

class InteractionSimulator:
    """Generates synthetic weekly user-trainer engagement records.

    Randomness comes from both NumPy's global generator and the stdlib
    ``random`` module; seed both for reproducible output.
    """

    # Column order of every frame returned by simulate_weekly_interactions.
    # Passing it explicitly keeps the schema stable even when no interaction
    # was generated (an empty record list would otherwise yield no columns).
    INTERACTION_COLUMNS = [
        "user_id",
        "trainer_id",
        "week_id",
        "impressions",
        "clicks",
        "video_views",
        "avg_watch_time",
        "likes",
        "label",
    ]

    def __init__(self, trainers: pd.DataFrame, config: dict):
        """Store the trainer catalogue and simulation settings.

        Args:
            trainers: One row per trainer; the simulation reads the
                ``"trainer_id"`` and ``"specialities"`` columns.
            config: Must contain ``"average_interactions_per_user"``, used as
                the Poisson mean for the number of trainers a user sees.
        """
        self.trainers = trainers
        self.config = config

    def simulate_weekly_interactions(self, users: List["User"], week_id: int) -> pd.DataFrame:
        """Simulate one week of interactions for the given users.

        For each user a Poisson-distributed number of trainers is sampled
        without replacement (capped at the number of trainers), and one
        interaction record is drawn per sampled trainer.

        Args:
            users: Objects exposing ``user_id`` and ``goal`` attributes.
            week_id: Value written to the ``week_id`` column of every record.

        Returns:
            A DataFrame with the columns in ``INTERACTION_COLUMNS``; it has
            zero rows (but the same columns) when nothing was generated.
        """
        mean_interactions = self.config["average_interactions_per_user"]
        records = []
        for user in users:
            num_interactions = np.random.poisson(lam=mean_interactions)
            if num_interactions == 0:
                continue

            sampled = self.trainers.sample(min(num_interactions, len(self.trainers)))
            # Goals are stored snake_case ("weight_loss") while specialities
            # are matched as lower-cased free text ("weight loss").
            goal_phrase = user.goal.replace("_", " ")
            for trainer_id, specialities in zip(sampled["trainer_id"], sampled["specialities"]):
                goal_match = goal_phrase in str(specialities).lower()
                records.append(
                    self._simulate_interaction(user.user_id, trainer_id, week_id, goal_match)
                )
        return pd.DataFrame(records, columns=self.INTERACTION_COLUMNS)

    @staticmethod
    def _simulate_interaction(user_id, trainer_id, week_id: int, goal_match: bool) -> dict:
        """Draw the engagement metrics for a single user-trainer pair."""
        impressions = random.randint(1, 5)
        # A trainer matching the user's goal is clicked more often.
        clicks = np.random.binomial(impressions, 0.6 if goal_match else 0.25)
        # Every click counts as a view; without clicks a few views may still occur.
        video_views = clicks if clicks > 0 else np.random.binomial(impressions, 0.2)
        # NOTE(review): a watch time is drawn even when video_views is 0;
        # kept as-is to preserve the generated data distribution.
        if clicks:
            avg_watch_time = round(np.random.uniform(0.6, 1.0), 2)
        else:
            avg_watch_time = round(np.random.uniform(0.0, 0.5), 2)
        likes = np.random.binomial(video_views, 0.3) if video_views else 0
        # Positive label: clicked and watched more than half. Clicked watch
        # times are drawn from [0.6, 1.0], so this is 1 exactly when clicks > 0.
        label = int(clicks > 0 and avg_watch_time > 0.5)

        return {
            "user_id": user_id,
            "trainer_id": trainer_id,
            "week_id": week_id,
            "impressions": impressions,
            "clicks": clicks,
            "video_views": video_views,
            "avg_watch_time": avg_watch_time,
            "likes": likes,
            "label": label,
        }

In [10]:
# Build the user population a single time; every simulated week reuses it
users = [
    User(user_id=uid, config=CONFIG)
    for uid in range(1, CONFIG["initial_num_users"] + 1)
]
users_df = pd.DataFrame(list(map(User.to_dict, users)))

# Run the simulator for weeks 1 through 4, then stack the weekly frames
simulator = InteractionSimulator(trainers=trainers_df, config=CONFIG)
weekly_dfs = []
for week in (1, 2, 3, 4):
    weekly_df = simulator.simulate_weekly_interactions(users=users, week_id=week)
    weekly_dfs += [weekly_df]

all_weeks_df = pd.concat(objs=weekly_dfs, ignore_index=True)



In [11]:
# Write the combined weekly interactions to CSV, omitting the row index
all_weeks_df.to_csv(path_or_buf="All_weeks_Interactions.csv", index=False)

In [12]:
# Write the generated user table to CSV, omitting the row index
users_df.to_csv(path_or_buf="Users.csv", index=False)