# BTC Fake - Training Completion Simulator

This notebook simulates employees completing training courses based on recommendations from the training recommender API.

## How it works:
1. Reads employee population from `actors/employees.csv`
2. For each employee, calls the recommendation API
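3. Completes training according to employee type: type A completes every recommended course, type B completes only the first recommended course, and type F completes none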
4. Generates a ContentUserCompletion CSV file
5. Prints a summary of the completed courses for each employee who completed at least one

In [1]:
import csv
import os
import random
import string
from datetime import datetime, timedelta, timezone
from typing import List, Dict

import pandas as pd
import requests
import urllib3

# Disable SSL warnings when ignoring certificate verification.
# NOTE(review): this silences urllib3's InsecureRequestWarning for the whole process;
# the request in get_training_recommendations() is made with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Configuration
# Base URL of the recommender API host (host name suggests a dev/QA environment - confirm).
API_BASE_URL = "https://dataiku-api-devqa.lower.internal.sephora.com"
# Path appended to API_BASE_URL to form the recommendation endpoint URL.
API_ENDPOINT = "/public/api/v1/mltr/v3/run"
# CSV of employees to simulate; the main loop reads its employee_id and employee_edu_type columns.
EMPLOYEES_FILE = "actors/employees.csv"
# Directory the generated ContentUserCompletion CSV is written into.
OUTPUT_DIR = "generated_files"

In [None]:
def get_training_recommendations(employee_id: int) -> List[Dict]:
    """
    Call the training recommender API for a given employee.

    POSTs ``{"data": {"ba_id": employee_id}}`` to ``API_BASE_URL + API_ENDPOINT``,
    extracts the recommendation list from the JSON reply and prints a one-line
    digest (ba_id | content id | content name) for every recommendation.

    This is a best-effort call: any request, HTTP-status or parsing failure is
    printed and turned into an empty result instead of being raised.

    Args:
        employee_id: The employee's ID (ba_id)

    Returns:
        List of recommended training courses; an empty list when the call
        fails or the response does not have a usable shape.
    """
    url = f"{API_BASE_URL}{API_ENDPOINT}"
    payload = {"data": {"ba_id": employee_id}}

    try:
        # SECURITY: certificate verification is disabled for the internal API,
        # so the server's identity is not checked on this request.
        response = requests.post(url, json=payload, timeout=30, verify=False)
        response.raise_for_status()
        data = response.json()

        # Response structure: {"response": {"ml_recommendations": [...], "coaching_note": {...}}, "timing": {...}, "apiContext": {...}}
        if not isinstance(data, dict):
            print(f"  Unexpected response type: {type(data)}")
            return []

        response_data = data.get("response", {})
        if isinstance(response_data, dict):
            # Get ml_recommendations from nested response
            recommendations = response_data.get("ml_recommendations", [])
        elif isinstance(response_data, list):
            # Response is directly a list
            recommendations = response_data
        else:
            recommendations = []

        # Ensure we have a list before iterating or returning it
        if not isinstance(recommendations, list):
            print(f"  Recommendations is not a list: {type(recommendations)}")
            return []

        # Print selected fields from API response
        if recommendations:
            print(f"  API Response for employee {employee_id}:")
            for rec in recommendations:
                # A non-dict entry has no .get(); calling it would raise and the
                # broad handler below would discard every recommendation.
                # Skip it here - process_employee() reports such entries itself.
                if not isinstance(rec, dict):
                    continue
                ba_id = rec.get("ba_id", "N/A")
                content_id = rec.get("recommended_content_id", "N/A")
                recommended_content = rec.get("recommended_content", "N/A")
                print(f"  {ba_id} | {content_id} | {recommended_content}")
            print()

        return recommendations

    except Exception as e:
        # Deliberately broad: one failing employee must not stop the simulation.
        print(f"  Error fetching recommendations for employee {employee_id}: {e}")
        return []

In [3]:
def format_content_id(content_id: int) -> str:
    """
    Render a content ID with comma thousands separators.

    Example: 1915085 -> "1,915,085"

    Args:
        content_id: The numeric content ID

    Returns:
        The ID as a string, digits grouped in threes by commas
    """
    # The "," format spec asks for comma-grouped thousands.
    return format(content_id, ",")

def generate_training_times(num_courses: int) -> List[tuple]:
    """
    Generate start and completion times for training courses.

    Every course gets the same pair of timestamps, measured from the moment
    this function is called:
    Start time: 2 minutes ago
    Completion time: 1 minute ago

    The timestamps are taken from the UTC clock, because the output format
    ends in a literal "Z" (the ISO-8601 marker for UTC).

    Args:
        num_courses: Number of courses to generate times for

    Returns:
        List of (start_time, end_time) tuples in ISO-8601 format
        ("YYYY-MM-DDTHH:MM:SSZ"); empty when num_courses is 0 or negative
    """
    iso_utc_format = "%Y-%m-%dT%H:%M:%SZ"

    # Use an aware UTC "now": a naive local datetime.now() stamped with "Z"
    # would be off by the machine's UTC offset.
    current_time = datetime.now(timezone.utc)

    # Both values are identical for every course, so compute them once.
    start_time = (current_time - timedelta(minutes=2)).strftime(iso_utc_format)
    end_time = (current_time - timedelta(minutes=1)).strftime(iso_utc_format)

    return [(start_time, end_time) for _ in range(num_courses)]

In [4]:
def process_employee(employee_id: int, employee_type: str) -> List[Dict]:
    """
    Fetch one employee's recommendations and simulate the resulting completions.

    The employee type (case-insensitive, surrounding whitespace ignored)
    decides how many recommended courses are completed: 'a' completes all of
    them, 'b' completes only the first, and any other value completes none.

    Args:
        employee_id: The employee's ID
        employee_type: The employee's type (a, b, or f)

    Returns:
        List of completed training records, one dict per course with the keys
        UserId, ContentId, DateStarted, DateCompleted and CourseName
    """
    normalized_type = employee_type.lower().strip()
    recommendations = get_training_recommendations(employee_id)

    if not recommendations:
        print(f"  No recommendations for employee {employee_id}")
        return []

    # Type A: every recommendation; Type B: only the first; anything else
    # (Type F): nothing.
    selection_by_type = {
        'a': recommendations,
        'b': recommendations[:1],
    }
    selected_courses = selection_by_type.get(normalized_type, [])

    # One (start, end) timestamp pair per selected course
    time_pairs = generate_training_times(len(selected_courses))

    records = []
    for course, (started_at, completed_at) in zip(selected_courses, time_pairs):
        try:
            # Entries that are not dicts cannot be turned into a record
            if not isinstance(course, dict):
                print(f"  WARNING: Course is not a dict, it's {type(course)}: {course}")
                continue

            formatted_id = format_content_id(course["recommended_content_id"])
            course_name = course.get("recommended_content", "Unknown")
            records.append({
                "UserId": employee_id,
                "ContentId": formatted_id,
                "DateStarted": started_at,
                "DateCompleted": completed_at,
                "CourseName": course_name
            })
        except KeyError as e:
            # The mandatory content id is absent from this recommendation
            print(f"  WARNING: Missing key {e} in course data: {course}")
        except Exception as e:
            # Any other per-course failure is reported and the course skipped
            print(f"  WARNING: Error processing course: {e}")

    return records

In [5]:
def generate_output_filename() -> str:
    """
    Build the output CSV filename from today's date and a random suffix.

    Format: ContentUserCompletion_V2_YYYY_MM_DD_1_RAND.csv, where the date is
    the current local date (4-digit year) and RAND is six random characters
    drawn from lowercase letters and digits.

    Returns:
        Generated filename
    """
    date_part = datetime.now().strftime("%Y_%m_%d")

    # 6-character random alphanumeric suffix
    alphabet = string.ascii_lowercase + string.digits
    suffix = ''.join(random.choices(alphabet, k=6))

    return "ContentUserCompletion_V2_" + date_part + "_1_" + suffix + ".csv"

In [6]:
# Main execution: load the employee list, simulate completions for each
# employee, and collect the results for the output and summary cells.
print("=" * 80)
print("BTC Fake - Training Completion Simulator")
print("=" * 80)
print()

# Load employees
print(f"Loading employees from {EMPLOYEES_FILE}...")
employees_df = pd.read_csv(EMPLOYEES_FILE)
print(f"Loaded {len(employees_df)} employees\n")

# Process each employee
all_completions = []      # every completion record, across all employees
employee_summaries = []   # (employee_id, [(ContentId, CourseName), ...]) per employee with completions

for _, employee in employees_df.iterrows():
    employee_id = employee['employee_id']
    employee_type = employee['employee_edu_type']

    # Strip before displaying: values from the CSV can carry stray whitespace
    # (e.g. a trailing tab), which would otherwise show up inside the log line.
    # process_employee() normalises the raw value itself.
    type_label = employee_type.strip().upper()
    print(f"Processing Employee {employee_id} (Type {type_label})...")
    completions = process_employee(employee_id, employee_type)

    if completions:
        all_completions.extend(completions)
        # Store both ContentId and CourseName for summary
        course_details = [(c['ContentId'], c['CourseName']) for c in completions]
        employee_summaries.append((employee_id, course_details))
        print(f"  Completed {len(completions)} training(s)")
    else:
        print("  No training completed")
    print()

print("=" * 80)

BTC Fake - Training Completion Simulator

Loading employees from actors/employees.csv...
Loaded 7 employees

Processing Employee 104829 (Type A	)...
  API Response for employee 104829:
  104829 | 574327 | Sell.

  Completed 1 training(s)

Processing Employee 151557 (Type A)...
  API Response for employee 151557:
  151557 | 594097 | Flex Beauty Advisor Foundational Training

  Completed 1 training(s)

Processing Employee 75412 (Type A)...
  No recommendations for employee 75412
  No training completed

Processing Employee 312675 (Type A)...
  API Response for employee 312675:
  312675 | 604266 | Flex Beauty Advisor Foundational Training

  Completed 1 training(s)

Processing Employee 352205 (Type F)...
  No recommendations for employee 352205
  No training completed

Processing Employee 63492 (Type F)...
  No recommendations for employee 63492
  No training completed

Processing Employee 1 (Type A)...
  No recommendations for employee 1
  No training completed



In [7]:
# Generate output file: write all simulated completions to a single CSV
# inside OUTPUT_DIR (nothing is written when there are no completions).
if all_completions:
    output_filename = generate_output_filename()

    # Create the output directory if it is missing; to_csv does not create
    # parent directories and would fail on a fresh checkout.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    output_path = os.path.join(OUTPUT_DIR, output_filename)

    # Create DataFrame with only the required columns for CSV
    # (CourseName is kept only for the on-screen summary)
    output_df = pd.DataFrame(all_completions)
    output_df = output_df[['UserId', 'ContentId', 'DateStarted', 'DateCompleted']]

    # Quote every field: ContentId values contain commas (e.g. "1,915,085")
    output_df.to_csv(output_path, index=False, quoting=csv.QUOTE_ALL)

    print(f"Generated output file: {output_filename}")
    print(f"Total completions: {len(all_completions)}")
    print()
else:
    print("No training completions to write.")
    print()

Generated output file: ContentUserCompletion_V2_2026_01_02_1_85qrrj.csv
Total completions: 3



In [8]:
# Print summary: one line per employee that completed at least one course
rule = "=" * 80

print(rule)
print("EMPLOYEE TRAINING SUMMARY")
print(rule)
print()

for employee_id, course_details in employee_summaries:
    # Each course is shown as "ContentId: CourseName", comma-separated
    course_list = ", ".join(
        f"{content_id}: {course_name}"
        for content_id, course_name in course_details
    )
    print(f"Employee {employee_id}: {course_list}")

print()
print(rule)
print("Simulation complete!")
print(rule)

EMPLOYEE TRAINING SUMMARY

Employee 104829: 574,327: Servicing Multiple Clients
Employee 151557: 594,097: Sell. Three Ways to Sell
Employee 312675: 604,266: Give. Setting Expectations

Simulation complete!
