In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def _weekdays_between(start, end):
    """Return every Monday-Friday datetime from start through end, inclusive."""
    span_days = (end - start).days
    candidates = (start + timedelta(days=offset) for offset in range(span_days + 1))
    return [day for day in candidates if day.weekday() < 5]


def generate_bowdoin_2024_data(seed=2024):
    """Generate synthetic dinner-swipe data for Thorne and Moulton dining halls.

    One row is produced per (weekday, 15-minute dinner interval, dining hall)
    for the fall 2024 semester (2024-09-03 to 2024-12-20) and the spring 2025
    semester (2025-01-21 to 2025-05-23). Dinner is modelled as twelve
    intervals starting at 17:00, with intervals 5-7 treated as the peak.

    Args:
        seed: Seed for the function's private random number generator. The
            default (2024) yields the same random stream as seeding the
            global NumPy generator with ``np.random.seed(2024)``. Pass
            ``None`` for non-reproducible output.

    Returns:
        pandas.DataFrame sorted by "Date" then "Time Interval", with columns:
        "Date" (YYYY-MM-DD string), "Time" (HH:MM string), "Time Interval"
        (1-12), "Number of Swipes", "Dining Hall" (1 = Thorne, 0 = Moulton),
        "Month of the Year", "Dining Menu Quality" (1-4),
        "Exam/Reading Period or Weekend" (0/1; only the exam flag is ever
        set because weekends are excluded from the dates),
        "Number of Students on Campus", "Campus Events with Takeout Food"
        (0/1) and "Number of Students on Declining Balance" (filled with the
        constant MEAL_PLAN_STUDENTS value).
    """
    # Use a private legacy RandomState instead of np.random.seed(): it
    # produces the same stream for a given seed but does not clobber the
    # process-wide NumPy RNG state that other code may depend on.
    rng = np.random.RandomState(seed)

    # Bowdoin-specific constants
    BASE_ENROLLMENT = 1881
    MEAL_PLAN_STUDENTS = 1600
    THORNE_CAPACITY = 350
    MOULTON_CAPACITY = 250

    # 2024-25 academic calendar (fall 2024 and spring 2025)
    fall_start, fall_end = datetime(2024, 9, 3), datetime(2024, 12, 20)
    spring_start, spring_end = datetime(2025, 1, 21), datetime(2025, 5, 23)

    dates = (_weekdays_between(fall_start, fall_end)
             + _weekdays_between(spring_start, spring_end))

    # The twelve 15-minute dinner slots (17:00 ... 19:45) are the same every
    # day, so format their labels once.
    time_labels = {
        interval: (datetime.min
                   + timedelta(hours=17, minutes=15 * (interval - 1))).strftime('%H:%M')
        for interval in range(1, 13)
    }

    data = []

    for date in dates:
        month = date.month
        date_str = date.strftime('%Y-%m-%d')

        # Enrollment is reduced by 170 in Jan, Feb, Sep and Oct
        enrollment = BASE_ENROLLMENT - 170 if month in (1, 2, 9, 10) else BASE_ENROLLMENT

        # Campus events only happen on Thursdays. The short-circuit matters:
        # a random number is drawn on Thursdays only, which keeps the random
        # stream aligned with previously generated data.
        is_thursday = date.weekday() == 3
        has_event = is_thursday and (rng.random_sample() < 0.3)

        # Exam periods: from Dec 7 and from May 10 until the semester ends
        is_exam = ((month == 12 and date.day >= 7) or
                   (month == 5 and date.day >= 10))

        # Menu quality, drawn once per day
        menu_quality = rng.choice([1, 2, 3, 4], p=[0.1, 0.25, 0.5, 0.15])

        # Per-day multipliers; events pull more traffic away from Thorne
        # (x0.65) than from Moulton (x0.85).
        menu_factor = 1 + (menu_quality - 1) * 0.12
        thorne_event_factor = 0.65 if has_event else 1.0
        moulton_event_factor = 0.85 if has_event else 1.0
        exam_factor = 0.75 if is_exam else 1.0

        for interval in range(1, 13):
            # Base traffic: draw order (Thorne normal, Moulton normal, Thorne
            # jitter, Moulton jitter) must stay fixed for reproducibility.
            if interval in (5, 6, 7):  # Peak dinner
                base_thorne = rng.normal(240, 20)
                base_moulton = rng.normal(200, 15)
            else:
                base_thorne = rng.normal(130, 15)
                base_moulton = rng.normal(100, 10)

            thorne_raw = int(base_thorne * menu_factor * thorne_event_factor
                             * exam_factor * rng.uniform(0.95, 1.05))
            moulton_raw = int(base_moulton * menu_factor * moulton_event_factor
                              * exam_factor * rng.uniform(0.95, 1.05))

            # Cap at hall capacity; also floor at zero because a normal draw
            # is unbounded below and a negative swipe count is meaningless.
            thorne_swipes = max(0, min(THORNE_CAPACITY, thorne_raw))
            moulton_swipes = max(0, min(MOULTON_CAPACITY, moulton_raw))

            time_str = time_labels[interval]
            data.append([
                date_str,
                time_str,
                interval,
                thorne_swipes,
                1,  # Thorne
                month,
                menu_quality,
                int(is_exam),
                enrollment,
                int(has_event),
                MEAL_PLAN_STUDENTS
            ])
            data.append([
                date_str,
                time_str,
                interval,
                moulton_swipes,
                0,  # Moulton
                month,
                menu_quality,
                int(is_exam),
                enrollment,
                int(has_event),
                MEAL_PLAN_STUDENTS
            ])

    df = pd.DataFrame(data, columns=[
        "Date",
        "Time",
        "Time Interval",
        "Number of Swipes",
        "Dining Hall",
        "Month of the Year",
        "Dining Menu Quality",
        "Exam/Reading Period or Weekend",
        "Number of Students on Campus",
        "Campus Events with Takeout Food",
        "Number of Students on Declining Balance"
    ])

    return df.sort_values(['Date', 'Time Interval']).reset_index(drop=True)

# Build the synthetic dataset and persist it as a CSV (row index omitted).
bowdoin_2024 = generate_bowdoin_2024_data()
bowdoin_2024.to_csv("data.csv", index=False)

# Quick sanity report: total row count, then rows per calendar month.
print(f"Generated {len(bowdoin_2024)} records")
print("Month distribution:")
print(
    bowdoin_2024["Month of the Year"]
    .value_counts()
    .sort_index()
)

Generated 4032 records
Month distribution:
Month of the Year
1     216
2     480
3     504
4     528
5     408
9     480
10    552
11    504
12    360
Name: count, dtype: int64
