In [7]:
import random
import pandas as pd
from datetime import date, timedelta

def generate_student_schedule():
    # 1) Build date range from January 2nd to March 31st (non-leap year assumption)
    start_date = date(2024, 1, 2)  # Adjust year if needed
    end_date = date(2024, 3, 31)
    
    # Generate list of all dates in the period
    all_dates = []
    current_day = start_date
    while current_day <= end_date:
        all_dates.append(current_day)
        current_day += timedelta(days=1)
    
    num_days = len(all_dates)              # Total days in [Jan 2 .. Mar 31]
    num_students = 36
    needed_per_day = 6
    total_needed = needed_per_day * num_days  # Total on-duty slots over the entire period
    
    # 2) Figure out how many times each student should appear overall
    #    On average: total_needed / num_students
    avg_calls_per_student = total_needed / num_students  # Should be around 15 (for ~89 days)
    
    # Convert to integer "ideal" count per student
    # You can round, floor, or ceil as you see fit. We'll round here.
    calls_per_student = round(avg_calls_per_student)
    
    # 3) Create a list where each student is repeated calls_per_student times
    #    Then we'll remove or add to get the exact total_needed.
    schedule_list = []
    for student in range(1, num_students + 1):
        schedule_list.extend([student] * calls_per_student)
    
    # Check how many slots we have vs how many we actually need
    current_slots = len(schedule_list)
    
    if current_slots > total_needed:
        # If we have too many, randomly remove the extra
        excess = current_slots - total_needed
        # Randomly drop 'excess' students
        random_indices = random.sample(range(current_slots), excess)
        # We'll mark these indices for removal
        to_remove_set = set(random_indices)
        schedule_list = [s for i, s in enumerate(schedule_list) if i not in to_remove_set]
    elif current_slots < total_needed:
        # If we don't have enough, pad with random student picks
        shortage = total_needed - current_slots
        additional_students = random.choices(range(1, num_students + 1), k=shortage)
        schedule_list.extend(additional_students)
    
    # 4) Shuffle so that distribution across days is fairly random
    random.shuffle(schedule_list)
    
    # 5) Chunk the schedule_list into groups of 'needed_per_day' per day
    #    Each chunk corresponds to one day (6 students).
    #    This implicitly ensures no student is assigned more than once *in the same day*.
    schedule_by_day = []
    idx = 0
    for d in range(num_days):
        day_students = schedule_list[idx: idx + needed_per_day]
        idx += needed_per_day
        schedule_by_day.append(day_students)
    
    # 6) Construct the DataFrame:
    #    Each row = a day, each column = a student, 1 if that student is on duty, else 0
    df = pd.DataFrame(0, index=all_dates, columns=range(1, num_students + 1))
    for i, day in enumerate(all_dates):
        for student_on_duty in schedule_by_day[i]:
            df.loc[day, student_on_duty] = 1
    
    # Optionally, reset the index to have a nice Day 1, Day 2, ...
    # Or keep the dates as index. Here we keep the actual date.
    
    return df

if __name__ == "__main__":
    schedule_df = generate_student_schedule()
    display(schedule_df)
    # You can also write to CSV or Excel if needed:
    # schedule_df.to_csv("student_schedule.csv")


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,27,28,29,30,31,32,33,34,35,36
2024-01-02,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
2024-01-03,0,0,1,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2024-01-04,1,0,0,0,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
2024-01-05,0,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,1,1,0
2024-01-06,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-27,0,0,0,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2024-03-28,1,0,0,0,0,0,1,1,0,0,...,1,0,0,0,1,0,0,0,0,0
2024-03-29,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2024-03-30,0,0,1,0,1,0,0,0,0,0,...,0,1,1,0,0,0,0,0,0,0


In [8]:
schedule_df.sum(axis=1)

2024-01-02    4
2024-01-03    6
2024-01-04    6
2024-01-05    5
2024-01-06    6
             ..
2024-03-27    6
2024-03-28    6
2024-03-29    6
2024-03-30    6
2024-03-31    6
Length: 90, dtype: int64

In [1]:
import random
import pandas as pd
from datetime import date, timedelta

def generate_student_schedule():
    """
    Generates a schedule from January 2nd to March 31st (2024 is a leap year, 
    so February has 29 days).
    Each day has exactly 6 distinct students on duty.
    The distribution of duty is kept as balanced as possible by always 
    picking from the students with the fewest assignments so far.
    Returns a Pandas DataFrame with rows=days, columns=students, and 1/0 entries.
    """
    
    # --- 1) Build date range ---
    start_date = date(2025, 1, 2)
    end_date = date(2025, 3, 31)
    
    all_dates = []
    current_day = start_date
    while current_day <= end_date:
        all_dates.append(current_day)
        current_day += timedelta(days=1)
    
    num_days = len(all_dates)      # Total days
    num_students = 36
    needed_per_day = 6
    
    # Create a DataFrame of zeros
    df = pd.DataFrame(
        0,
        index=all_dates,
        columns=range(1, num_students + 1)
    )
    
    # --- 2) Track how many times each student has been assigned so far ---
    assigned_counts = {student: 0 for student in range(1, num_students + 1)}
    
    # --- 3) For each day, pick 6 distinct students with the fewest assignments ---
    for day in all_dates:
        # Sort by assigned_counts (ascending), then shuffle among ties
        #  so that if multiple students have the same assigned_count,
        #  we pick them randomly among that group.
        sorted_students = sorted(
            assigned_counts.keys(),
            key=lambda s: (assigned_counts[s], random.random())
        )
        
        # The first 6 students in this sorted list are the ones with the fewest assignments
        chosen_students = sorted_students[:needed_per_day]
        
        # Update assigned_counts and fill the DataFrame
        for student in chosen_students:
            assigned_counts[student] += 1
            df.loc[day, student] = 1
    
    return df

if __name__ == "__main__":
    schedule_df = generate_student_schedule()
    display(schedule_df)
    # Verify row sums are always 6:
    # print(schedule_df.sum(axis=1).value_counts())
    
    # Optionally write to a CSV file:
    schedule_df.to_csv("student_schedule.csv")


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,27,28,29,30,31,32,33,34,35,36
2025-01-02,0,0,0,0,1,0,1,1,0,0,...,1,0,0,0,0,1,0,1,0,0
2025-01-03,0,1,1,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
2025-01-04,0,0,0,0,0,1,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
2025-01-05,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2025-01-06,1,0,0,1,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-27,0,0,0,0,0,0,0,0,1,0,...,1,1,0,0,0,0,0,0,0,1
2025-03-28,0,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,1,0,0
2025-03-29,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,1,0
2025-03-30,1,0,1,0,1,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [2]:
print(schedule_df.sum(axis=1).value_counts())
print(schedule_df.sum(axis=0).value_counts())

6    89
Name: count, dtype: int64
15    30
14     6
Name: count, dtype: int64
