In [3]:
# DSC670
# Week 12
# Term Project - Milestone 4
# Author: Nathanael Ochoa
# 03/01/2025

# Project Milestone 4: Create Your Application to Finish the Project!

In [6]:
# Load necessary packages
import json
import nbformat
import os
import requests
import subprocess

import pandas as pd
import streamlit as st

from nbconvert import PythonExporter
from openai import OpenAI

The first step is to load the CSV file into a DataFrame and then convert it into a JSONL file:

In [24]:
# Load CSV file
input_file = 'DSC670 Project Data - Final.csv'
output_file = 'Complete Schedules - Final.jsonl'

In [25]:
# Read the CSV file into a DataFrame
df = pd.read_csv(input_file)

# Prepare the output data
output_data = []

# Define scheduling rules to embed in the prompt
scheduling_rule = """
Follow these scheduling rules:

- Breaks are 15 minutes long.
- Lunches are 45 minutes long.
- Minimize overlap between team member breaks and lunches as much as possible.
- Schedule breaks no earlier than 2 hours after clock-in.
- Schedule lunch no later than the 4th hour and 59th minute of work (lunch must occur before the 5th hour).
- For shifts that are 6 hours or more, schedule a 2nd break between the time the employee returns from lunch 
  and their shift end time.
- Do not schedule a break for a team member's last 15 minutes of their shift.
"""

# Group the data by the 'set' column (each 'set' represents a day)
grouped = df.groupby('set')

In [26]:
# No personal data is used in the data as can be seen below.
df.head()

Unnamed: 0,set,employee,shift start,shift end,1st break,lunch,2nd break
0,1,1,4:00 AM,11:45 AM,6:00 AM,8:00 AM,10:30 AM
1,1,2,4:00 AM,12:00 PM,6:15 AM,8:30 AM,10:45 AM
2,1,3,4:00 AM,12:00 PM,6:30 AM,8:45 AM,11:00 AM
3,1,4,5:00 AM,12:45 PM,7:30 AM,9:45 AM,11:30 AM
4,1,5,5:00 AM,11:30 PM,7:00 AM,9:00 AM,


In [28]:
# The set variable goes up to 53 which means I fine-tuned this model using 53 different work schedules.
df.tail(1)

Unnamed: 0,set,employee,shift start,shift end,1st break,lunch,2nd break
671,53,11,5:00 PM,9:00 PM,7:00 PM,,


In [6]:
# Process each group (i.e., each day) in the DataFrame to create the prompt-completion pairs
for set_number, group in grouped:
    # Start the message list
    messages = []

    # Add the system message to explain the task
    messages.append({
        "role": "system",
        "content": f"Generate a break and lunch schedule for the employees working on Day {set_number}. "
                   f"Ensure breaks are spaced by at least 2 hours."
    })
    
    # Add the user message with the shift details
    user_content = f"Here are the shift details for employees scheduled for Day {set_number}: "
    for index, row in group.iterrows():
        user_content += (f"Employee {row['employee']}: shift start {row['shift start']}, "
                         f"shift end {row['shift end']}. ")
    
    messages.append({
        "role": "user",
        "content": user_content
    })

    # Add the assistant message with the generated schedule (we can generate this programmatically or manually for now)
    assistant_content = "Here is the generated schedule for Day " + str(set_number) + ": "
    for index, row in group.iterrows():
        assistant_content += (f"Employee {row['employee']}: 1st break at {row['1st break']}, "
                              f"lunch at {row['lunch']}, 2nd break at {row['2nd break']}. ")

    messages.append({
        "role": "assistant",
        "content": assistant_content
    })
    
    # Append the messages to the output list
    output_data.append({"messages": messages})

# Write the new format data to a JSONL file
with open(output_file, 'w') as f:
    for item in output_data:
        f.write(json.dumps(item) + '\n')

print(f"Data has been converted and saved to {output_file}")

Data has been converted and saved to Complete Schedules - Final.jsonl


This is the file I will upload to OpenAI for fine-tuning. I will use the same code from my Week 7 assignment. The test code was provided by Professor F. Neugebauer and will be slightly modified to suit my needs.

In [7]:
# JSON file path
json_path = 'config.json'

# Load API key from the JSON file
with open(json_path, "r") as f:
    config = json.load(f)
    api_key = config['api_key']
    
# Client for API call
client = OpenAI(api_key = api_key)

Upload data:

In [8]:
# Set headers for API requests
headers = {
    'Authorization': f'Bearer {api_key}',
}

# Define file path
TRAINING_FILENAME = output_file  # Created earlier

# Upload the training dataset file to OpenAI
def upload_data(file_path):
    url = 'https://api.openai.com/v1/files'
    files = {
        'file': open(file_path, 'rb')
    }
    data = {
        'purpose': 'fine-tune',  # Define the purpose as fine-tuning
    }
    
    response = requests.post(url, headers = headers, files = files, data = data)
    return response.json()

# Upload the file
upload_response = upload_data(TRAINING_FILENAME)

# Display
print("Training file ID:", upload_response['id'])
print("Training file name:", upload_response['filename'])

Training file ID: file-PBPigQXhwP12FBoqWYtFmX
Training file name: Complete Schedules - Final.jsonl


Begin the fine-tuning job:

In [9]:
# Begin fine-tuning job
def fine_tune_model(file_id):
    url = 'https://api.openai.com/v1/fine_tuning/jobs'
    data = {
        'training_file': file_id,
        'model': 'gpt-4o-2024-08-06',  # My selected model
        'hyperparameters': {
            'n_epochs': 3
        }
    }

    response = requests.post(url, headers = headers, json = data)
    return response.json()

# Start fine-tuning with the uploaded file ID
file_id = upload_response['id']  # Get the file ID from the upload response
fine_tune_response = fine_tune_model(file_id)

# Display
print("Fine-tuning job ID:", fine_tune_response['id'])

Fine-tuning job ID: ftjob-Ce6wEeVM890sN9W13inziroK


List fine-tuning jobs:

In [13]:
# List all fine-tuning jobs
def list_fine_tuning_jobs():
    url = 'https://api.openai.com/v1/fine_tuning/jobs'
    response = requests.get(url, headers = headers)
    return response.json()

# Fetch and print all fine-tuning jobs
ft_jobs = list_fine_tuning_jobs()
if ft_jobs:
    for ft_job in ft_jobs['data']:  # 'data' contains the list of jobs
        print(f"Job ID: {ft_job['id']}, Status: {ft_job['status']}")

Job ID: ftjob-Ce6wEeVM890sN9W13inziroK, Status: running
Job ID: ftjob-3nnQ5MiTV8z3a7QfIlm0gykt, Status: succeeded
Job ID: ftjob-eU7v1NyfK7wdUlQCwZhmQv8N, Status: succeeded
Job ID: ftjob-jxJdni4yyo8t0Mj4tGwbs7DH, Status: succeeded
Job ID: ftjob-6cXyN9aNgSgz2Xgg3Lsk1B69, Status: failed
Job ID: ftjob-NQxL6YdMQuPehVB4wVELwnIB, Status: failed
Job ID: ftjob-VQMeTGDytEdLAjFjvQZ4aOqG, Status: succeeded
Job ID: ftjob-yaSW4h4qGAkQS484JbhLnV1T, Status: succeeded
Job ID: ftjob-wmeLdw7oc1kv34QwGpLRh2pX, Status: succeeded
Job ID: ftjob-VslpCd37dkypNYXz9n5Qg3Km, Status: succeeded
Job ID: ftjob-qw6K4qDF5dD4s6JG97D3z469, Status: succeeded
Job ID: ftjob-DnYEIE22YHVEeG8dqBc3V1g0, Status: succeeded
Job ID: ftjob-GMGi9ExD3oPkLGEZw8pdTDGd, Status: succeeded


Print the fine-tuned model name:

In [12]:
# Monitor the fine-tuning job status
def monitor_fine_tuning(job_id):
    url = f'https://api.openai.com/v1/fine_tuning/jobs/{job_id}'
    response = requests.get(url, headers = headers)
    return response.json()

# Monitor the fine-tuning job using the job ID
job_id = fine_tune_response['id']
status_response = monitor_fine_tuning(job_id)

# Check if the job has succeeded and print the fine-tuned model name and status
if status_response.get('status') == 'succeeded':
    print(f"model name: {MODEL}")
    print(f"status: succeeded")
else:
    print("model name: N/A")  # If not succeeded, no model name is shown (N/A)
    print(f"status: {status_response.get('status')}")

model name: N/A
status: running


In [14]:
# Save model name - note this model name isn't from the most recent fine-tuning job seen above. I realized after re-running
# the above code that there was no need to fine-tune another model after already having done it.
MODEL = 'ft:gpt-4o-2024-08-06:personal::B5GnWLsi'

In [16]:
# Add model name to JSON file
with open(json_path, "r") as f:
    config = json.load(f)

config['MODEL'] = MODEL  # Adds previusly saved model name

# Write updated data back to the JSON file
with open(json_path, 'w') as f:
    json.dump(config, f, indent = 4)

In [15]:
# Test code
completion = client.chat.completions.create(
  model = f"{MODEL}",  # Name from previous code chunk
  messages = [
    {"role": "system", "content": f"""
    You are a model trained to generate break and lunch schedules for employees based on their shift start and end times. 
    {scheduling_rule}
    """},
    {"role": "user", "content": """
    Here are the shift details for employees scheduled today:
        TM A: 4:00 AM - 11:00 AM
        TM B: 5:00 AM - 12:00 PM
        TM C: 6:00 AM - 1:00 PM
        TM D: 6:30 AM - 2:30 PM
        TM E: 7:00 AM - 2:00 PM
        TM F: 7:00 AM - 3:00 PM
        TM G: 9:00 AM - 5:00 PM
        TM H: 10:00 AM - 2:00 PM
        TM I: 10:00 AM - 6:00 PM
        TM J: 12:00 PM - 8:00 PM
        TM K: 12:00 PM - 8:00 PM
        TM L: 2:00 PM - 10:00 PM
        TM M: 3:15 PM - 10:00 PM
    
    Create a break and lunch schedule for every employee listed above. Use this format:

    | TM   | shift start | shift end | 1st break | lunch    | 2nd break | 
    |-------------------------------------------------------------------|
    | TM A | XX:XX XM    | XX:XX XM  | XX:XX XM  | XX:XX XM | XX:XX XM  |
    | TM B | XX:XX XM    | XX:XX XM  | XX:XX XM  | XX:XX XM | XX:XX XM  |
    
    If a lunch or 2nd break is not applicable leave blank.
    """}
  ],
  max_tokens = 750  # I had to increase this value since the full response was not being shown
)

# Display message content
print(completion.choices[0].message.content)


    | TM   | shift start | shift end | 1st break | lunch    | 2nd break |
    |-------------------------------------------------------------------|
    | TM A | 4:00 AM     | 11:00 AM  | 6:00 AM   | 9:00 AM  | 9:45 AM   |
    | TM B | 5:00 AM     | 12:00 PM  | 7:00 AM   | 10:00 AM | 10:45 AM  |
    | TM C | 6:00 AM     | 1:00 PM   | 8:00 AM   | 10:45 AM | 12:00 PM  |
    | TM D | 6:30 AM     | 2:30 PM   | 8:30 AM   | 11:15 AM | 1:30 PM   |
    | TM E | 7:00 AM     | 2:00 PM   | 9:15 AM   | 11:00 AM | 1:00 PM   |
    | TM F | 7:00 AM     | 3:00 PM   | 9:00 AM   | 11:30 AM | 1:45 PM   |
    | TM G | 9:00 AM     | 5:00 PM   | 11:15 AM  | 1:30 PM  | 3:45 PM   |
    | TM H | 10:00 AM    | 2:00 PM   | 12:15 PM  |          |           |
    | TM I | 10:00 AM    | 6:00 PM   | 12:00 PM  | 2:45 PM  | 4:45 PM   |
    | TM J | 12:00 PM    | 8:00 PM   | 2:00 PM   | 4:45 PM  | 6:45 PM   |
    | TM K | 12:00 PM    | 8:00 PM   | 2:15 PM   | 5:30 PM  | 6:00 PM   |
    | TM L | 2:00 PM     | 10:00 PM  

The test code above works perfectly and will be used in the following main() function, which will be saved to a .py file.

Export specific code to a .py file:

In [19]:
code_to_export = '''
import json
import requests
import streamlit as st
from openai import OpenAI 

# Streamlit UI code wrapped in main() function
def main():
    
    # JSON file path
    json_path = 'config.json'

    # Load API key from the JSON file
    with open(json_path, "r") as f:
        config = json.load(f)
        api_key = config['api_key']
        MODEL = config['MODEL']
    
    # Client for API call
    client = OpenAI(api_key = api_key)
    
    # Define scheduling rules to embed in the prompt
    scheduling_rule = """
    Follow these scheduling rules:

    - Breaks are 15 minutes long.
    - Lunches are 45 minutes long.
    - Minimize overlap between team member breaks and lunches as much as possible.
    - Schedule breaks no earlier than 2 hours after clock-in.
    - Schedule lunch no later than the 4th hour and 59th minute of work (lunch must occur before the 5th hour).
    - For shifts that are 6 hours or more, schedule a 2nd break between the time the employee returns from lunch 
      and their shift end time.
    - Do not schedule a break for a team member's last 15 minutes of their shift.
    - Lunches are only scheduled when the shift is longer than 5 hours.
    - Shifts that are 4 hours and 59 minutes long or less only get a single break.
    """
    
    # Streamlit UI
    st.set_page_config(page_title = "Employee Break/Lunch Schedule Generator", layout = "wide")
    
    # App title
    st.title("Employee Break/Lunch Schedule Generator")

    # User input for schedule 
    user_input = st.text_area("Enter Employee Schedule:")

    # Button to trigger the model call
    if st.button("Generate Schedule"):
        if user_input:
            # Call OpenAI API directly to generate schedule
            response = client.chat.completions.create(
                model = MODEL,  
                messages = [
                    {"role": "system", "content": f"""
                    You are a model trained to generate break and lunch schedules for employees based on their 
                    shift start and end times. 
                    {scheduling_rule}
                    """},
                    {"role": "user", "content": f"""
                    Here are the shift details for employees scheduled today:
                    {user_input}
    
                    Create a break and lunch schedule for every employee listed above. Use this format:

                    | TM   | shift start - shift end | 1st break | lunch    | 2nd break | 
                    | TM A | XX:XX XM - XX:XX XM  | XX:XX XM  | XX:XX XM | XX:XX XM  |
                    | TM B | XX:XX XM - XX:XX XM  | XX:XX XM  | XX:XX XM | XX:XX XM  |

                    If a lunch or 2nd break is not applicable enter 'NA'.
                    """}
                ],
                max_tokens = 1000
            )
            
            # Extract and display the generated schedule
            generated_schedule = response.choices[0].message.content
            st.subheader("Generated Break/Lunch Schedule:")
            
            # Dislay response line by line
            for line in generated_schedule.splitlines():
                st.write(line)
        else:
            st.error("Please enter the schedule details before generating.")

# Ensure Streamlit app runs only runs when executed as a script
if __name__ == "__main__":
    main()
'''

# Save the code to a .py file
name = "Employee Schedule App.py"

with open(name, 'w') as f:
    f.write(code_to_export)
    
print(f"Code exported to {name}.")

Code exported to 670PROJECT.py.


In [20]:
# Start Streamlit app
process = subprocess.Popen(['streamlit', 'run', 'Employee Schedule App.py'])
print("Starting Streamlit app - check browser window.")

In [21]:
# End Streamlit app
process.terminate()
print("Streamlit app stopped.")

Streamlit app stopped.
