# Testing
In diesem Notebook werden die Tests für die Anforderungen durchgeführt.

## Testing locations

### Prompt 1

In [121]:
import openai
import csv

class OpenAIWrapper:
    def __init__(self, api_key):
        self.api_key = api_key
        openai.api_key = self.api_key

    def generate_chat_response(self, prompt):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Please analyze the two film production schedules below. Specifically, assess the spatial sequence of locations in each schedule. Identify if one schedule minimizes travel distance between scenes more effectively than the other by calculating the ditsances bezween scenes in the given sequence and discuss how this might impact production logistics and efficiency."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message['content']

def format_schedule(schedule_entries):
    formatted_schedule = ""
    for entry in sorted(schedule_entries, key=lambda x: x[2]):  # Sort by Scene Number which is the third item in each row (zero-indexed)
        scene_number, actors, equipment, location, duration = entry[2], entry[3], entry[4], entry[5], entry[6]
        formatted_schedule += f"- Scene {scene_number} (Actors: {actors}, Equipment: {equipment}, Location: {location}, Duration: {duration})\n"
    return formatted_schedule

def create_prompt(pair_schedules):
    prompt = "Here are two different film production schedules:\n\n"
    for schedule_type, entries in pair_schedules.items():
        prompt += f"{schedule_type}:\n{format_schedule(entries)}\n"
    return prompt

def analyze_schedules(filename, api_key):
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)  
        pair_schedules = {}
        for row in reader:
            pair_id = row[0]
            schedule_type = row[1]
            if pair_id not in pair_schedules:
                pair_schedules[pair_id] = {}
            if schedule_type not in pair_schedules[pair_id]:
                pair_schedules[pair_id][schedule_type] = []
            pair_schedules[pair_id][schedule_type].append(row)

    openai_wrapper = OpenAIWrapper(api_key)
    results = {}

    for pair_id, schedules in pair_schedules.items():
        prompt = create_prompt(schedules)
        response = openai_wrapper.generate_chat_response(prompt)
        results[pair_id] = response

    return results

# OpenAI API key
api_key = ''
filename = 'data/testdata/testdata_location.csv'

results = analyze_schedules(filename, api_key)
for pair_id, insights in results.items():
    print(f"GPT-3's Insights for Pair ID {pair_id}: {insights}")


GPT-3's Insights for Pair ID 1: In Schedule Plan 1, the spatial sequence of locations is as follows:

- First, there are three consecutive scenes in Zürich.
- Then, the production moves to Altstetten for Scene 2.
- From Altstetten, the crew travels to Seebach, then back to Zürich Enge for Scene 2.
- Following Zürich Enge, the crew heads to Zürich Hardbrücke for Scene 3.
- The crew then goes to Affoltern, before returning to Zürich Hardbrücke for Scene 3.
- Next, the crew travels to Zürich Bahnhofstrasse for Scene 4.
- After Zürich Bahnhofstrasse, the crew goes to Wipkingen for Scene 4.
- Finally, the last two scenes take place in Wollishofen and Schwamendingen, respectively.

In Schedule Plan 2, the spatial sequence of locations is as follows:

- First, the crew heads to Wollishofen for Scene 1.
- Then, they travel to Seebach for Scene 1 and Oerlikon for Scene 1.
- Next, the crew goes to Schlieren for Scene 2.
- After Schlieren, they travel to Altstetten for Scene 2 and back to Schlier

### Prompt 2

In [122]:
import openai
import csv
import math

class OpenAIWrapper:
    def __init__(self, api_key):
        self.api_key = api_key
        openai.api_key = self.api_key

    def generate_chat_response(self, prompt):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Please analyze the two film production schedules below. Specifically, assess the spatial sequence of locations in each schedule. Identify if one schedule minimizes travel distance between scenes more effectively than the other by calculating the ditsances bezween scenes in the given sequence and discuss how this might impact production logistics and efficiency."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message['content']

def calculate_distance(coord1, coord2):
    return math.sqrt((float(coord1[0]) - float(coord2[0]))**2 + (float(coord1[1]) - float(coord2[1]))**2)

def format_schedule(schedule_entries):
    formatted_schedule = ""
    for entry in schedule_entries:
        if len(entry) >= 9:
            pair_id, schedule_type, order_number, scene_number, actors, equipment, location, coordinates, duration = entry
            formatted_schedule += f"- Order {order_number}, Scene {scene_number} (Actors: {actors}, Equipment: {equipment}, Location: {location}, Coordinates: {coordinates}, Duration: {duration})\n"
        else:
            print(f"Skipping entry due to insufficient data: {entry}")
    return formatted_schedule

def create_prompt(pair_schedules):
    prompt = "Here are two different film production schedules. For each schedule, evaluate the order of scenes based on the travel distance between their locations. Identify which schedule minimizes travel distance between scenes more effectively and suggest which one is more efficient based on location sequence:\n\n"
    for schedule_type, entries in pair_schedules.items():
        prompt += f"{schedule_type}:\n{format_schedule(entries)}\n"
    return prompt

def analyze_schedules(filename, api_key):
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader) 
        pair_schedules = {}
        for row in reader:
            pair_id = row[0]
            schedule_type = row[1]
            if pair_id not in pair_schedules:
                pair_schedules[pair_id] = {}
            if schedule_type not in pair_schedules[pair_id]:
                pair_schedules[pair_id][schedule_type] = []
            pair_schedules[pair_id][schedule_type].append(row)

    openai_wrapper = OpenAIWrapper(api_key)
    results = {}

    for pair_id, schedules in pair_schedules.items():
        prompt = create_prompt(schedules)
        response = openai_wrapper.generate_chat_response(prompt)
        results[pair_id] = response

    return results

# OpenAI API key
api_key = ''
filename = 'data/testdata/testdata_location.csv'

results = analyze_schedules(filename, api_key)
for pair_id, insights in results.items():
    print(f"GPT-3's Insights for Pair ID {pair_id}: {insights}")


GPT-3's Insights for Pair ID 1: To compare the two film production schedules (Schedule Plan 1 and Schedule Plan 2) in terms of minimizing travel distance between scenes, we need to analyze the spatial sequence of locations for each pair of scenes based on their order numbers in each schedule. We will calculate the distances between consecutive locations to determine which schedule is more efficient in terms of minimizing travel distance. 

Let's analyze each Pair ID from the two schedules:
1. Pair ID: Scene 1 to Scene 2
   - For Schedule Plan 1: 
     - Scene 1 at Schlieren, Coordinates: (47.3962, 8.4515)
     - Scene 2 at Altstetten, Coordinates: (47.3914, 8.4886)
     - Distance between Schlieren and Altstetten: 3.79 km
   - For Schedule Plan 2:
     - Scene 1 at Schlieren, Coordinates: (47.3962, 8.4515)
     - Scene 2 at Seebach, Coordinates: (47.4223, 8.5414)
     - Distance between Schlieren and Seebach: 9.4 km
   - Conclusion: Schedule Plan 1 minimizes travel distance between Sce

## Testing Actors and Equipment

### Prompt 1

In [89]:
import openai
import csv

class OpenAIWrapper:
    def __init__(self, api_key):
        self.api_key = api_key
        openai.api_key = self.api_key

    def generate_chat_response(self, prompt):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Please analyze the two film production schedules below. Specifically, assess the sequence of scenes in each schedule based on the continuity of actors and equipment. Determine which schedule is more suitable by checking how actors and equipment appear in consecutive scenes rather than being scattered across all scenes. Identify which schedule minimizes the distribution of actors and equipment between scenes more effectively."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message['content']

def format_schedule(schedule_entries):
    formatted_schedule = ""
    for entry in sorted(schedule_entries, key=lambda x: x[2]):  # Sort by Scene Number which is the third item in each row (zero-indexed)
        scene_number, actors, equipment, location, duration = entry[2], entry[3], entry[4], entry[5], entry[6]
        formatted_schedule += f"- Scene {scene_number} (Actors: {actors}, Equipment: {equipment}, Location: {location}, Duration: {duration})\n"
    return formatted_schedule

def create_prompt(pair_schedules):
    prompt = "Here are two different film production schedules:\n\n"
    for schedule_type, entries in pair_schedules.items():
        prompt += f"{schedule_type}:\n{format_schedule(entries)}\n"
    return prompt

def analyze_schedules(filename, api_key):
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)  # Skip header
        pair_schedules = {}
        for row in reader:
            pair_id = row[0]
            schedule_type = row[1]
            if pair_id not in pair_schedules:
                pair_schedules[pair_id] = {}
            if schedule_type not in pair_schedules[pair_id]:
                pair_schedules[pair_id][schedule_type] = []
            pair_schedules[pair_id][schedule_type].append(row)

    openai_wrapper = OpenAIWrapper(api_key)
    results = {}

    for pair_id, schedules in pair_schedules.items():
        prompt = create_prompt(schedules)
        response = openai_wrapper.generate_chat_response(prompt)
        results[pair_id] = response

    return results

# OpenAI API key
api_key = ''
filename = 'data/testdata/testdata_actors_equipment.csv'

results = analyze_schedules(filename, api_key)
for pair_id, insights in results.items():
    print(f"GPT-3's Insights for Pair ID {pair_id}: {insights}")


GPT-3's Insights for Pair ID 2: In evaluating the two film production schedules provided, we need to consider the efficiency of the sequence of scenes based on the continuity of actors and equipment, which can impact the ease of production and reduce logistical efforts. 

In Schedule Plan 1, the scenes are arranged as follows:
1. Scene 1 with Laura Neumann, Peter Schmidt, and specific equipment at München Marienplatz.
2. Scene 2 with the same actors and additional equipment at Olympiapark München.
3. Scene 3 with different actors and equipment at Englischer Garten.

In this schedule, Scene 2 disrupts the continuity by introducing different actors and additional equipment not present in Scene 1, complicating the logistics of setting up and coordinating the different elements required. This distribution of actors and equipment across scenes reduces efficiency due to the disjointed nature of the sequencing.

On the other hand, Schedule Plan 2 offers the following sequence:
1. Scene 1 with

### Prompt 2

In [119]:
import openai
import csv

class OpenAIWrapper:
    def __init__(self, api_key):
        self.api_key = api_key
        openai.api_key = self.api_key

    def generate_chat_response(self, prompt):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": (
                    "Please analyze the two film production schedules below. Specifically, assess the sequence of scenes in each schedule based on the continuity of actors and equipment. "
                    "Determine which schedule is more suitable by checking how actors and equipment appear in consecutive scenes rather than being scattered across all scenes. "
                    "The schedule that minimizes the distribution of actors and equipment between scenes is considered more efficient because it reduces the time and logistical efforts needed to change setups, move equipment, and coordinate actors. "
                    "Identify which schedule achieves this efficiency better by ensuring that scenes with common actors and/or equipment follow each other consecutively. Provide detailed reasons for your conclusion. "
                    "1. Review each schedule and list the sequence of actors and equipment for each scene. "
                    "2. Check for continuity by noting if the same actors and equipment appear in consecutive scenes. "
                    "3. Highlight any breaks in continuity where actors or equipment change between scenes. "
                    "4. Identify which schedule has fewer breaks in continuity, indicating a more efficient arrangement. "
                    "5. Conclude which schedule is more efficient based on the above criteria, providing specific examples from the schedules to support your reasoning."
                )},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message['content']

def format_schedule(schedule_entries):
    formatted_schedule = ""
    for entry in sorted(schedule_entries, key=lambda x: x[2]):  # Sort by Order Number which is the third item in each row (zero-indexed)
        order_number, scene_number, actors, equipment, location, duration = entry[2], entry[3], entry[4], entry[5], entry[6], entry[7]
        formatted_schedule += f"- Order {order_number}, Scene {scene_number}: Actors: {actors}, Equipment: {equipment}, Location: {location}, Duration: {duration}\n"
    return formatted_schedule

def create_prompt(pair_schedules):
    prompt = "Here are two different film production schedules:\n\n"
    for schedule_type, entries in pair_schedules.items():
        prompt += f"{schedule_type}:\n{format_schedule(entries)}\n"
    return prompt

def analyze_schedules(filename, api_key):
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)  # Skip header
        pair_schedules = {}
        for row in reader:
            pair_id = row[0]
            schedule_type = row[1]
            if pair_id not in pair_schedules:
                pair_schedules[pair_id] = {}
            if schedule_type not in pair_schedules[pair_id]:
                pair_schedules[pair_id][schedule_type] = []
            pair_schedules[pair_id][schedule_type].append(row)

    openai_wrapper = OpenAIWrapper(api_key)
    results = {}

    for pair_id, schedules in pair_schedules.items():
        prompt = create_prompt(schedules)
        response = openai_wrapper.generate_chat_response(prompt)
        results[pair_id] = response

    return results

# OpenAI API key
api_key = ''
filename = 'data/testdata/testdata_actors_equipment.csv'

results = analyze_schedules(filename, api_key)
for pair_id, insights in results.items():
    print(f"GPT-3's Insights for Pair ID {pair_id}: {insights}")


GPT-3's Insights for Pair ID 31: Analyzing the two film production schedules:

In Schedule Plan 1:
Scene 1: Actors Morgan Lee and Casey Johnson with Equipment Drone J and Camera D
Scene 2: Actors Jordan Davis and Skyler Taylor with Equipment Drone J and Camera D
Scene 3: Actors Casey Johnson and Jamie Taylor with Equipment Drone J and Camera D
Scene 4: Actors Skyler Taylor and Morgan Lee with Equipment Drone J and Camera D
Scene 5: Actors Jamie Taylor and Jordan Davis with Equipment Drone J and Camera D

In Schedule Plan 2:
Scene 1: Actors Morgan Lee and Casey Johnson with Equipment Drone J and Camera D
Scene 5: Actors Jamie Taylor and Jordan Davis with Equipment Drone J and Camera D
Scene 2: Actors Jordan Davis and Skyler Taylor with Equipment Drone J and Camera D
Scene 4: Actors Skyler Taylor and Morgan Lee with Equipment Drone J and Camera D
Scene 3: Actors Casey Johnson and Jamie Taylor with Equipment Drone J and Camera D

Analyzing the continuity of actors and equipment:
- In Sche

## Testing actors/equipment AND locations

### Prompt 1

In [123]:
import openai
import csv

class OpenAIWrapper:
    def __init__(self, api_key):
        self.api_key = api_key
        openai.api_key = self.api_key

    def generate_chat_response(self, prompt):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Please analyze the two film production schedules below. Specifically, assess the sequence of scenes in each schedule based on the continuity of actors, equipment, and location proximity. "
                "Determine which schedule is more suitable by checking how actors, equipment, and locations appear in consecutive scenes rather than being scattered across all scenes. "
                "The schedule that minimizes the distribution of actors, equipment, and location changes between scenes is considered more efficient because it reduces the time and logistical efforts needed to change setups, move equipment, and coordinate actors. "
                "Identify which schedule achieves this efficiency better by ensuring that scenes with common actors and/or equipment follow each other consecutively and that scenes at nearby locations are also in sequence. Provide detailed reasons for your conclusion."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message['content']

def format_schedule(schedule_entries):
    formatted_schedule = ""
    for entry in sorted(schedule_entries, key=lambda x: x[2]):  # Sort by Order Number which is the third item in each row (zero-indexed)
        order_number, scene_number, actors, equipment, location, coordinates, duration = entry[2], entry[3], entry[4], entry[5], entry[6], entry[7], entry[8]
        formatted_schedule += f"- Order {order_number}, Scene {scene_number}: Actors: {actors}, Equipment: {equipment}, Location: {location} (Coordinates: {coordinates}), Duration: {duration}\n"
    return formatted_schedule

def create_prompt(pair_schedules):
    prompt = "Here are two different film production schedules:\n\n"
    for schedule_type, entries in pair_schedules.items():
        prompt += f"{schedule_type}:\n{format_schedule(entries)}\n"
    return prompt

def analyze_schedules(filename, api_key):
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)  # Skip header
        pair_schedules = {}
        for row in reader:
            pair_id = row[0]
            schedule_type = row[1]
            if pair_id not in pair_schedules:
                pair_schedules[pair_id] = {}
            if schedule_type not in pair_schedules[pair_id]:
                pair_schedules[pair_id][schedule_type] = []
            pair_schedules[pair_id][schedule_type].append(row)

    openai_wrapper = OpenAIWrapper(api_key)
    results = {}

    for pair_id, schedules in pair_schedules.items():
        prompt = create_prompt(schedules)
        response = openai_wrapper.generate_chat_response(prompt)
        results[pair_id] = response

    return results

# OpenAI API key
api_key = ''
filename = 'data/testdata/testdata_actors_equip_loc.csv'

results = analyze_schedules(filename, api_key)
for pair_id, insights in results.items():
    print(f"GPT-3's Insights for Pair ID {pair_id}: {insights}")


GPT-3's Insights for Pair ID 26: In comparing the two film production schedules provided, it is evident that Schedule Plan 2 is more efficient in terms of continuity of actors, equipment, and location proximity. 

In Schedule Plan 2, Scene 1 and Scene 2 share common actors (Hans Müller) and equipment (Microphone C, Camera D), making it logistically easier to move from one scene to the next without significant setup changes. Additionally, the location of Scene 2 (Zürich Zoo) in Schedule Plan 2 is closer to the initial location of Scene 1 (Zürich Hauptbahnhof) compared to the location of Scene 2 in Schedule Plan 1 (Zürich Opernhaus), reducing the travel time and effort required for the crew and equipment to transition between scenes.

Furthermore, the sequence of scenes in Schedule Plan 2 maintains a more logical flow in terms of actor availability and equipment usage, as it progresses from Hans Müller to Laura Becker and then to Tim Braun, ensuring a smoother transition in terms of acto

### Prompt 2

In [91]:
import openai
import csv

class OpenAIWrapper:
    def __init__(self, api_key):
        self.api_key = api_key
        openai.api_key = self.api_key

    def generate_chat_response(self, prompt):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Please analyze the two film production schedules below. Specifically, assess the sequence of scenes in each schedule based on the continuity of actors, equipment, and location proximity. "
                "Determine which schedule is more suitable by checking how actors, equipment, and locations appear in consecutive scenes rather than being scattered across all scenes. "
                "The schedule that minimizes the distribution of actors, equipment, and location changes between scenes is considered more efficient because it reduces the time and logistical efforts needed to change setups, move equipment, and coordinate actors. "
                "Identify which schedule achieves this efficiency better by ensuring that scenes with common actors and/or equipment follow each other consecutively and that scenes at nearby locations are also in sequence. Provide detailed reasons for your conclusion. "
                "Remember to evaluate both continuity (actors, equipment) and location proximity equally. Each criterion should be weighted equally in your analysis to determine the most efficient schedule."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message['content']

def format_schedule(schedule_entries):
    formatted_schedule = ""
    for entry in sorted(schedule_entries, key=lambda x: x[2]):  # Sort by Order Number which is the third item in each row (zero-indexed)
        order_number, scene_number, actors, equipment, location, coordinates, duration = entry[2], entry[3], entry[4], entry[5], entry[6], entry[7], entry[8]
        formatted_schedule += f"- Order {order_number}, Scene {scene_number}: Actors: {actors}, Equipment: {equipment}, Location: {location} (Coordinates: {coordinates}), Duration: {duration}\n"
    return formatted_schedule

def create_prompt(pair_schedules):
    prompt = "Here are two different film production schedules:\n\n"
    for schedule_type, entries in pair_schedules.items():
        prompt += f"{schedule_type}:\n{format_schedule(entries)}\n"
    prompt += "\nPlease determine which schedule is more efficient based on the criteria mentioned above."
    return prompt

def analyze_schedules(filename, api_key):
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)  # Skip header
        pair_schedules = {}
        for row in reader:
            pair_id = row[0]
            schedule_type = row[1]
            if pair_id not in pair_schedules:
                pair_schedules[pair_id] = {}
            if schedule_type not in pair_schedules[pair_id]:
                pair_schedules[pair_id][schedule_type] = []
            pair_schedules[pair_id][schedule_type].append(row)

    openai_wrapper = OpenAIWrapper(api_key)
    results = {}

    for pair_id, schedules in pair_schedules.items():
        prompt = create_prompt(schedules)
        response = openai_wrapper.generate_chat_response(prompt)
        results[pair_id] = response

    return results

# OpenAI API key
api_key = ''
filename = 'data/testdata/testdata_actors_equip_loc.csv'

results = analyze_schedules(filename, api_key)
for pair_id, insights in results.items():
    print(f"GPT-3's Insights for Pair ID {pair_id}: {insights}")


GPT-3's Insights for Pair ID 1: In analyzing the two film production schedules provided, the efficiency can be evaluated based on the continuity of actors, equipment, and location proximity.

**Schedule Plan 1 Analysis:**
- In Schedule Plan 1, the sequence of scenes is as follows: Scene 1, Scene 2, Scene 3.
- The actors are not consistent across the scenes, with different sets of actors appearing in each scene.
- The equipment is also not consistent between scenes, with a different combination of equipment used in each scene.
- The locations are different for each scene, with scenes taking place in Schlieren, Altstetten, and Zürich Hardbrücke.

**Schedule Plan 2 Analysis:**
- In Schedule Plan 2, the sequence of scenes is Scene 1, Scene 3, Scene 2.
- This schedule shows better continuity in terms of actors, as Scene 1 and Scene 3 share the same set of actors.
- There is also better equipment continuity in Schedule Plan 2, as the equipment used in Scene 1 is also used in Scene 3, reducin

### Prompt 3

In [101]:
import openai
import csv
import math

class OpenAIWrapper:
    def __init__(self, api_key):
        self.api_key = api_key
        openai.api_key = self.api_key

    def generate_chat_response(self, prompt):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Please analyze the two film production schedules below. Specifically, assess the sequence of scenes in each schedule based on the continuity of actors, equipment, and location proximity. "
                "For actors and equipment, determine how well the schedule groups scenes with common actors or equipment together in consecutive order. "
                "For locations, determine which schedule minimizes the total travel distance between consecutive scenes. "
                "The schedule that minimizes the distribution of actors, equipment, and location changes between scenes is considered more efficient because it reduces the time and logistical efforts needed to change setups, move equipment, and coordinate actors. "
                "Assign a score from 1 to 3 for each of the following criteria for each schedule: "
                "1. Continuity of Actors: 1 (poor) if scenes with common actors are spread out, 2 (average) if there is some grouping, 3 (excellent) if scenes with common actors are grouped together consecutively. "
                "2. Continuity of Equipment: 1 (poor) if scenes with common equipment are spread out, 2 (average) if there is some grouping, 3 (excellent) if scenes with common equipment are grouped together consecutively. "
                "3. Location Proximity: 1 (poor) if locations are far apart, 2 (average) if locations are somewhat close, 3 (excellent) if locations are very close. "
                "Provide a total score for each schedule and conclude which schedule is more efficient based on the total score."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message['content']

def format_schedule(schedule_entries):
    formatted_schedule = ""
    for entry in sorted(schedule_entries, key=lambda x: x[2]):  # Sort by Order Number which is the third item in each row (zero-indexed)
        order_number, scene_number, actors, equipment, location, coordinates, duration = entry[2], entry[3], entry[4], entry[5], entry[6], entry[7], entry[8]
        formatted_schedule += f"- Order {order_number}, Scene {scene_number}: Actors: {actors}, Equipment: {equipment}, Location: {location} (Coordinates: {coordinates}), Duration: {duration}\n"
    return formatted_schedule

def haversine(coord1, coord2):
    # Calculate the great-circle distance between two points on the Earth's surface.
    R = 6371  # Radius of the Earth in kilometers.
    lat1, lon1 = coord1
    lat2, lon2 = coord2
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) ** 2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) ** 2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    distance = R * c
    return distance

def calculate_total_distance(schedule):
    total_distance = 0
    previous_location = None
    for entry in schedule:
        coordinates = entry[7]
        lat, lon = map(float, coordinates.strip('()').split(','))
        if previous_location is not None:
            total_distance += haversine(previous_location, (lat, lon))
        previous_location = (lat, lon)
    return total_distance

def evaluate_schedule(schedule_entries):
    actors_seen = {}
    equipment_seen = {}
    actor_continuity = 3
    equipment_continuity = 3
    for idx, entry in enumerate(schedule_entries):
        actors = set(entry[4].split(', '))
        equipment = set(entry[5].split(', '))
        
        # Check actor continuity
        for actor in actors:
            if actor in actors_seen and actors_seen[actor] != idx - 1:
                actor_continuity = min(actor_continuity, 1)
            actors_seen[actor] = idx
        
        # Check equipment continuity
        for equip in equipment:
            if equip in equipment_seen and equipment_seen[equip] != idx - 1:
                equipment_continuity = min(equipment_continuity, 1)
            equipment_seen[equip] = idx
            
    location_proximity = 3 if calculate_total_distance(schedule_entries) < 10 else 2 if calculate_total_distance(schedule_entries) < 50 else 1
    
    return actor_continuity, equipment_continuity, location_proximity

def create_prompt(pair_schedules):
    prompt = "Here are two different film production schedules:\n\n"
    for schedule_type, entries in pair_schedules.items():
        prompt += f"{schedule_type}:\n{format_schedule(entries)}\n"
    return prompt

def analyze_schedules(filename, api_key):
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        header = next(reader, None)  # Skip header
        if header is None:
            print("Error: CSV file is empty or missing a header row.")
            return {}

        pair_schedules = {}
        for row in reader:
            if len(row) < 9:
                print(f"Error: Row has insufficient columns: {row}")
                continue

            pair_id = row[0]
            schedule_type = row[1]
            if pair_id not in pair_schedules:
                pair_schedules[pair_id] = {}
            if schedule_type not in pair_schedules[pair_id]:
                pair_schedules[pair_id][schedule_type] = []
            pair_schedules[pair_id][schedule_type].append(row)

    openai_wrapper = OpenAIWrapper(api_key)
    results = {}

    for pair_id, schedules in pair_schedules.items():
        prompt = create_prompt(schedules)
        response = openai_wrapper.generate_chat_response(prompt)
        
        plan1_actors, plan1_equipment, plan1_location = evaluate_schedule(schedules["Schedule Plan 1"])
        plan2_actors, plan2_equipment, plan2_location = evaluate_schedule(schedules["Schedule Plan 2"])
        
        total_score_plan1 = plan1_actors + plan1_equipment + plan1_location
        total_score_plan2 = plan2_actors + plan2_equipment + plan2_location
        
        result = f"Schedule Plan 1:\n1. Continuity of Actors: {plan1_actors}\n2. Continuity of Equipment: {plan1_equipment}\n3. Location Proximity: {plan1_location}\nTotal Score: {total_score_plan1}\n\n"
        result += f"Schedule Plan 2:\n1. Continuity of Actors: {plan2_actors}\n2. Continuity of Equipment: {plan2_equipment}\n3. Location Proximity: {plan2_location}\nTotal Score: {total_score_plan2}\n\n"
        result += "Conclusion: "
        
        if total_score_plan1 > total_score_plan2:
            result += "Schedule Plan 1 is more efficient."
        else:
            result += "Schedule Plan 2 is more efficient."
        
        results[pair_id] = result

    return results

# OpenAI API key
api_key = ''
filename = 'data/testdata/testdata_actors_equip_loc.csv'

results = analyze_schedules(filename, api_key)
for pair_id, insights in results.items():
    print(f"GPT-3's Insights for Pair ID {pair_id}: {insights}")


GPT-3's Insights for Pair ID 26: Schedule Plan 1:
1. Continuity of Actors: 3
2. Continuity of Equipment: 3
3. Location Proximity: 3
Total Score: 9

Schedule Plan 2:
1. Continuity of Actors: 3
2. Continuity of Equipment: 3
3. Location Proximity: 3
Total Score: 9

Conclusion: Schedule Plan 2 is more efficient.
GPT-3's Insights for Pair ID 27: Schedule Plan 1:
1. Continuity of Actors: 3
2. Continuity of Equipment: 3
3. Location Proximity: 3
Total Score: 9

Schedule Plan 2:
1. Continuity of Actors: 1
2. Continuity of Equipment: 1
3. Location Proximity: 3
Total Score: 5

Conclusion: Schedule Plan 1 is more efficient.
GPT-3's Insights for Pair ID 28: Schedule Plan 1:
1. Continuity of Actors: 3
2. Continuity of Equipment: 3
3. Location Proximity: 3
Total Score: 9

Schedule Plan 2:
1. Continuity of Actors: 1
2. Continuity of Equipment: 1
3. Location Proximity: 3
Total Score: 5

Conclusion: Schedule Plan 1 is more efficient.
GPT-3's Insights for Pair ID 29: Schedule Plan 1:
1. Continuity of Act