In [53]:
from abc import ABC, abstractmethod
from datetime import datetime
import requests
import json
from pydifact.segmentcollection import Interchange
from pydifact.segments import Segment
from pydifact.control.characters import Characters
from datetime import datetime
from enum import Enum
from typing import List, Literal, Optional, Union

import fitz
import instructor
from openai import AsyncOpenAI, OpenAI
from pydantic import BaseModel, Field
from dotenv import load_dotenv

load_dotenv()

True

In [56]:
client = instructor.patch(OpenAI(), mode=instructor.Mode.JSON)

In [57]:
# Load the structured examples from the JSON file
with open('./data/structured_examples_cleaned.json', 'r') as file:
    structured_examples = json.load(file)

print(f"Loaded {len(structured_examples)} structured examples.")


Loaded 5 structured examples.


In [58]:
def parse_edifact_message(edifact_message):
    try:
        interchange = Interchange.from_str(edifact_message)
        interchange.validate()
        
        message_dict = {}

        # Iterate through messages and segments, populating the dictionary
        for message in interchange.get_messages():
            message_header = message.get_header_segment()

            message_dict[message_header.tag] = message_header.elements
            for segment in message.segments:
                tag = segment.tag
                if tag not in message_dict:
                    message_dict[tag] = []
                message_dict[tag].extend(segment.elements)

            message_footer = message.get_footer_segment()

            if message_footer.tag not in message_dict:
                message_dict[message_footer.tag] = []
            message_dict[message_footer.tag].extend(message_footer.elements)

        return message_dict
    except Exception as e:
        return f"Interchange validation error: {e}"

In [59]:
print(structured_examples[0])

{'edifact_input': "UNB+IATA:1+W1+ON+240703:1456+0001'\nUNH+1+DCQSMF:03:1:IA'\nLOR+W1:INK'\nFDQ+IN+3202+240703+BCN+MAD++IN+3201+2407030900+2407031100+INK+BCN'\nUNT+4+1'\nUNZ+1+0001'\n", 'ink_api_url': 'https://api-staging.inkcloud.io/api/1.8/seat_plan/get_seat_plan?station_iata=BCN&flight_number=IN3202&departure_date=2024-07-03', 'api_response': {'carrier_name': 'Ink Aviation', 'carrier_code': 'IN', 'airplane_model': 'A320', 'cabin_configuration': 'Y180', 'seat_plan_class': [{'name': 'Economy', 'seat_plan_row': [{'row': 1, 'seat_plan_position': [{'seat_number': '1A', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '1B', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '1C', 'exit_seat': False, 'type': 'seat'}, {'seat_number': 'Ai', 'exit_seat': False, 'type': 'aisle'}, {'seat_number': '1D', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '1E', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '1F', 'exit_seat': False, 'type': 'seat'}]}, {'row': 2, 'seat_plan_position': 

In [60]:
parse_edifact_message("UNB+IATA:1+W1+ON+240703:1456+0001'\nUNH+1+DCQSMF:03:1:IA'\nLOR+W1:INK'\nFDQ+IN+3202+240703+BCN+MAD++IN+3201+2407030900+2407031100+INK+BCN'\nUNT+4+1'\nUNZ+1+0001'\n")

{'UNH': ['1', ['DCQSMF', '03', '1', 'IA']],
 'LOR': [['W1', 'INK']],
 'FDQ': ['IN',
  '3202',
  '240703',
  'BCN',
  'MAD',
  '',
  'IN',
  '3201',
  '2407030900',
  '2407031100',
  'INK',
  'BCN'],
 'UNT': ['4', '1']}

In [61]:
test_example = structured_examples[0]
for key in test_example.keys():
    print(key)

edifact_input
ink_api_url
api_response
edifact_output


In [62]:
class EdifactEncoder(BaseModel):
    encoding_function: str = Field(description="Python code for the encoder function")

In [63]:
SYSTEM_PROMPT = f""" # Purpose
You are an expert programmer specializing in python and edifact

# Instructions
- Based On:
    - An input edifact message
    - API Response
    - Proper Output Edifact Response
- Write code to format the edifact response based on inputs

# Rules
- The code must be adaptive and work for ALL possible inputs
- Only hardcode values that 100% NEED to be hardcoded
    - Always pass values where applicable

# Input
- Input Edifact Message
- API Response
- Proper Output Edifact Response

# Output
- Encoding Function
"""

In [64]:
def format_examples(examples):
    out_string = "# Examples\n"
    for i, example in enumerate(examples):
        out_string += f"## Example {i}\n"
        out_string += f"### Input Edifact Message:\n{example['edifact_input']}\n"
        out_string += f"### API Response:\n{example['api_response']}\n"
        out_string += f"### Proper Output Edifact Response:\n{example['edifact_output']}\n"
    return out_string

In [65]:
print(format_examples(structured_examples))

# Examples
## Example 0
### Input Edifact Message:
UNB+IATA:1+W1+ON+240703:1456+0001'
UNH+1+DCQSMF:03:1:IA'
LOR+W1:INK'
FDQ+IN+3202+240703+BCN+MAD++IN+3201+2407030900+2407031100+INK+BCN'
UNT+4+1'
UNZ+1+0001'

### API Response:
{'carrier_name': 'Ink Aviation', 'carrier_code': 'IN', 'airplane_model': 'A320', 'cabin_configuration': 'Y180', 'seat_plan_class': [{'name': 'Economy', 'seat_plan_row': [{'row': 1, 'seat_plan_position': [{'seat_number': '1A', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '1B', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '1C', 'exit_seat': False, 'type': 'seat'}, {'seat_number': 'Ai', 'exit_seat': False, 'type': 'aisle'}, {'seat_number': '1D', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '1E', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '1F', 'exit_seat': False, 'type': 'seat'}]}, {'row': 2, 'seat_plan_position': [{'seat_number': '2A', 'exit_seat': False, 'type': 'seat'}, {'seat_number': '2B', 'exit_seat': False, 'type': 'seat'}, {

In [80]:
def create_encoding_function(input_edifact_message, api_response, proper_output_edifact_response):
    edifact_encoder_response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": (
                                "Please create the encoding function for the following: \n" + 
                                format_examples(structured_examples)
                            ),
                        },
                    ],
                },
            ],
            temperature=0.0,
            response_model=EdifactEncoder,
            max_tokens=4096,
        )
    return edifact_encoder_response

In [81]:
test_encoding_function = create_encoding_function(
    test_example['edifact_input'],
    test_example['api_response'],
    test_example['edifact_output']
)
print(test_encoding_function)

encoding_function='def encode_edifact(input_message, api_response):\n    def format_date(date_str):\n        return date_str[:6] + \'1300\' if len(date_str) == 6 else date_str\n\n    def format_seat(seat):\n        return f\'{seat["seat_number"]}:{"E" if seat["exit_seat"] else "N"}\'\n\n    def format_row(row):\n        return \'+\'.join([format_seat(seat) for seat in row["seat_plan_position"]])\n\n    def format_class(seat_class):\n        return f\'CBD+Y+1:{len(seat_class["seat_plan_row"])}+++++A:N+B:N+C:N+D:N+E:N+F:N\'\n\n    def format_rod(seat_class):\n        return [f\'ROD+{row["row"]}++{format_row(row)}\' for row in seat_class["seat_plan_row"]]\n\n    segments = input_message.split("\'")\n    output_segments = []\n\n    for segment in segments:\n        if segment.startswith(\'UNB\'):\n            output_segments.append(segment)\n        elif segment.startswith(\'UNH\'):\n            output_segments.append(segment)\n        elif segment.startswith(\'LOR\'):\n            output_

In [82]:
print(test_encoding_function.encoding_function)

def encode_edifact(input_message, api_response):
    def format_date(date_str):
        return date_str[:6] + '1300' if len(date_str) == 6 else date_str

    def format_seat(seat):
        return f'{seat["seat_number"]}:{"E" if seat["exit_seat"] else "N"}'

    def format_row(row):
        return '+'.join([format_seat(seat) for seat in row["seat_plan_position"]])

    def format_class(seat_class):
        return f'CBD+Y+1:{len(seat_class["seat_plan_row"])}+++++A:N+B:N+C:N+D:N+E:N+F:N'

    def format_rod(seat_class):
        return [f'ROD+{row["row"]}++{format_row(row)}' for row in seat_class["seat_plan_row"]]

    segments = input_message.split("'")
    output_segments = []

    for segment in segments:
        if segment.startswith('UNB'):
            output_segments.append(segment)
        elif segment.startswith('UNH'):
            output_segments.append(segment)
        elif segment.startswith('LOR'):
            output_segments.append(segment)
        elif segment.startswith('FDQ

In [83]:
def encode_edifact(input_message, api_response):
    def format_date(date_str):
        return date_str[:6] + '1300' if len(date_str) == 6 else date_str

    def format_seat(seat):
        return f'{seat["seat_number"]}:{"E" if seat["exit_seat"] else "N"}'

    def format_row(row):
        return '+'.join([format_seat(seat) for seat in row["seat_plan_position"]])

    def format_class(seat_class):
        return f'CBD+Y+1:{len(seat_class["seat_plan_row"])}+++++A:N+B:N+C:N+D:N+E:N+F:N'

    def format_rod(seat_class):
        return [f'ROD+{row["row"]}++{format_row(row)}' for row in seat_class["seat_plan_row"]]

    segments = input_message.split("'")
    output_segments = []

    for segment in segments:
        if segment.startswith('UNB'):
            output_segments.append(segment)
        elif segment.startswith('UNH'):
            output_segments.append(segment)
        elif segment.startswith('LOR'):
            output_segments.append(segment)
        elif segment.startswith('FDQ'):
            parts = segment.split('+')
            parts[4] = format_date(parts[4])
            output_segments.append('+'.join(parts))
        elif segment.startswith('UNT'):
            output_segments.append(segment)
        elif segment.startswith('UNZ'):
            output_segments.append(segment)

    if 'error' in api_response:
        output_segments.append(f'ERD+1:5')
    else:
        output_segments.append(f'FDR+{api_response["carrier_code"]}+{segments[3].split("+")[2]}+{segments[3].split("+")[4]}+{segments[3].split("+")[5]}+{segments[3].split("+")[6]}')
        output_segments.append('RAD+S+O')
        output_segments.append(f'EQD+Y:{api_response["cabin_configuration"].split()[1]}+++++{api_response["airplane_model"]}')
        output_segments.append(format_class(api_response["seat_plan_class"][0]))
        output_segments.extend(format_rod(api_response["seat_plan_class"][0]))

    output_segments.append(f'UNT+{len(output_segments) - 1}+1')
    output_segments.append('UNZ+1+0001')

    return "'".join(output_segments) + "'"

In [84]:
for example in structured_examples:
    encoded_output = encode_edifact(example['edifact_input'], example['api_response'])
    expected_output = example['edifact_output']
    
    if encoded_output == expected_output:
        print("Input and output match.")
    else:
        print("Input and output do not match.")
        print("Encoded output:")
        print(encoded_output)
        print("Expected output:")
        print(expected_output)
    
    print("\n\n")

IndexError: list index out of range

In [21]:
def encode_edifact2(input_message, api_response):
    # Parse the input message
    segments = input_message.strip().split("'\n")
    unb_segment = segments[0]
    unz_segment = segments[-1]
    # Extract relevant data from the API response
    carrier_name = api_response.get('carrier_name', 'Unknown Carrier')
    carrier_code = api_response.get('carrier_code', 'Unknown Code')
    airplane_model = api_response.get('airplane_model', 'Unknown Model')
    cabin_configuration = api_response.get('cabin_configuration', 'Unknown Configuration')
    seat_plan_class = api_response.get('seat_plan_class', [])

    # Prepare the output segments
    output_segments = []
    output_segments.append(unb_segment)
    output_segments.append("UNH+1+DCRSMF:03:1:IA'")

    # Add flight details
    if 'LOR' in segments[1]:
        flight_info = segments[1].split('+')
        output_segments.append(f"FDR+{carrier_code}+{flight_info[1]}+{flight_info[2]}+{flight_info[3]}+{flight_info[4]}+{flight_info[5]}+")
    else:
        output_segments.append(f"FDR+{carrier_code}+{flight_info[1]}+{flight_info[2]}+{flight_info[3]}+")

    output_segments.append("RAD+S+O'")
    output_segments.append(unz_segment)

    # Join all segments into a single output message
    return "'\n".join(output_segments)

In [22]:
for example in structured_examples:
    encoded_output = encode_edifact2(example['edifact_input'], example['api_response'])
    expected_output = example['edifact_output']
    
    if encoded_output == expected_output:
        print("Input and output match.")
    else:
        print("Input and output do not match.")
        print("Encoded output:")
        print(encoded_output)
        print("Expected output:")
        print(expected_output)
    
    print("\n\n")

UnboundLocalError: cannot access local variable 'flight_info' where it is not associated with a value