In [7]:
import json
import requests
import tkinter as tk
from tkinter import filedialog

def get_url_from_api(custom_id):
    # Extract the five-digit code
    modified_custom_id = '-'.join(custom_id.split('-')[:2])
    
    # Build the API URL
    api_url = f"https://data.kb.se/lookup/?id={modified_custom_id}"
    
    try:
        response = requests.get(api_url, headers={'accept': 'application/json'})
        response.raise_for_status()  # Raise an error for bad status codes
        data = response.json()
        return data.get('uri')
    except requests.RequestException as e:
        print(f"Failed to fetch URL for custom_id {custom_id}: {e}")
        return None

def jsonl_to_json():
    root = tk.Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename(title="Select JSONL file", filetypes=[("JSON Lines files", "*.jsonl")])
    if not file_path:
        print("No file selected.")
        return

    extracted_content = []
    with open(file_path, 'r') as f:
        for line in f:
            json_obj = json.loads(line)
            try:
                custom_id = json_obj.get('custom_id')
                url = get_url_from_api(custom_id) if custom_id else None
                choices = json_obj['response']['body']['choices']
                for choice in choices:
                    if choice['message']['role'] == 'assistant':
                        try:
                            assistant_content = json.loads(choice['message']['content'])
                            if isinstance(assistant_content, dict):
                                # Process as a dictionary
                                assistant_content['custom_id'] = custom_id
                                assistant_content['url'] = url
                                extracted_content.append(assistant_content)
                            elif isinstance(assistant_content, list):
                                # Process each item in the list if it's a list of dictionaries
                                for item in assistant_content:
                                    if isinstance(item, dict):
                                        item['custom_id'] = custom_id
                                        item['url'] = url
                                    extracted_content.append(item)
                        except json.JSONDecodeError:
                            print(f"Failed to decode JSON content: {choice['message']['content']}")
            except KeyError:
                continue

    output_file_path = filedialog.asksaveasfilename(title="Save JSON file", filetypes=[("JSON files", "*.json")], defaultextension=".json")
    if not output_file_path:
        print("No output file selected.")
        return

    with open(output_file_path, 'w') as f:
        json.dump(extracted_content, f, indent=4)
    print("JSON file successfully saved.")

if __name__ == "__main__":
    jsonl_to_json()


Failed to decode JSON content: {
  "konsert_datum": “25.02.08",
  "konsert_namn": "Berlioz Faust",
  "lokal_namn": "Kungliga musikaliska akademiens stora sal",
  "konsert_biljettpris": "3:50kr, 2:60kr",
  "konserttyp_namn": "Orkesterkonsert",
  "Producer": "Nya Filharmoniska Sällskapet"
}
Failed to decode JSON content: {
  "konsert_datum": "22.02.08",
  "konsert_namn": "Vis- och Sagoafton",
  "lokal_namn": "Vetenskapsakademiens hörsal",
  "konsert_biljettpris": "0.50kr, 0.75kr, 1kr",
  "konserttyp_namn": "Vis- och Sagoafton",
  "Producer": "Sjögren, Ebba"
}

{
  "konsert_datum": "24.02.08",
  "konsert_namn": "Aurora IV Solanders Konsert",
  "lokal_namn": "Vetenskaps-Akademiens Hörsal",
  "konsert_biljettpris": "2.60kr, 2kr, 1.50kr",
  "konserttyp_namn": "Solokonsert",
  "Producer": "Norrie, Anna"
}
Failed to decode JSON content: {
  "konsert_datum": "28.01.08",
  "konsert_namn": "Brussel-Kvartettens Beethoven-Aftnar",
  "lokal_namn": "Kungliga musikaliska akademiens stora sal",
  "kons