In [None]:
import requests
import os
import json
import pandas as pd
import re
import import_ipynb

In [4]:
# Optionally load settings from a local ".env" file via python-dotenv.
# The package is treated as optional: when it cannot be imported the notebook
# prints a notice and continues without calling load_dotenv.
try:
    from dotenv import load_dotenv

    load_dotenv(".env")
except ImportError:
    # python-dotenv is missing; report it and carry on.
    print("dotenv not installed, skipping...")

In [None]:
def create_news(df, limit=20, output_dir="./news/"):
    """Write news texts to numbered files and return the file names.

    Every item obtained by iterating ``df`` is written to its own file named
    ``news001.txt``, ``news002.txt``, ... inside ``output_dir``.

    Args:
        df: Iterable whose items are the text written to each file.
            NOTE(review): presumably a pandas Series (or list) of article
            bodies -- iterating a DataFrame yields column labels; confirm
            against the caller.
        limit: Maximum number of files to write. ``0`` writes nothing;
            ``None`` or a negative value means "no limit".
        output_dir: Directory receiving the files. It is created when it
            does not exist yet.

    Returns:
        list[str]: The file names (not full paths) in the order written.
    """
    # Create the target directory up front, like save_ontologies does, so a
    # fresh checkout does not fail with FileNotFoundError on the first write.
    os.makedirs(output_dir, exist_ok=True)

    # A limit of zero previously never matched the stop condition and wrote
    # every item; honour it explicitly.
    if limit == 0:
        return []

    news_list = []
    for idx, content in enumerate(df):
        filename = f"news{str(idx + 1).zfill(3)}.txt"
        file_path = os.path.join(output_dir, filename)

        # Encoding is left at the platform default so the files stay readable
        # by create_newsdict, which opens them with the default as well.
        with open(file_path, 'w') as file:
            file.write(content)

        news_list.append(filename)

        # Stop right after the limit-th file so no extra item is consumed
        # from ``df``; ``None`` disables the limit.
        if limit is not None and idx + 1 == limit:
            break
    return news_list

In [None]:
def chat(query, URL, TOKEN, MODEL, timeout=None):
    """Send a single-turn user message to a chat-completions endpoint.

    Args:
        query: Text sent as the content of the single ``user`` message.
        URL: Base URL of the API; ``/chat/completions`` is appended to it.
        TOKEN: Bearer token placed in the ``Authorization`` header.
        MODEL: Model identifier placed in the request payload.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            ``None`` (the default) waits indefinitely.

    Returns:
        dict: The decoded JSON body on success. When the request fails with a
        ``requests`` exception, ``{'error': <message>}`` is returned instead
        of raising.
    """
    url = f'{URL}/chat/completions'
    headers = {
        'Authorization': f'Bearer {TOKEN}',
        'Content-Type': 'application/json'
    }

    payload = {
        'model': MODEL,
        'messages': [{'role': 'user', 'content': query}],
        'stream': False,
    }

    # Bound before the try block: if requests.post itself raises (connection
    # refused, DNS failure, timeout) no response object exists, and the error
    # handler must not trip over an unbound local.
    response = None
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        return json.loads(response.text)
    except requests.exceptions.RequestException as e:
        if response is None:
            return {'error': str(e)}
        return {'error': f"{str(e)} {response}"}

In [None]:
def extract_turtle(output):
    """Return the body of the first ``ttl``/``turtle`` fenced block in ``output``.

    The text between the opening fence (three backticks followed by ``ttl`` or
    ``turtle``) and the next closing fence is returned with surrounding
    whitespace stripped. ``None`` is returned when no such block is present.
    """
    fence_pattern = re.compile(r'```(?:ttl|turtle)(.*?)```', flags=re.DOTALL)
    found = fence_pattern.search(output)
    if found is None:
        return None
    return found.group(1).strip()

In [None]:
def create_newsdict(news_list, news_path="./news/"):
    """Read news files from disk into a ``{file name: content}`` dictionary.

    Args:
        news_list: Iterable of file names to read.
        news_path: Directory containing the files. A trailing path separator
            is optional.

    Returns:
        dict: Maps each file name that could be read to its full text.
        Files that are missing or fail to read are reported on stdout and
        left out of the result.
    """
    news_dict = {}

    for news in news_list:
        try:
            # os.path.join instead of string concatenation, so a directory
            # given without a trailing separator still resolves correctly.
            # Kept inside the try so a bad entry is reported, not raised.
            file_path = os.path.join(news_path, news)
            with open(file_path, "r") as f:
                content = f.read()
            news_dict[news] = content
        except FileNotFoundError:
            print(f"Warning: File {news} not found in path {news_path}. Skipping.")
        except Exception as e:
            # Deliberate best-effort: one unreadable file must not abort the
            # whole batch.
            print(f"Error reading {news}: {e}")

    return news_dict

In [None]:
def save_ontologies(filename, content, output_dir="./news_onto"):
    """Write ``content`` to a ``.ttl`` file derived from ``filename``.

    The output directory is created if needed, ``.txt`` in ``filename`` is
    replaced by ``.ttl``, the text is written as UTF-8, and the resulting
    path is printed.
    """
    os.makedirs(output_dir, exist_ok=True)

    target_path = os.path.join(output_dir, filename.replace(".txt", ".ttl"))

    with open(target_path, "w+", encoding="utf-8") as handle:
        handle.write(content)

    print(f"Saved: {target_path}")

In [None]:
def _build_update_prompt(prompt_text, news_content, ontology):
    """Assemble the prompt that asks the model to work from an existing ontology."""
    return (
        f"\n{prompt_text}\n\n"
        f"{news_content}\n\n"
        "Here is the ontology:\n\n"
        f"```ttl\n{ontology}\n```\n"
    )


def generate_onto(URL, TOKEN, MODEL, initial_prompt_text, prompt_text, news_dict, initial_ontology=None):
    """Build an ontology incrementally by prompting an LLM once per news item.

    For every entry of ``news_dict`` a prompt is sent through ``chat``. While
    no ontology is available yet, the prompt is ``initial_prompt_text``
    followed by the news text. Once one exists (``initial_ontology`` or the
    most recent successful extraction), the prompt combines ``prompt_text``,
    the news text and that ontology in a ``ttl`` fenced block. Each extracted
    ontology is stored via ``save_ontologies`` and carried into the next prompt.

    Args:
        URL: Base URL passed through to ``chat``.
        TOKEN: API token passed through to ``chat``.
        MODEL: Model identifier passed through to ``chat``.
        initial_prompt_text: Prompt prefix used while no ontology exists.
        prompt_text: Prompt prefix used when an ontology is supplied.
        news_dict: Mapping of news file name to news text; processed in
            iteration order.
        initial_ontology: Optional ontology text used for the first prompt.

    Returns:
        tuple: ``(ontoDict, responseList, promptList)`` where ``ontoDict``
        maps news file names to extracted ontology text, and the two lists
        hold every raw response and prompt for debugging. When any of
        ``initial_prompt_text``, ``prompt_text`` or ``news_dict`` is empty,
        ``({}, [], [])`` is returned so callers can always unpack three values.
    """
    if not (initial_prompt_text and prompt_text and news_dict):
        # Same shape as the normal return value (previously a bare ``{}``).
        return {}, [], []

    ontoDict = {}       # {'newsXYZ' : 'RDF_ONTOLOGY'}
    responseList = []   # Saving response for Debugging
    promptList = []     # Saving prompt for Debugging

    # Latest ontology to feed into the next prompt. Starting from
    # ``initial_ontology`` keeps this name bound even when an extraction
    # fails, which previously raised NameError on the following iteration.
    previous_ttl_content = initial_ontology

    for news, news_content in news_dict.items():
        if previous_ttl_content:
            prompt = _build_update_prompt(prompt_text, news_content, previous_ttl_content)
        else:
            # No ontology yet: ask the model to create one from scratch.
            prompt = initial_prompt_text + news_content

        # Send Prompt to LLM (all positional: a positional argument may not
        # follow a keyword argument).
        response = chat(prompt, URL, TOKEN, MODEL)

        # Debug variables
        promptList.append(prompt)
        responseList.append(response)

        # ``chat`` returns {'error': ...} on request failures, so the
        # completion text may be absent; skip the item instead of crashing.
        try:
            llm_text = response['choices'][0]['message']['content']
        except (KeyError, IndexError, TypeError):
            print(f"Warning: No usable completion returned for {news}: {response}")
            continue

        # Get the generated ontology
        ttl_content = extract_turtle(llm_text) if isinstance(llm_text, str) else None
        if ttl_content:
            ontoDict[news] = ttl_content
            save_ontologies(news, ttl_content)
            previous_ttl_content = ttl_content
        else:
            print(f"Warning: No ontology extracted for {news}")

    return ontoDict, responseList, promptList