In [18]:
import json
import re
from api.config.db_config import get_db_connection
from typing import Tuple

from ollama import Client
from ai.models.prompt.retrieval_sdg_prompt import prompt_retreval_sdg
# Ollama server on the loopback interface; every generation call goes through it.
OLLAMA_HOST = "http://127.0.0.1:11434"
client = Client(host=OLLAMA_HOST)

# One database connection shared by the query helpers defined in this cell.
conn = get_db_connection()

def get_abs(patent_number):
    """Return the English abstract stored for a patent.

    Args:
        patent_number: Value compared against ``patent.number`` in the query.

    Returns:
        The ``en_abstract`` column of the first matching row, or ``None`` when
        no row matches (the column itself may also hold ``None``).
    """
    fetch_patent_query = """
    SELECT en_abstract
    FROM patent
    WHERE patent.number = %s
    """
    cursor = conn.cursor()
    try:
        cursor.execute(fetch_patent_query, (patent_number,))
        row = cursor.fetchone()
    finally:
        # Release the cursor even when the query raises.
        cursor.close()
    # fetchone() gives None when nothing matched; indexing that would raise TypeError.
    return row[0] if row is not None else None


def get_description(patent_number):
    """Fetch every description entry stored for a patent.

    Args:
        patent_number: Value compared against ``patent_description.patent_number``.

    Returns:
        A list with one dict per returned row, each holding the keys
        ``"description_number"`` and ``"description_text"``. The list is empty
        when no row matches. Rows are kept in the order the database returns
        them; the query does not impose an ordering.
    """
    fetch_description_query = """
    SELECT description_number, description_text
    FROM patent_description
    WHERE patent_number = %s;
    """
    cursor = conn.cursor()
    try:
        cursor.execute(fetch_description_query, (patent_number,))
        rows = cursor.fetchall()
    finally:
        # Release the cursor even when the query raises.
        cursor.close()
    return [
        {"description_number": row[0], "description_text": row[1]}
        for row in rows
    ]

In [19]:
def extract_sdgs(text: str) -> list[str]:
    """
    Collect the SDG numbers mentioned in ``text`` and normalise their format.

    Three kinds of mention are recognised:
    - the word "SDG" (any letter case) followed by a number, with optional
      whitespace in between and an optional sub-target suffix
      (e.g. "SDG1", "sdg 2", "SDG13.4");
    - a bare number with a sub-target (e.g. "16.1", "3.4"), of which only the
      part before the dot is kept;
    - a standalone one- or two-digit number at the start of the text, or
      following a comma, semicolon, colon or whitespace, and followed by
      whitespace, a comma, a semicolon or the end of the text.

    Only numbers from 1 to 17 are kept.

    Args:
        text (str): Text to scan for SDG references.

    Returns:
        list[str]: The distinct SDGs found, written as "SDG<number>" and sorted
            by number (e.g. ["SDG1", "SDG2"]). ["None"] is returned when the
            text is empty, is not a string, or contains no SDG from 1 to 17.
    """
    if not text or not isinstance(text, str):
        return ["None"]

    # Each entry pairs a regex with the string it is applied to. Only the
    # standalone-number regex runs on the stripped text, so that its start and
    # end anchors line up with the first and last visible characters.
    searches = (
        (r'(?i)\bsdg\s*(\d{1,2})(?:\.\d+)?\b', text),
        (r'\b(\d{1,2})\.\d+\b', text),
        (r'(?:^|[,;:]\s*|(?<=\s))(\d{1,2})(?=\s*[,;]|\s*$|\s+)', text.strip()),
    )

    # A set drops repeated mentions of the same goal.
    found = {
        int(digits)
        for pattern, haystack in searches
        for digits in re.findall(pattern, haystack)
        if 1 <= int(digits) <= 17
    }

    if not found:
        return ["None"]
    return [f"SDG{number}" for number in sorted(found)]

In [20]:
def get_retrival_explaination(text: str) -> Tuple[str, str]:
    """Pull the ``<retrival>`` and ``<explaination>`` sections out of ``text``.

    For each tag, the first ``<tag>...</tag>`` occurrence is located (the
    content may span several lines) and its content is returned with
    surrounding whitespace removed.

    Args:
        text: String to search for the two tagged sections.

    Returns:
        A ``(retrival, explaination)`` pair. A component is the empty string
        when its tag pair is not present in ``text``.
    """
    sections = []
    for tag_pattern in (
        r'<retrival>(.*?)</retrival>',
        r'<explaination>(.*?)</explaination>',
    ):
        hit = re.search(tag_pattern, text, re.DOTALL)
        sections.append(hit.group(1).strip() if hit else "")
    return sections[0], sections[1]

In [None]:
# For every labelled patent in the test set, build a text from its abstract and
# descriptions, then ask the model for the retrieval/explanation of each SDG
# extracted from the label, and print the result.
MAX_PATENT_WORDS = 3000  # whitespace-separated words kept per patent text

with open("../src/ai/testsets/dataset_labeled_MAGB.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            # A blank line (e.g. a trailing newline) is not a JSON record.
            continue
        item = json.loads(line)

        # Every key other than "reason" is treated as a patent number whose
        # value is the SDG label; the last such key wins.
        patent_number = None
        for key, value in item.items():
            if key == "reason":
                reason = value
            else:
                patent_number = key
                sdgs = extract_sdgs(value)
        if patent_number is None:
            # No patent in this record: skip it rather than reuse the text and
            # SDGs left over from the previous record.
            continue

        # Named `abstract` so the builtin abs() is not shadowed in the kernel.
        abstract = get_abs(patent_number)
        descriptions = get_description(patent_number)

        # Single quotes inside the f-string keep this valid before Python 3.12.
        text_parts = [abstract or ""]
        text_parts.extend(
            f"{description['description_number']}: {description['description_text']}"
            for description in descriptions
        )
        # Collapse all whitespace and cap the length of the text.
        patent_text = " ".join("\n".join(text_parts).split()[:MAX_PATENT_WORDS])
        print(patent_text)

        # NOTE(review): extract_sdgs returns ["None"] when no SDG is found, so
        # the prompt is also run for the literal "None" — confirm this is intended.
        for sdg in sdgs:
            formatted_prompt = prompt_retreval_sdg(patent_text, sdg)
            response_data = client.generate(
                model="qwen3:8b",  # Make sure this model is available in your Ollama instance
                prompt=formatted_prompt,
                options={"temperature": 0.2}
            )
            raw_output_text = response_data.get('response', '').strip()
            retrival, explaination = get_retrival_explaination(raw_output_text)
            print(retrival)
            print("---------------------------------------------")
            print(explaination)
            print("=============================================")
        


Systems and methods for using drones in dispersed welding applications are disclosed. In some examples, drones may be used in large and/or dispersed welding environments to quickly navigate the large distances and/or reach areas that might be more difficult for a person to reach. In some examples, the drones may use one or more attached devices to locate, identify, and/or collect information from welding equipment, welding workpieces, and/or welds within a (e.g., large and/or dispersed) welding environment. 1: This application claims priority to, and the benefit of, U. S. Provisional Application No. 62/893,934 , titled "SYSTEMS AND METHODS FOR USING DRONES IN DISPERSED WELDING ENVIRONMENTS," filed August 30, 2019, the entirety of which is hereby incorporated by references. 2: The present disclosure generally relates to systems and methods for using drones and, more particularly, to systems and methods for using drones in dispersed welding environments. 3: Welding assets may be employed

KeyboardInterrupt: 

In [None]:
# Read the unlabelled dataset and write one labelled JSON line per record.
# The input is opened first so that a missing input file does not truncate the
# output file. Both files use encoding='utf-8', which is good practice for JSON.
with open("../src/ai/testsets/dataset_MAGB.jsonl", "r", encoding="utf-8") as f:
    with open("../src/ai/testsets/dataset_labeled_MAGB_v1.jsonl", "w", encoding="utf-8") as f_out:
        for line in f:
            if not line.strip():
                # A blank line (e.g. a trailing newline) is not a JSON record.
                continue
            item = json.loads(line)
            if not item:
                # An empty record has no key to label; skipping it avoids
                # reusing the key of the previous record.
                continue
            # Only the first key/value pair of the record is used.
            key, value = next(iter(item.items()))
            try:
                # NOTE(review): `raw_output_text` and `get_sdg_reason` are not
                # defined in this cell; as written, this relies on names left in
                # the kernel by other cells — confirm the model call that should
                # produce `raw_output_text` for this record is not missing.
                sdg, reason = get_sdg_reason(raw_output_text)
                output_data = {key: sdg, "reason": reason}
                f_out.write(json.dumps(output_data, ensure_ascii=False) + "\n")
                # Flush after each record so partial progress survives an interruption.
                f_out.flush()
            except Exception as exc:
                # Catch Exception (not a bare except) so KeyboardInterrupt still
                # stops the loop, and report the actual error.
                print(f"Erreur : Pour {key}, {value} n'a pas été trouvé. ({exc!r})")
