In [109]:
from tqdm.notebook import tqdm
from api.config.db_config import get_db_connection
import random
import json
import re

from ollama import Client

# Ollama client bound to the server listening on the local machine.
client = Client(host="http://127.0.0.1:11434")

# Read the prompt template once. The explicit encoding keeps the read
# independent of the platform's default locale, matching the UTF-8 used when
# the results are written out.
with open("sdg_label_prompt.md", "r", encoding="utf-8") as f:
    prompt_template = f.read()

# Module-level database connection shared by the query helpers below.
conn = get_db_connection()

def get_all_patents_number():
    """Return the ``number`` column of every row in the ``patent`` table.

    Uses the module-level ``conn`` connection. The cursor is closed even when
    the query raises, so a failed call does not leave an open cursor behind.

    Returns:
        list: One patent number per row, in the order the database returned
        them.
    """
    fetch_patent_query = """
    SELECT patent.number
    FROM patent
    """

    cursor = conn.cursor()
    try:
        cursor.execute(fetch_patent_query)
        # Every row carries a single selected column; keep just that value.
        return [row[0] for row in cursor.fetchall()]
    finally:
        cursor.close()


def get_description(patent_number):
    """Fetch the description rows of one patent that are eligible for labelling.

    The query keeps only rows with ``description_number < 15`` whose text
    contains more than 20 space-separated tokens (counted as the number of
    spaces plus one). Uses the module-level ``conn`` connection; the cursor is
    closed even when the query raises.

    Args:
        patent_number: Value matched against ``patent_description.patent_number``.

    Returns:
        list[dict]: One dict per matching row with the keys
        ``description_number``, ``description_text``, ``patent_number`` and
        ``sdg``. Empty when no row matches.
    """
    fetch_description_query = """
    SELECT description_number, description_text, patent_number, sdg
    FROM patent_description
    WHERE patent_number = %s and description_number < 15 and LENGTH(description_text) - LENGTH(REPLACE(description_text, ' ', '')) + 1 > 20;
    """
    cursor = conn.cursor()
    try:
        # The patent number is passed as a bound parameter, not interpolated.
        cursor.execute(fetch_description_query, (patent_number,))
        # Column positions follow the SELECT list above.
        return [
            {
                "description_number": row[0],
                "description_text": row[1],
                "patent_number": row[2],
                "sdg": row[3],
            }
            for row in cursor.fetchall()
        ]
    finally:
        cursor.close()

In [None]:
# Number of patents to draw for this labelling run.
SAMPLE_SIZE = 30

numbers = get_all_patents_number()
# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request at the number of patents available.
all_patent_number = random.sample(numbers, min(SAMPLE_SIZE, len(numbers)))

In [None]:

list_patent = []  # Flat list of labelled description records
seen_description_texts_globally = set()  # Description texts already classified, across all patents

# Compiled once outside the loop: captures whatever sits between <answer> and
# </answer>; tags match case-insensitively and the content may span lines.
answer_pattern = re.compile(r"<answer>(.*?)</answer>", re.DOTALL | re.IGNORECASE)

for actual_patent_number in tqdm(all_patent_number):
    # Each entry is a dict with the keys "description_number",
    # "description_text", "patent_number" and "sdg".
    for desc_data in get_description(actual_patent_number):
        description_text = desc_data["description_text"]

        # Identical text is sent to the model only once, even when it appears
        # under several patents.
        if description_text in seen_description_texts_globally:
            continue

        formatted_prompt = prompt_template.replace("{description}", description_text)

        # Generate the SDG classification
        response_data = client.generate(
            model="qwen3:8b",  # Make sure this model is available in your Ollama instance
            prompt=formatted_prompt,
        )
        # The generated text is read from the 'response' field of the reply.
        raw_output_text = response_data.get('response', '').strip()

        # Prefer the content of the <answer>...</answer> tags. The model does
        # not always emit them, so fall back to the whole reply instead of
        # failing on a missing match and losing every result gathered so far.
        answer_match = answer_pattern.search(raw_output_text)
        if answer_match is not None:
            sdg_label = answer_match.group(1).strip()
        else:
            sdg_label = raw_output_text

        list_patent.append({
            "patent_number": desc_data["patent_number"],  # Same value as actual_patent_number
            "sdg": sdg_label,
            "description_number": desc_data["description_number"],
            "description_text": description_text,  # Store the text that was classified
        })

        seen_description_texts_globally.add(description_text)

  0%|          | 0/1 [00:00<?, ?it/s]

2025-05-21 22:30:10,473 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



SDG 12: Responsible Consumption and Production

The text discusses the shift from traditional carton-type packaging to a soft pack-type film packaging bag for hygienic thin paper. This change is motivated by factors such as environmental load, disposal, transportation, storage, and cost efficiency. These considerations directly relate to the goal of promoting sustainable consumption and production patterns, which is the core objective of SDG 12. The emphasis on reducing environmental impact through packaging innovation aligns with the principles of responsible consumption and production, making SDG 12 the most relevant classification for this text.



2025-05-21 22:30:24,348 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



nothing



2025-05-21 22:30:45,978 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



The text describes an innovative packaging design for sheets, featuring a retrieval opening created by perforations and a tab. This invention is primarily focused on improving the functionality and usability of packaging, which relates to industrial innovation and sustainable manufacturing practices. 

**Reasoning:**
- **SDG 9 (Industry, Innovation and Infrastructure):** The invention represents a new design in packaging technology, which falls under the scope of industrial innovation. It involves creating a more efficient and user-friendly packaging solution, which aligns with the goal of fostering innovation and sustainable industrial practices.
- **SDG 12 (Responsible Consumption and Production):** While not explicitly stated, the design could contribute to more sustainable consumption by improving the usability of packaging, potentially reducing waste or enhancing product accessibility, which is part of responsible consumption and production patterns.

However, since the text does

2025-05-21 22:31:00,773 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



The text describes an invention related to a sheet package that facilitates the formation of a retrieval opening. While the text does not explicitly mention any of the Sustainable Development Goals (SDGs), it appears to be focused on industrial innovation and product design. 

The most relevant SDG is **SDG 9: Industry, Innovation and Infrastructure**, as the invention pertains to the development of a new packaging solution, which aligns with fostering innovation and sustainable industrial practices. The text does not provide enough information to confidently associate it with other SDGs such as health, education, or environmental sustainability.

Therefore, the most relevant SDG for this text is **SDG 9**.



2025-05-21 22:31:16,153 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



nothing



2025-05-21 22:31:24,629 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


nothing


2025-05-21 22:31:31,985 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



nothing



2025-05-21 22:31:56,524 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



The text provided describes a series of technical drawings illustrating the structure and components of a sheet package, including retrieval openings and tabs. These figures are focused on the mechanical and design aspects of the packaging, such as its construction, views, and functional elements. However, there is no mention of sustainability, environmental impact, economic growth, social equity, or any other themes directly aligned with the Sustainable Development Goals (SDGs).

The content is purely technical and does not reference any of the SDGs' objectives, such as poverty reduction, clean energy, responsible consumption, or climate action. While packaging design can theoretically contribute to certain SDGs (e.g., SDG 12 for sustainable production), the text does not provide enough context to make such a connection. Therefore, the text is not related to any of the 17 SDGs.

The most relevant classification is: **nothing**.



2025-05-21 22:32:10,263 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



nothing



2025-05-21 22:32:39,518 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



The text describes a packaging design that allows for efficient retrieval of individual sheets from a stack, focusing on the structural and functional aspects of the packaging bag. While the text does not explicitly mention sustainability, environmental impact, or social equity, it does involve **innovation in industrial design** and **product functionality**.

The most relevant SDG is **SDG 9: Industry, Innovation and Infrastructure**, as the text discusses the development of a packaging system that likely represents an innovative approach to product packaging. This innovation contributes to **resilient infrastructure** and **sustainable industrialization**, which are key components of SDG 9.

Other SDGs such as SDG 12 (Responsible Consumption and Production) could be loosely associated if the packaging design aims to reduce waste or improve resource efficiency, but the text does not provide enough information to confirm this. Therefore, **SDG 9 is the most directly relevant** based 

2025-05-21 22:32:50,805 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



nothing



2025-05-21 22:32:58,684 - httpx - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"



nothing



In [112]:
# Persist every labelled record as pretty-printed JSON; non-ASCII characters
# are written as-is rather than escaped.
with open("patents_output.json", mode="w", encoding="utf-8") as output_handle:
    output_handle.write(json.dumps(list_patent, ensure_ascii=False, indent=4))