In [2]:
from docx import Document

def extract_headers(doc_path):
    document = Document(doc_path)
    headers = []
    
    # Access the sections of the document
    for section in document.sections:
        header = section.header
        if header:
            # Extract header text
            for paragraph in header.paragraphs:
                if paragraph.text.strip():  # Ignore empty headers
                    headers.append(paragraph.text.strip())
    return headers

# Specify the path to your Word document
doc_path = r"C:\Users\LENOVO\Downloads\0001279569-25-000038-xbrl\ex991_word_file.docx"
headers = extract_headers(doc_path)

# Print extracted headers
print("Extracted Headers:")
for header in headers:
    print(header)


Extracted Headers:


In [7]:
from docx import Document

def extract_all_headers(doc_path):
    document = Document(doc_path)
    headers = []

    # Extract headers from document sections
    for section in document.sections:
        header = section.header
        if header:
            for paragraph in header.paragraphs:
                if paragraph.text.strip():  # Only include non-empty headers
                    headers.append(paragraph.text.strip())

    # Optionally, include styled headings from the main document body
    for paragraph in document.paragraphs:
        if paragraph.style.name.startswith("Heading"):
            headers.append(paragraph.text.strip())
    
    return headers

# Specify the path to your Word document
doc_path = r"C:\Users\LENOVO\Downloads\0001279569-25-000038-xbrl\ex991_word_file.docx"
headers = extract_all_headers(doc_path)

# Display extracted headers
print("Extracted Headers and Sub-headers:")
for header in headers:
    print(header)


Extracted Headers and Sub-headers:


In [9]:
from docx import Document

def extract_all_text(doc_path):
    document = Document(doc_path)
    all_text = []

    # Extract text from header sections
    for section in document.sections:
        header = section.header
        for paragraph in header.paragraphs:
            if paragraph.text.strip():
                all_text.append(("Header", paragraph.text.strip()))

    # Extract styled text from the main body
    for paragraph in document.paragraphs:
        if paragraph.style.name.startswith("Heading"):
            all_text.append(("Heading", paragraph.text.strip()))
        elif paragraph.text.strip():  # Include non-empty paragraphs
            all_text.append(("Body", paragraph.text.strip()))
    
    # # Extract text from tables
    # for table in document.tables:
    #     for row in table.rows:
    #         for cell in row.cells:
    #             cell_text = cell.text.strip()
    #             if cell_text:
    #                 all_text.append(("Table", cell_text))

    return all_text

# Specify the path to your Word document
doc_path = r"C:\Users\LENOVO\Downloads\0001279569-25-000038-xbrl\ex991_word_file.docx"
all_text = extract_all_text(doc_path)

# Display all extracted content with types
print("Extracted Content:")
for content_type, text in all_text:
    print(f"{content_type}: {text}")


Extracted Content:
Body: Exhibit 99.1
Body: Trilogy Metals Announces Positive Study Results for the Bornite Copper Project Located in Alaska, USA
Body: VANCOUVER, BC, Jan. 15, 2025 /CNW/ - Trilogy Metals Inc. (TSX: TMQ) (NYSE American: TMQ) ("Trilogy Metals" or the "Company") is pleased to announce the positive results of its Preliminary Economic Assessment Study ("Bornite PEA") for the Bornite copper project in the Ambler Mining District of Northwestern Alaska (the "Bornite Project"). The Bornite Project is held by Ambler Metals LLC ("Ambler Metals"), the joint venture operating company equally owned by Trilogy Metals and South32 Limited ("South32"). The Bornite PEA was prepared on a 100% ownership basis, of which Trilogy Metals' share is 50%. All amounts are in U.S. dollars unless otherwise stated.
Body: Trilogy Metals will host a conference call on January 15, 2025 at 1:00pm Pacific Time or 4:00pm Eastern Time to discuss these results.
Body: Please use this link to access the live w

In [10]:
from docx import Document

def extract_headers_from_body(doc_path):
    document = Document(doc_path)
    headers = []

    for paragraph in document.paragraphs:
        # Check for heuristic conditions
        text = paragraph.text.strip()
        if not text:
            continue  # Skip empty paragraphs

        # Example heuristic conditions
        if (paragraph.style.name.startswith("Heading") or  # Styled as a heading
            paragraph.runs and any(run.bold for run in paragraph.runs) or  # Bold text
            len(text) < 100 and text.isupper()):  # Short, uppercase text
            headers.append(text)

    return headers

# Specify the path to your Word document
doc_path = r"C:\Users\LENOVO\Downloads\0001279569-25-000038-xbrl\ex991_word_file.docx"
headers = extract_headers_from_body(doc_path)

# Display extracted headers
print("Extracted Headers from Body:")
for header in headers:
    print(header)


Extracted Headers from Body:
Exhibit 99.1
Trilogy Metals Announces Positive Study Results for the Bornite Copper Project Located in Alaska, USA
VANCOUVER, BC, Jan. 15, 2025 /CNW/ - Trilogy Metals Inc. (TSX: TMQ) (NYSE American: TMQ) ("Trilogy Metals" or the "Company") is pleased to announce the positive results of its Preliminary Economic Assessment Study ("Bornite PEA") for the Bornite copper project in the Ambler Mining District of Northwestern Alaska (the "Bornite Project"). The Bornite Project is held by Ambler Metals LLC ("Ambler Metals"), the joint venture operating company equally owned by Trilogy Metals and South32 Limited ("South32"). The Bornite PEA was prepared on a 100% ownership basis, of which Trilogy Metals' share is 50%. All amounts are in U.S. dollars unless otherwise stated.
Trilogy Metals will host a conference call on January 15, 2025 at 1:00pm Pacific Time or 4:00pm Eastern Time to discuss these results.
Please use this link to access the live webcast of the confer

In [11]:

from docx import Document
import requests

def extract_text_from_word(doc_path):
    """Extracts all text from a Word document."""
    document = Document(doc_path)
    text_content = "\n".join(paragraph.text.strip() for paragraph in document.paragraphs if paragraph.text.strip())
    return text_content

def extract_headers_with_ollama(text_content, model="your_ollama_model"):
    """Sends the document text to the Ollama LLM to extract headers."""
    url = "http://localhost:11434/api/completions"  # Ollama API endpoint
    prompt = f"""
    The following text is extracted from a document. Identify and extract all headers and sub-headers:
    
    {text_content}
    
    Return the headers as a list.
    """
    payload = {
        "model": model,
        "prompt": prompt
    }
    response = requests.post(url, json=payload)
    if response.status_code == 200:
        return response.json().get("completion", "").split("\n")
    else:
        print("Error:", response.text)
        return []

# Input document path
doc_path = r"C:\Users\LENOVO\Downloads\0001279569-25-000038-xbrl\ex991_word_file.docx"

# Step 1: Extract text from the Word file
text_content = extract_text_from_word(doc_path)

# Step 2: Extract headers using Ollama
headers = extract_headers_with_ollama(text_content)

# Display results
print("Extracted Headers:")
for header in headers:
    print(header)


Error: 404 page not found
Extracted Headers:


In [3]:
from docx import Document
import ollama

# Load the Word document
doc = Document(r"C:\Users\LENOVO\Downloads\0001279569-25-000038-xbrl\ex991_word_file.docx")

# Function to extract and classify headers using LLM
def extract_headers_with_llm(doc):
    # Extract all text from the document
    text = "\n".join([para.text for para in doc.paragraphs])

    # Pass the entire text to the LLM to identify headers
    prompt = f"Please identify the main headers and subheaders in the following text:\n\n{text}"

    # Directly use ollama.chat() with the appropriate model
    response = ollama.chat(model="llama3.1:latest", messages=[{"role": "user", "content": prompt}])

    # Extract the model's response
    # headers = response['text']

    return response

# Extract headers from the document using LLM
headers = extract_headers_with_llm(doc)

# Display the result
print("Identified Headers:", headers)


Identified Headers: model='llama3.1:latest' created_at='2025-01-19T15:57:46.3712032Z' done=True done_reason='stop' total_duration=477358328000 load_duration=107026900 prompt_eval_count=1026 prompt_eval_duration=1846379000 eval_count=395 eval_duration=475401768000 message=Message(role='assistant', content="Trilogy Metals has announced positive study results for the Bornite copper project located in Alaska. The company is considering constructing a new mine at this site, and preliminary economic assessments (PEA) suggest that it may be economically viable. \n\nThe PEA report found that Bornite could produce 1.2 billion pounds of copper, 14 million ounces of gold, and 11 million ounces of silver over its eight-year life span. This would translate to approximately $3.7 billion in revenue at current metal prices.\n\nA feasibility study for the nearby Arctic project has also been completed, with estimated production costs being competitive with global peers. Both studies suggest that these p

In [7]:
print(headers.message.content)

Trilogy Metals has announced positive study results for the Bornite copper project located in Alaska. The company is considering constructing a new mine at this site, and preliminary economic assessments (PEA) suggest that it may be economically viable. 

The PEA report found that Bornite could produce 1.2 billion pounds of copper, 14 million ounces of gold, and 11 million ounces of silver over its eight-year life span. This would translate to approximately $3.7 billion in revenue at current metal prices.

A feasibility study for the nearby Arctic project has also been completed, with estimated production costs being competitive with global peers. Both studies suggest that these projects have potential, but they're subject to various risks and uncertainties. 

It's worth noting that this is a Canadian company (TSX: TMQ), and their disclosure standards may differ from those in the US.

This news release highlights some important points about investing in mineral exploration companies:

