In [57]:
import os
import json
from datetime import datetime
import warnings
warnings.simplefilter('ignore')
import requests

import torch
import torchvision
import openai
import PyPDF2
import spacy
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sklearn.metrics.pairwise import cosine_similarity
import transformers
# summarizer = transformers.pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", revision="a4f8f3e")
from library.exportation import export_prompt_response, export_article
from questions import questions
from get_embedding_function import get_embedding_function


In [18]:
def load_pdf_text(pdf_path):
    """Return the concatenated text of every page of the PDF at ``pdf_path``.

    The file is opened in binary mode and parsed with ``PyPDF2.PdfReader``.
    A page whose ``extract_text()`` result is falsy (``None`` or an empty
    string) contributes nothing to the returned string.
    """
    with open(pdf_path, "rb") as pdf_handle:
        pdf_reader = PyPDF2.PdfReader(pdf_handle)
        # Extract while the file is still open; the reader pulls pages from it.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return "".join(page_texts)


def embedding(chunks, nlp):
    """Vectorise each text chunk with the given spaCy pipeline.

    Args:
        chunks: iterable of text strings.
        nlp: callable (e.g. a loaded spaCy model) that returns an object
            exposing a ``.vector`` attribute for a piece of text.

    Returns:
        A list holding one ``.vector`` per chunk, in input order.
    """
    return [nlp(piece).vector for piece in chunks]


_summarization_pipeline = None  # created on first use, then shared by every call


def _get_summarization_pipeline():
    """Build the Hugging Face summarization pipeline once and reuse it afterwards."""
    global _summarization_pipeline
    if _summarization_pipeline is None:
        # Constructing a pipeline loads model weights, so do it only once
        # instead of on every summarize_text() call.
        _summarization_pipeline = transformers.pipeline("summarization")
    return _summarization_pipeline


def summarize_text(text, max_length=100):
    """Summarise ``text`` with the default ``transformers`` summarization pipeline.

    Args:
        text: the text to summarise.
        max_length: forwarded to the pipeline as ``max_length``.

    Returns:
        The ``'summary_text'`` field of the first result returned by the pipeline.
    """
    summarizer = _get_summarization_pipeline()
    summary = summarizer(text, max_length=max_length, min_length=0, do_sample=False)
    return summary[0]['summary_text']



In [16]:
# Source document lives at <folder_name>/<file_name>.pdf (relative path).
folder_name = '3_data'
file_name = '6055.HK'
pdf_file_path = os.path.join(folder_name, file_name + '.pdf')

# Full text of the PDF, all pages concatenated.
pdf_text = load_pdf_text(pdf_file_path)

# Chunking and Embedding for PDF

In [68]:
# Splitter configuration: chunk_size=1000 with chunk_overlap=200.
# (Original author's note: add_start_index=True is kept because the kernel
# reportedly died without it.)
splitter_settings = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Break the extracted PDF text into overlapping chunks.
text_chunks = text_splitter.split_text(pdf_text)

# spaCy pipeline used to turn text into vectors.
nlp = spacy.load("en_core_web_md")

# One vector per chunk, in the same order as text_chunks.
encoded_chunks = embedding(text_chunks, nlp)

# Top-N most similar chunks for each question


In [69]:
top_N = 3  # number of chunks retrieved per question
top_N_chunks = {}  # question text -> list of its top_N most similar chunks

for q in questions:
    q_emb = nlp(q).vector
    # Cosine similarity between the question vector and every chunk vector.
    similarities = cosine_similarity([q_emb], encoded_chunks).flatten()

    # argsort() is ascending: reverse it and keep the top_N best-scoring indices.
    top_N_idx = similarities.argsort()[::-1][:top_N]

    # Use a dedicated index name so it cannot be confused with a loop counter.
    top_N_chunks[q] = [text_chunks[chunk_idx] for chunk_idx in top_N_idx]

# Define prompts


In [188]:
def ask_question_prompt(content, question):
    """Build the question-answering prompt for one question.

    Args:
        content: retrieved context text to place in the prompt.
        question: the question to be answered from that context.

    Returns:
        The prompt string, with ``content`` and ``question`` interpolated.
    """
    # Each entry is one line of the prompt; the leading spaces and trailing
    # blanks are part of the emitted text and must be kept exactly.
    prompt_lines = [
        "You are an financial analyst.",
        "    Use the following pieces of retrieved context to answer the question. ",
        "    Use 3 sentences maximum for question and keep the answer concise.",
        "",
        "",
        "    Retrieved context:",
        f"    {content}",
        "",
        "",
        "    Question:",
        f"    {question}",
        "    ",
    ]
    return "\n".join(prompt_lines)


def write_report_prompt(content):
    """Build the report-writing prompt around the supplied material.

    Args:
        content: text to be combined into the financial analysis report.

    Returns:
        The prompt string, with ``content`` interpolated after the instructions.
    """
    # Each entry is one line of the prompt; the leading spaces and trailing
    # blanks are part of the emitted text and must be kept exactly.
    report_lines = (
        "You are a financial report writer. ",
        "    Please combine the text provided and generate a financial analysis report. ",
        "    The report should be about 15 to 20 paragraphs. The report should include the following sections:",
        "    1. Company Overview",
        "    2. Revenue Structure",
        "    3. Profit",
        "    4. Valuation",
        "    5. Summary",
        "    6. Future Outlook",
        "    7. and other details provided.",
        "    ",
        "    content is as below:",
        f"    {content}",
        "    ",
    )
    return "\n".join(report_lines)


CHAT_COMPLETIONS_URL = "https://api.ohmygpt.com/v1/chat/completions"
DEFAULT_CHAT_MODEL = 'gpt-4o-mini'
DEFAULT_REQUEST_TIMEOUT = 120  # seconds; requests waits forever when no timeout is given


def _chat_completion(system_message, prompt, api_key, model, temperature, timeout):
    """POST one system + user exchange to the chat-completions endpoint and return the reply text."""
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt}
        ],
        "temperature": temperature
    }
    response = requests.post(CHAT_COMPLETIONS_URL, headers=headers, json=data, timeout=timeout)
    # Surface HTTP failures (bad key, rate limit, server error) as HTTPError
    # instead of an opaque KeyError on "choices" further down.
    response.raise_for_status()
    response_json = response.json()
    return response_json["choices"][0]["message"]["content"]


def generate_answer(prompt, api_key, model=DEFAULT_CHAT_MODEL, temperature=0.7,
                    timeout=DEFAULT_REQUEST_TIMEOUT):
    """Send a question prompt to the chat-completions API and return the reply text.

    Args:
        prompt: user message content.
        api_key: bearer token placed in the ``Authorization`` header.
        model: model name sent in the request body.
        temperature: sampling temperature sent in the request body.
        timeout: ``requests`` timeout in seconds.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        requests.HTTPError: if the API responds with an error status.
        requests.Timeout: if the server does not respond within ``timeout``.
    """
    return _chat_completion(
        "You are a financial assistant asking questions.",
        prompt, api_key, model, temperature, timeout,
    )


def generate_summary(prompt, api_key, model=DEFAULT_CHAT_MODEL, temperature=0.7,
                     timeout=DEFAULT_REQUEST_TIMEOUT):
    """Send a report-writing prompt to the chat-completions API and return the reply text.

    Identical to ``generate_answer`` except for the system message
    ("You are a writer."). Parameters, return value and raised exceptions
    are the same.
    """
    return _chat_completion(
        "You are a writer.",
        prompt, api_key, model, temperature, timeout,
    )

In [157]:
# prompt to llm to ask question
all_prompts = []

# combine chunks and questions to string
for question, top_chunks in top_N_chunks.items():
    top_chunks_combined = '.'.join(top_chunks)
    system_prompt = ask_question_prompt(top_chunks_combined, question)
    all_prompts.append((question, system_prompt))

In [166]:
prompt_response = []  # (question, answer) pairs, in the order of all_prompts

# Read the key from the environment instead of hard-coding it in the notebook.
# NOTE(review): the key previously committed in this cell should be treated as
# leaked and revoked.
api_key = os.environ["OHMYGPT_API_KEY"]

for i, (q, p) in enumerate(all_prompts):
    response = generate_answer(p, api_key)
    prompt_response.append((q, response))
    print(i)  # progress indicator: index of the prompt just answered

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51


In [172]:
# Map each question to its answer; if a question repeats, the last answer wins.
prompt_hist = dict(prompt_response)

export_prompt_response(file_name, prompt_hist)

JSON data has been exported to 1_prompt_log\6055.HK_20241123_2045.json


In [189]:
# One "<question> <answer>" line per Q&A pair, used as the report's source material.
content = ''.join(f'{q} {r}\n' for q, r in prompt_response)
summary_promt = write_report_prompt(content)
report = generate_summary(summary_promt, api_key)

In [191]:
# Persist the generated report through the project's export helper.
export_article(file_name, report)

report has been exported to 2_article_log\6055.HK_20241123_2059.txt


# Financial Analysis Report

## Company Overview

The company under review operates within the tobacco industry, primarily focusing on the procurement and sales of tobacco leaf products and related services under the CBT Framework Tobacco and Services Purchase Agreement. For the reporting period, the company reported a total revenue of HK$122.0 million stemming from its tobacco and services transactions. Notably, the overall revenue for the Group reached HK$6,802.2 million, marking a year-on-year growth of 5%. The Company is strategically positioned to adapt to market demands while optimizing its supply chain efficiencies and pricing strategies to enhance profitability.

## Revenue Structure

The revenue structure of the company showcases a diverse portfolio, although specific details regarding revenue-generating segments were not disclosed. A significant contributor to revenue is the tobacco leaf products export business, which recorded a commendable 23% growth year-on-year. Despite challenges in the Brazil operations, where the export volume decreased despite a 43% rise in operating revenue, the overall performance indicates resilience and adaptability in a fluctuating market. For a comprehensive assessment of the contributions from each segment, additional financial data would be necessary.

## Profit

A detailed analysis of profitability metrics reveals operational growth, although specific figures for net profit and EBITDA were not provided. The emphasis on operational strategies aimed at increasing gross profit margins is evident, especially within the Tobacco Leaf Products Export and Cigarettes Export businesses. However, external challenges, including weather conditions and shipping market fluctuations, have impacted sales volumes, leading to unmet customer needs. A shift in product structure has also resulted in increased sales of finished tobacco strips with higher unit prices but lower gross profit margins, indicating complexities in maintaining profitability.

## Valuation

Analysts primarily utilize gross profit as the valuation metric, which is derived from reportable segment revenue minus the related cost of sales. With the earnings per share (EPS) reported at HK$0.93, further analysis would be required to ascertain the price-to-earnings (P/E) ratio relative to industry averages. This metric is crucial for investors seeking insights into the company's valuation against its peers and the broader market context.

## Summary

The financial performance of the company during the reporting period reflects a robust revenue generation capability, underscored by strategic operational improvements and a focus on optimizing the supply chain. Despite facing challenges in sales volume and profitability due to external factors, the company has managed to sustain its market position. The increase in profit from operations and profit before taxation indicates effective management strategies, paving the way for future growth.

## Future Outlook

The outlook for the company remains positive, particularly in the New Tobacco Products Export Business. Future strategies include enhancing customer satisfaction through improved alignment of supply and demand and optimizing pricing strategies to bolster operational revenue. Additionally, the company aims to expand its procurement areas and enhance its ESG performance to tap into new profit sources. The focus on enhancing resource allocation in the Brazilian market further supports the growth trajectory.

## Key Risks and Strategic Goals

Key risks impacting the company include adverse weather conditions, fluctuations in international shipping markets, and challenges related to supply constraints. Management's strategic goals for the next year involve addressing these risks through enhanced planning, resource optimization, and improved governance capabilities. The company is also focusing on strengthening its supply chain efficiency and aligning product offerings to meet market demands.

## Sustainability Initiatives

The company is committed to enhancing its Environmental, Social, and Governance (ESG) performance, particularly in its supply chain operations. Future sustainability goals include optimizing resource allocation and strengthening governance to ensure stable operations. The focus on improving market competitiveness while effectively managing risks positions the company favorably for sustainable growth.

## Employee Engagement and Corporate Culture

While specifics on employee training and development were not detailed, the company's governance improvements and implementation of "Lean Management" practices suggest a commitment to fostering a positive workplace culture. There is an implicit focus on enhancing operational efficiency, which can contribute to employee morale and engagement.

## Conclusion

In conclusion, the company demonstrates a solid financial performance bolstered by strategic operational improvements and a commitment to sustainability. While challenges persist, particularly in market supply and external factors, the proactive management strategies and focus on customer satisfaction indicate a promising outlook. Stakeholders should continue to monitor the company's financial metrics and market positioning to gauge future performance accurately. 

Overall, the company is well-positioned to navigate industry challenges and capitalize on growth opportunities, ensuring its position as a competitive player in the tobacco market.