# Using Tiny Lamma model

In [20]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
import os
import re

In [33]:
# --- Cell 2: Load TinyLlama LLM ---
# This cell loads the TinyLlama model you've already saved to your Google Drive.
# We are now properly wrapping it in a HuggingFacePipeline for LangChain.

# Define the path to your saved model on Google Drive
repo_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
drive_model_path = f"/content/drive/MyDrive/models/{repo_id.replace('/', '_')}"
llm = None # Initialize llm to None

print(f"Loading model from: {drive_model_path}")

if not os.path.exists(drive_model_path):
    print("❌ Model not found. Please run the 'colab_local_model_workflow' notebook to download and save the model first.")
else:
    tokenizer = AutoTokenizer.from_pretrained(drive_model_path)
    model = AutoModelForCausalLM.from_pretrained(drive_model_path, device_map="auto")
    print("✅ LLM and Tokenizer loaded successfully!")

    # Create a standard pipeline
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.3,
        torch_dtype=torch.float16,
        device_map="auto",
        # return_full_text=False, # This is the key change!
    )

    # UPGRADE: Wrap the pipeline in a LangChain-compatible object for better control
    llm = HuggingFacePipeline(pipeline=pipe)
    print("✅ TinyLlama LLM Pipeline created and ready!")

Loading model from: /content/drive/MyDrive/models/TinyLlama_TinyLlama-1.1B-Chat-v1.0


Device set to use cuda:0


✅ LLM and Tokenizer loaded successfully!
✅ TinyLlama LLM Pipeline created and ready!


In [34]:
# --- Cell 3: Prepare Your Document ---
# This cell remains the same. Ensure your PDF is in Google Drive.

pdf_file_path = "/content/drive/MyDrive/Annual report financial analyst/microsoft_2024_annual_report.pdf" # <--- EDIT THIS LINE

if not os.path.exists(pdf_file_path):
    print(f"❌ PDF file not found at '{pdf_file_path}'.")
else:
    print(f"✅ PDF file found at: {pdf_file_path}")

✅ PDF file found at: /content/drive/MyDrive/Annual report financial analyst/microsoft_2024_annual_report.pdf


In [35]:
# --- Cell 4: Load, Split, and Embed the Financial Report ---
# This cell remains the same. It builds our knowledge base.

vector_store = None # Initialize to None
if os.path.exists(pdf_file_path) and llm:
    print("Loading PDF...")
    loader = PyPDFLoader(pdf_file_path)
    pages = loader.load()
    print(f"PDF loaded successfully. It has {len(pages)} pages.")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=150)
    docs = text_splitter.split_documents(pages)
    print(f"Document split into {len(docs)} chunks.")

    embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
    print("Embedding model loaded.")

    print("Creating FAISS vector store from document chunks... This may take a moment.")
    vector_store = FAISS.from_documents(docs, embeddings)
    print("✅ Vector store created successfully!")
else:
    print("Skipping RAG pipeline creation due to missing PDF or LLM.")



Loading PDF...
PDF loaded successfully. It has 91 pages.
Document split into 307 chunks.
Embedding model loaded.
Creating FAISS vector store from document chunks... This may take a moment.


  return forward_call(*args, **kwargs)


✅ Vector store created successfully!


In [36]:
# --- Cell 5: UPGRADE - Create a LangChain Q&A Chain ---
# This function is now much cleaner and more robust using a proper LangChain chain.
# This is the key fix for the output quality issues.

def ask_analyst(question, vector_store, llm):
    """
    This function now uses a proper LLMChain for robust Q&A.
    """
    if not vector_store or not llm:
        return "Error: Vector store or LLM not initialized."

    print(f"\nAnalysing report for the question: '{question}'")
    relevant_docs = vector_store.similarity_search(question, k=4)
    context = "\n\n".join([doc.page_content for doc in relevant_docs])

    # print("\n--- Context Provided to LLM ---")
    # print(context)
    # print("-------------------------------")

    # Define a clear prompt template using the model's preferred format
    # This structure prevents the model from getting confused and repeating the input.
    template = """
<|system|>
You are an expert financial analyst. Your task is to answer the user's question based *only* on the provided text from the company's annual report. Be precise and cite specific numbers or facts from the text. If the answer is not in the provided text, say 'The answer is not available in the provided context.'</s>
<|user|>
CONTEXT FROM ANNUAL REPORT:
---
{context}
---
QUESTION: {question}</s>
<|assistant|>
"""

    prompt = PromptTemplate(template=template, input_variables=["context", "question"])

    # Create the LangChain chain
    llm_chain = LLMChain(prompt=prompt, llm=llm)

    print("\nGenerating final answer with LangChain...")
    # Run the chain
    result = llm_chain.invoke({"context": context, "question": question})

    # FINAL FIX: The result['text'] contains the full prompt plus the answer.
    # We need to parse it to get ONLY the text generated by the assistant.
    full_text = result['text']

    # Find the position of the final assistant tag
    assistant_tag_position = full_text.rfind("<|assistant|>")

    # If the tag is found, slice the string from that point onwards
    if assistant_tag_position != -1:
        # The actual answer starts after the tag and a newline character
        clean_answer = full_text[assistant_tag_position + len("<|assistant|>"):].strip()
    else:
        # As a fallback, if the tag isn't there, return the whole text
        clean_answer = full_text.strip()

    return clean_answer



In [39]:
# --- Cell 6: Run the Analysis! ---
# Let's ask the same questions and see the improved results.

if vector_store and llm:
    q1 = "What were the total revenues for the most recent fiscal year?"
    q2 = "Summarize the main business risks mentioned in the report."

    # Ask the first question
    answer1 = ask_analyst(q1, vector_store, llm)
    print("\n--- Analyst's Answer 1 ---")
    print(answer1)

    # Ask the second question
    answer2 = ask_analyst(q2, vector_store, llm)
    print("\n--- Analyst's Answer 2 ---")
    print(answer2)

else:
    print("\nCannot run analysis because the RAG pipeline was not created.")



Analysing report for the question: 'What were the total revenues for the most recent fiscal year?'

Generating final answer with LangChain...


  return forward_call(*args, **kwargs)



--- Analyst's Answer 1 ---
The total revenues for the most recent fiscal year were $198.27 billion, which is the answer provided in the context.

Analysing report for the question: 'Summarize the main business risks mentioned in the report.'

Generating final answer with LangChain...


  return forward_call(*args, **kwargs)



--- Analyst's Answer 2 ---
The report mentions risks and uncertainties related to the following business areas:

1. Financial Analysis: The company provides financial analysis services, which involve the preparation of financial statements, including annual reports. The report mentions risks and uncertainties related to the accuracy of the financial statements, including the availability of relevant information and the timeliness of the financial reporting process.

2. Risk Factors: The report includes risks and uncertainties related to the company's business, including the risks and uncertainties related to the company's growth, market competition, technology innovation, and customer demand.

3. Risk Factors: The report also mentions risks and uncertainties related to the company's financial performance, including the risks and uncertainties related to the company's ability to generate revenue and achieve profitability, the risks and uncertainties related to the company's ability to 