# Now using the Llama-3.1-8B-Instruct model

# Downloading the Llama model

In [None]:
# Log this notebook session in to Hugging Face before the model-download cell runs.
# That cell's own comments state you must be logged in for the download to work
# (presumably because the meta-llama repository is access-gated — confirm).
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# --- Cell 2: UPGRADE - Load Llama 3.1 8B Instruct Model ---
# Loads the Llama 3.1 8B Instruct tokenizer and model.
# If no copy exists at `drive_model_path`, both are downloaded from Hugging Face
# and then saved to that path so later runs can load them from there.

# These imports live in this cell because it appears before the notebook's
# main import cell; without them a top-to-bottom run stops with a NameError
# on `os` / `torch` / `AutoTokenizer` / `AutoModelForCausalLM`.
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# UPGRADE: We are now using the more powerful Llama 3.1 8B Instruct model.
repo_id = "meta-llama/Llama-3.1-8B-Instruct"
# "/" in the repo id is replaced with "_" so the id maps to a single folder name.
# NOTE(review): this location is only persistent if Google Drive is mounted at
# /content/drive; no mount call is visible in this notebook — confirm it is
# mounted before running this cell.
drive_model_path = f"/content/drive/MyDrive/llama_model/{repo_id.replace('/', '_')}"
# Start from None so the readiness check at the end of the cell is well-defined.
model = None
tokenizer = None

print(f"Checking for model at: {drive_model_path}")

# A missing folder is treated as "not downloaded yet".
if not os.path.exists(drive_model_path):
    print("Llama 3.1 8B model not found on Google Drive. Downloading from Hugging Face...")
    print("This will take a significant amount of time and space (~16GB). Please be patient.")

    # You must be logged into Hugging Face in your Colab environment for this to work.
    # In a new cell, run the following two lines if you have issues:
    # from huggingface_hub import notebook_login
    # notebook_login()

    # Download the tokenizer and model from Hugging Face.
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=torch.bfloat16,  # load the weights as bfloat16
        device_map="auto",
    )

    # Persist both artefacts side by side so the `else` branch can load them later.
    print("Download complete. Saving model to Google Drive for future use...")
    model.save_pretrained(drive_model_path)
    tokenizer.save_pretrained(drive_model_path)
    print("✅ Model saved to Google Drive!")
else:
    # Reuse the saved copy, with the same dtype/device settings as the download path.
    print("✅ Model already exists on Google Drive. Loading...")
    tokenizer = AutoTokenizer.from_pretrained(drive_model_path)
    model = AutoModelForCausalLM.from_pretrained(
        drive_model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

if model and tokenizer:
    print("✅ Llama 3.1 8B Model and Tokenizer are loaded and ready.")



# model downloaded successfully

In [None]:
from google.colab import drive
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
import os
import re

# Loading the model and the tokenizer

In [None]:
# --- Cell 2: Load Llama 3.1 8B Instruct Model ---
# Load the tokenizer and model from the saved copy at `drive_model_path` when it
# exists; otherwise download them from Hugging Face and save a copy there.

repo_id = "meta-llama/Llama-3.1-8B-Instruct"
# One folder per model: "/" in the repo id becomes "_".
drive_model_path = "/content/drive/MyDrive/llama_model/" + repo_id.replace("/", "_")

# Both stay None until a load succeeds; the final readiness check relies on this.
model = None
tokenizer = None

print(f"Checking for model at: {drive_model_path}")

# Decide where to load from: the saved folder if present, else the Hugging Face repo.
model_is_cached = os.path.exists(drive_model_path)
if model_is_cached:
    print("✅ Model already exists on Google Drive. Loading...")
    load_source = drive_model_path
else:
    print("Llama 3.1 8B model not found on Google Drive. Downloading from Hugging Face...")
    print("This will take a significant amount of time and space (~16GB). Please be patient.")
    # Downloading requires being logged in to Hugging Face. If it fails, run in a new cell:
    #   from huggingface_hub import notebook_login
    #   notebook_login()
    load_source = repo_id

# The load calls are the same for either source.
tokenizer = AutoTokenizer.from_pretrained(load_source)
model = AutoModelForCausalLM.from_pretrained(
    load_source,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# A fresh download is written to Drive so the next run takes the cached path.
if not model_is_cached:
    print("Download complete. Saving model to Google Drive for future use...")
    model.save_pretrained(drive_model_path)
    tokenizer.save_pretrained(drive_model_path)
    print("✅ Model saved to Google Drive!")

if model and tokenizer:
    print("✅ Llama 3.1 8B Model and Tokenizer are loaded and ready.")


# Create the LangChain Pipeline

In [None]:
# --- Cell 3: Create the LangChain Pipeline ---
# Builds a transformers text-generation pipeline from the loaded model and
# tokenizer and wraps it as `llm` for LangChain. `llm` stays None when either
# one is missing, which later cells use to skip their work.
llm = None
if not model or not tokenizer:
    print("❌ Cannot create pipeline because model or tokenizer was not loaded.")
else:
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # Generation settings: low temperature to keep answers factual.
        temperature=0.1,
        top_p=0.95,
        repetition_penalty=1.15,
        max_new_tokens=512,
        # Downstream code expects the result text without extra clean-up.
        return_full_text=False,
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    print("✅ Llama 3.1 8B LLM Pipeline created and ready!")

# Prepare the document

In [None]:
# --- Cell 4: Prepare Your Document ---
# Set `pdf_file_path` to the report to analyse (the PDF must be in Google Drive)
# and report whether the file is actually there.

pdf_file_path = "/content/drive/MyDrive/Annual report financial analyst/microsoft_2024_annual_report.pdf"  # <--- EDIT THIS LINE
# Alternative document:
# pdf_file_path = "/content/drive/MyDrive/research paper CNN_compressed.pdf"

if os.path.exists(pdf_file_path):
    print(f"✅ PDF file found at: {pdf_file_path}")
else:
    print(f"❌ PDF file not found at '{pdf_file_path}'.")



# Load, split, and embed the financial report

In [None]:
# --- Cell 5: Load, Split, and Embed the Financial Report ---
# Builds the knowledge base: PDF pages -> overlapping text chunks -> embeddings
# -> FAISS vector store. `vector_store` stays None when the PDF or the LLM is
# missing, so later cells can detect that nothing was built.

vector_store = None
if not (os.path.exists(pdf_file_path) and llm):
    print("Skipping RAG pipeline creation due to missing PDF or LLM.")
else:
    # 1) Read the PDF.
    print("Loading PDF...")
    loader = PyPDFLoader(pdf_file_path)
    pages = loader.load()
    print(f"PDF loaded successfully. It has {len(pages)} pages.")

    # 2) Split the pages into chunks of up to 1200 characters with a 150-character overlap.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1200,
        chunk_overlap=150,
    )
    docs = text_splitter.split_documents(pages)
    print(f"Document split into {len(docs)} chunks.")

    # 3) Load the sentence-embedding model used to index the chunks.
    embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
    print("Embedding model loaded.")

    # 4) Embed every chunk and index it in FAISS.
    print("Creating FAISS vector store from document chunks... This may take a moment.")
    vector_store = FAISS.from_documents(docs, embeddings)
    print("✅ Vector store created successfully!")


# Create a LangChain Q&A Chain for Llama 3.1

In [None]:
# --- Cell 6: UPGRADE - Create a LangChain Q&A Chain for Llama 3.1 ---
# This function uses a prompt template optimized for Llama 3.1.

def ask_analyst(question, vector_store, llm, k=3):
    """Answer a question about the annual report using retrieved context.

    Retrieves the ``k`` chunks most similar to ``question`` from
    ``vector_store``, joins them into a single context block, and runs an
    ``LLMChain`` over a Llama 3.1 chat-formatted prompt.

    Args:
        question: The question to ask about the report.
        vector_store: Object exposing ``similarity_search(query, k=...)`` whose
            results each carry a ``page_content`` attribute.
        llm: LangChain-compatible LLM handed to ``LLMChain``.
        k: Number of chunks to retrieve as context. Defaults to 3, the value
            that was previously hard-coded.

    Returns:
        The generated answer with surrounding whitespace stripped, or the
        string ``"Error: Vector store or LLM not initialized."`` when
        ``vector_store`` or ``llm`` is falsy.
    """
    if not vector_store or not llm:
        return "Error: Vector store or LLM not initialized."

    print(f"\nAnalysing report for the question: '{question}'")
    relevant_docs = vector_store.similarity_search(question, k=k)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    # The following print statements are useful for debugging but can be commented out for clean final output.
    # print("\n--- Context Provided to LLM ---")
    # print(context)
    # print("-------------------------------")

    # Prompt in the Llama 3.1 chat format. Each `<|end_header_id|>` is followed
    # by a blank line ("\n\n"), as the published format specifies; the template
    # previously used a single newline there.
    # NOTE(review): the template starts with `<|begin_of_text|>`. If the
    # pipeline's tokenizer also prepends a BOS token, the prompt carries it
    # twice — confirm, and drop one of the two if so.
    template = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        "You are an expert financial analyst. Your task is to answer the user's "
        "question based *only* on the provided text from the company's annual "
        "report. Be precise and cite specific numbers or facts from the text. "
        "If the answer is not in the provided text, say "
        "'The answer is not available in the provided context.'"
        "<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
        "CONTEXT FROM ANNUAL REPORT:\n"
        "---\n"
        "{context}\n"
        "---\n"
        "\n"
        "QUESTION: {question}"
        "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    prompt = PromptTemplate(template=template, input_variables=["context", "question"])

    # Create the LangChain chain
    llm_chain = LLMChain(prompt=prompt, llm=llm)

    print("\nGenerating final answer with LangChain...")
    result = llm_chain.invoke({"context": context, "question": question})

    # The pipeline is created with `return_full_text=False`, so result['text']
    # needs no further clean-up beyond stripping whitespace.
    return result['text'].strip()


# Run the Analysis

In [None]:
# --- Cell 7: Run the Analysis! ---
# Ask one question through the full RAG pipeline and print the answer.
# Nothing is asked when the vector store or the LLM was never created.

if not (vector_store and llm):
    print("\nCannot run analysis because the RAG pipeline was not created.")
else:
    q1 = "Summarize the main business risks mentioned in the report."
    answer1 = ask_analyst(q1, vector_store, llm)

    # Header line, then the answer on the following line.
    print("\n--- Analyst's Answer ---", answer1, sep="\n")


# As shown above, we got the answer as expected.
# In this way, we built a RAG-based question-answering pipeline using Llama.

# The single-cell version of this pipeline is available in annual_report_Complete_final_code.ipynb