In [None]:
#  STEP 1: Install Required Libraries (Run only once)
!pip install python-pptx transformers ipywidgets fpdf accelerate --quiet

In [None]:
#  STEP 2: Import libraries
import os
from getpass import getpass

import ipywidgets as widgets
import torch
from fpdf import FPDF
from google.colab import files
from IPython.display import display, clear_output
from pptx import Presentation
from transformers import BartTokenizer, BartForConditionalGeneration, AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig

In [None]:
#  STEP 3: File Upload
# files.upload() opens the Colab upload dialog; the result is indexed by file name.
uploaded = files.upload()
if not uploaded:
    # Nothing was uploaded (e.g. the dialog was cancelled). Fail here with a
    # clear message instead of an IndexError on the lookup below.
    raise ValueError("No file was uploaded. Re-run this cell and choose a .pptx file.")

# Only the first uploaded file is used by the rest of the notebook.
pptx_path = next(iter(uploaded.keys()))

Saving AI_Future_of_Work_Presentation.pptx to AI_Future_of_Work_Presentation (2).pptx


In [None]:
#  STEP 4: Extract Text from PPT
def extract_text_from_ppt(pptx_path):
    """Return the text of every text-bearing shape in a presentation.

    Slides are visited in order. Within each slide, every shape that exposes a
    ``text`` attribute contributes its stripped text on a line of its own.
    Leading and trailing whitespace of the combined result is removed.

    Args:
        pptx_path: Path of the presentation file passed to ``Presentation``.

    Returns:
        The collected text as a single newline-separated string.
    """
    deck = Presentation(pptx_path)
    pieces = [
        shape.text.strip()
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")  # shapes without a text attribute are skipped
    ]
    return "\n".join(pieces).strip()


ppt_text = extract_text_from_ppt(pptx_path)

In [None]:
#  STEP 5: Generate Summary using BART
# Single source of truth for the checkpoint so the tokenizer and the model
# cannot drift apart (mirrors `llama_model_id` used for the LLaMA model).
bart_model_id = "facebook/bart-large-cnn"

tokenizer_bart = BartTokenizer.from_pretrained(bart_model_id)
model_bart = BartForConditionalGeneration.from_pretrained(bart_model_id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
def summarize_text_bart(text, max_chunk_len=1024):
    """Summarize ``text`` with the module-level BART tokenizer and model.

    The text is tokenized once and split into windows of at most
    ``max_chunk_len`` tokens (counting the start/end tokens added to each
    window). Every window is summarized with beam search and the partial
    summaries are joined with a single space. Text beyond the first window is
    therefore summarized too instead of being silently truncated; input that
    fits into one window produces exactly one summary.

    Args:
        text: Text to summarize.
        max_chunk_len: Maximum number of tokens fed to the model per window.

    Returns:
        The summary string, or "" when ``text`` is empty or whitespace only.
    """
    # Nothing to summarize: avoid running generation on an empty prompt.
    if not text or not text.strip():
        return ""

    token_ids = tokenizer_bart.encode(text, add_special_tokens=False)
    # Reserve two positions per window for the start and end tokens.
    window = max(1, max_chunk_len - 2)

    summaries = []
    for start in range(0, len(token_ids), window):
        chunk_ids = tokenizer_bart.build_inputs_with_special_tokens(
            token_ids[start:start + window]
        )
        # Build the tensor on the model's device so this also works if the
        # model has been moved off the CPU.
        input_ids = torch.tensor([chunk_ids], device=model_bart.device)
        summary_ids = model_bart.generate(
            input_ids,
            attention_mask=torch.ones_like(input_ids),  # no padding in a single-row batch
            num_beams=4,
            max_length=200,
            early_stopping=True,
        )
        summaries.append(tokenizer_bart.decode(summary_ids[0], skip_special_tokens=True))

    return " ".join(summaries)

In [None]:
# Summarize the extracted slide text once; `ppt_summary` is reused later for
# the Q&A context and for the generated PDF.
ppt_summary = summarize_text_bart(ppt_text)

In [None]:
#  STEP 6: Load LLaMA Model
# The checkpoint is loaded with a Hugging Face access token. Read it from the
# HF_TOKEN environment variable, or prompt for it, so that no credential is
# ever stored in the notebook itself.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
llama_model_id = "meta-llama/Llama-3.2-3B-Instruct"

# `token=` replaces the deprecated `use_auth_token=` keyword.
tokenizer_llama = AutoTokenizer.from_pretrained(llama_model_id, token=hf_token)
model_llama = AutoModelForCausalLM.from_pretrained(
    llama_model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    # Passing load_in_4bit=True directly is deprecated; the quantization
    # settings now travel in a BitsAndBytesConfig object.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    token=hf_token,
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
#  STEP 7: Prepare combined context for better accuracy
# The prompt context is the full slide text followed by its BART summary,
# each under its own bracketed heading.
context = (
    f"[Original Text]\n{ppt_text}\n\n"
    f"[Summary]\n{ppt_summary}"
)

In [None]:
#  STEP 8: Q&A Function
# Running log of (question, answer) tuples: appended to by on_ask_clicked and
# written into the PDF by on_generate_pdf_clicked. Re-running this cell
# resets the log to empty.
qa_pairs = []

In [None]:
def ask_llama(question):
    """Answer ``question`` about the presentation with the LLaMA model.

    A prompt is built from the module-level ``context`` and the question, up
    to 300 new tokens are generated, and only the newly generated text is
    returned.

    Args:
        question: The user's question as plain text.

    Returns:
        The generated answer with surrounding whitespace removed.
    """
    prompt = f"""You are a helpful assistant. Answer the question based on the presentation content below:\n\n{context}\n\nQuestion: {question}\nAnswer:"""
    # The model is placed with device_map="auto", so follow the model's own
    # device instead of assuming a CUDA device is present.
    inputs = tokenizer_llama(prompt, return_tensors="pt").to(model_llama.device)
    outputs = model_llama.generate(
        **inputs,
        max_new_tokens=300,
        pad_token_id=tokenizer_llama.eos_token_id,  # explicit padding id for open-ended generation
    )
    # The output sequence starts with the prompt tokens. Slicing them off is
    # more reliable than splitting the decoded text on "Answer:", which picks
    # the wrong span whenever the generated text itself contains "Answer:".
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer_llama.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return answer.strip()

In [None]:
#  STEP 9: Interactive Interface
# Free-text entry for the user's question.
text_input = widgets.Textarea(
    placeholder='Type your question here',
)

# Action buttons: one submits the question, the other exports the session.
ask_button = widgets.Button(
    description='Ask Question',
    button_style='primary',
)
generate_pdf_button = widgets.Button(
    description='Generate PDF & Stop',
    button_style='danger',
)

# Area into which the click handlers print answers and status messages.
output_box = widgets.Output()

In [None]:
def on_ask_clicked(b):
    """Click handler: answer the question currently typed into ``text_input``.

    The question and its answer are appended to ``qa_pairs`` and printed into
    ``output_box``, and the input field is cleared. An empty question only
    prints a hint. While the model is generating, the clicked button is
    disabled so that repeated clicks do not queue duplicate requests.

    Args:
        b: The button that was clicked (may be ``None`` when called manually).
    """
    with output_box:
        question = text_input.value.strip()
        if not question:
            print(" Please enter a question.")
            return

        if b is not None:
            b.disabled = True
        try:
            answer = ask_llama(question)
        finally:
            # Always re-enable the button, even if generation failed.
            if b is not None:
                b.disabled = False

        qa_pairs.append((question, answer))
        print(f"\nQ: {question}\nA: {answer}\n")
        text_input.value = ""

In [None]:
def _latin1_safe(text):
    """Return ``text`` with every character outside Latin-1 replaced by '?'."""
    return str(text).encode("latin-1", "replace").decode("latin-1")


def on_generate_pdf_clicked(b):
    """Click handler: write the summary and all Q&A pairs to a PDF and download it.

    The PDF is saved as ``QnA_Output.pdf``, a confirmation is printed into
    ``output_box``, and the file is handed to ``files.download``.

    Args:
        b: The button that was clicked (unused).
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # The built-in Arial font can only encode Latin-1. Model output often
    # contains other characters (curly quotes, dashes, bullets), so the text is
    # sanitized first to keep PDF generation from failing on encoding.
    pdf.multi_cell(0, 10, _latin1_safe(f" Summary:\n{ppt_summary}\n"), align='L')

    for i, (q, a) in enumerate(qa_pairs, 1):
        pdf.multi_cell(0, 10, _latin1_safe(f"\nQ{i}: {q}\nA{i}: {a}"), align='L')

    pdf_path = "QnA_Output.pdf"
    pdf.output(pdf_path)
    with output_box:
        print("\n PDF generated and ready to download.")
    files.download(pdf_path)

In [None]:
# Wire each button to its click handler.
ask_button.on_click(callback=on_ask_clicked, remove=False)
generate_pdf_button.on_click(callback=on_generate_pdf_clicked, remove=False)

In [None]:
# Render the interface as one vertical stack: question box, the two buttons,
# then the output area.
display(
    widgets.VBox(
        children=[
            text_input,
            ask_button,
            generate_pdf_button,
            output_box,
        ]
    )
)

VBox(children=(Textarea(value='', placeholder='Type your question here'), Button(button_style='primary', descr…

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>