In [None]:
# Install the notebook's Python dependencies with pip.
!pip install -q pypdf
!pip  install -q transformers einops accelerate langchain bitsandbytes sentence_transformers llama_index llama-index-llms-huggingface gradio pytube jiwer ReportLab weasyprint
# bitsandbytes is already listed above; this line installs it again, explicitly from the pypi.org index.
!pip install -q -i https://pypi.org/simple/ bitsandbytes
# Whisper is installed straight from its GitHub repository rather than from a package index.
!pip install -q git+https://github.com/openai/whisper.git

In [None]:
# Log in to the Hugging Face Hub through its CLI.
# NOTE(review): presumably required to download the google/gemma-1.1-2b-it weights used later — confirm.
! huggingface-cli login

In [None]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt

In [None]:
# prompt: write code to download a file and then write it to a file

import requests
import os
# Create the download folder up front. exist_ok=True keeps the cell safe to re-run
# and avoids the check-then-create race of testing os.path.exists first.
os.makedirs('data', exist_ok=True)

def download_and_write_file(url, filename, timeout=30):
  """Download ``url`` and write the response body to ``filename``.

  Args:
    url: Address of the file to fetch.
    filename: Path of the local file to create or overwrite (opened in binary mode).
    timeout: Seconds to wait for the server before giving up. Without a
      timeout a stalled connection would block the notebook indefinitely.

  Returns:
    None. A non-200 response is reported on stdout and no file is written.

  Raises:
    requests.RequestException: If the request itself fails (connection error,
      timeout, ...).
  """
  response = requests.get(url, timeout=timeout)

  if response.status_code == 200:
    with open(filename, 'wb') as f:
      f.write(response.content)
  else:
    # Include the status code so the failure can be diagnosed from the cell output.
    print(f'Failed to download file. (HTTP {response.status_code})')


# Fetch the PDF at this arXiv URL and store it in the data/ folder created above.
download_and_write_file('https://arxiv.org/pdf/1706.03762.pdf','data/attention is all you need.pdf')

In [None]:
# Load every document found in the data/ folder. The path is relative so that it
# matches the relative 'data/...' destination the download step writes to; the
# previous absolute "/content/data" only resolved when the working directory
# happened to be /content.
documents= SimpleDirectoryReader("data").load_data()
# documents

In [None]:
# System prompt that is passed to the HuggingFaceLLM wrapper further down.
system_prompt="""
You are a Q&A assistant. your goal is to answer questions as
accurately as possible bassed on the instruction and context provoided.
"""
# Template that wraps each user query in <|USER|> / <|ASSISTANT|> markers.
# NOTE(review): this was labelled as the Llama 2 default format, but the model configured
# in this notebook is google/gemma-1.1-2b-it — confirm these markers suit that model.
query_wrapper_prompt=SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

In [None]:
import torch
from transformers import BitsAndBytesConfig, AutoModelForCausalLM

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# google/gemma-1.1-2b-it wrapped as a llama_index LLM; the weights are requested
# in float16 with 4-bit quantization switched on (8-bit explicitly off).
llm = HuggingFaceLLM(
    model_name="google/gemma-1.1-2b-it",
    tokenizer_name="google/gemma-1.1-2b-it",
    device_map=device,
    context_window=4096,
    max_new_tokens=256,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    # Sampling is switched off for generation.
    generate_kwargs=dict(temperature=0, do_sample=False),
    model_kwargs=dict(
        torch_dtype=torch.float16,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            load_in_8bit=False,
        ),
    ),
)

In [None]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
from llama_index.legacy.embeddings.langchain import LangchainEmbedding

# Embedding model: LangChain's HuggingFaceEmbeddings for
# sentence-transformers/all-mpnet-base-v2, wrapped in llama_index's LangchainEmbedding.
embed_model= LangchainEmbedding(
    HuggingFaceEmbeddings(model_name= "sentence-transformers/all-mpnet-base-v2")
)

In [None]:
# Bundle the LLM and the embedding model, together with a chunk size of 1024,
# into the ServiceContext that index construction receives later.
# NOTE(review): the install cell does not pin llama_index — confirm that the
# installed version still provides ServiceContext.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=1024,
)

In [None]:
import llama_index
from llama_index.core import VectorStoreIndex
from sentence_transformers import SentenceTransformer

In [None]:
# NOTE(review): this rebinds `embed_model`, which an earlier cell set to a LangchainEmbedding
# and passed to ServiceContext.from_defaults. Nothing visible in this notebook reads the new
# SentenceTransformer value — confirm this cell is still needed.
embed_model = SentenceTransformer('all-mpnet-base-v2')

In [None]:
# Build a vector index over the loaded documents, using the service context defined earlier.
index= VectorStoreIndex.from_documents(documents,service_context= service_context)

In [None]:
# Obtain a query engine from the index, using its default arguments.
query_engine= index.as_query_engine()

In [None]:
%%time
# Run one sample question through the query engine; the %%time magic on the line above times this cell.
response= query_engine.query("what do you think about srk ?")

In [None]:
# Print the `response` attribute of the query result.
print(response.response)

In [None]:
import whisper
import gradio as gr
from pytube import YouTube
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
from weasyprint import HTML

# Load Whisper's "base" model once at module level; echo() below calls model.transcribe on downloaded media.
model = whisper.load_model("base")
# Counter that echo() uses to number the media and PDF files it writes.
globvar=0

def _download_youtube_audio(url, filename):
  """Download one stream of the YouTube video at ``url`` into ``filename``."""
  video = YouTube(url)
  # itag 139 is the stream this notebook has always requested; it is not offered
  # for every video, so fall back to the first audio-only stream.
  stream = video.streams.get_by_itag(139)
  if stream is None:
    stream = video.streams.filter(only_audio=True).first()
  if stream is None:
    raise ValueError("no downloadable stream found for this video")
  stream.download('', filename)


def _wrap_for_canvas(pdf, text, font_name, font_size, max_width):
  """Greedily word-wrap ``text`` into lines at most ``max_width`` points wide."""
  wrapped = []
  for paragraph in text.splitlines():
    current = ""
    for word in paragraph.split():
      candidate = f"{current} {word}" if current else word
      if current and pdf.stringWidth(candidate, font_name, font_size) > max_width:
        wrapped.append(current)
        current = word
      else:
        current = candidate
    # Also emits "" for a blank paragraph, which preserves blank lines.
    wrapped.append(current)
  return wrapped


def _write_transcript_pdf(text, filename):
  """Render ``text`` as a wrapped, multi-page PDF saved at ``filename``."""
  font_name = "Helvetica"
  body_size = 12
  line_height = 18
  margin_left = 50
  margin_bottom = 50
  max_width = 495        # keeps lines clear of a 50pt right margin on an A4-width page
  first_page_top = 700   # body starts below the title on the first page
  next_page_top = 750

  pdf = canvas.Canvas(filename)

  # Title of the first page.
  pdf.setFont(font_name, 24)
  pdf.drawCentredString(300, 750, 'title')

  pdf.setFont(font_name, body_size)
  y_pos = first_page_top
  for line in _wrap_for_canvas(pdf, text, font_name, body_size, max_width):
    if y_pos < margin_bottom:
      # Start a new page; showPage() resets the canvas font, so set it again.
      pdf.showPage()
      pdf.setFont(font_name, body_size)
      y_pos = next_page_top
    pdf.drawString(margin_left, y_pos, line)
    y_pos -= line_height

  pdf.save()


def echo(message, history):
  """Gradio chat callback: ingest a YouTube link or answer a question.

  If ``message`` contains a YouTube URL, the video's stream is downloaded,
  transcribed with the module-level Whisper ``model`` and the transcript is
  saved as ``data/youtube<N>.pdf``. Any other message is answered by querying
  an index built from the files in ``data/``.

  Args:
    message: Text typed by the user.
    history: Chat history supplied by Gradio; not used.

  Returns:
    A status string for YouTube links, otherwise the query engine's answer text.
  """
  global globvar

  if "https://yout" in message or "https://www.youtube.com/" in message:
    # Reserve a file number immediately so every attempt gets its own files.
    slot = globvar
    globvar = globvar + 1
    media_file = f"GoogleImagen{slot}.mp4"
    pdf_file = f"data/youtube{slot}.pdf"

    try:
      _download_youtube_audio(message, media_file)
    except Exception as error:
      # Keep the cause visible in the notebook output instead of swallowing it.
      print(f"YouTube download failed: {error!r}")
      return "Please provide a valid Youtube link...."

    try:
      transcript = model.transcribe(media_file)['text']
      _write_transcript_pdf(transcript, pdf_file)
    except Exception as error:
      # A transcription/PDF failure is not a bad link, so report it as such.
      print(f"Transcription failed: {error!r}")
      return f"Could not transcribe the video: {error}"

    return "Video Uploaded"

  # The index is rebuilt on every question so that PDFs added through earlier
  # YouTube uploads are included. The relative path matches where they are written.
  documents = SimpleDirectoryReader("data").load_data()
  index = VectorStoreIndex.from_documents(documents, service_context=service_context)
  query_engine = index.as_query_engine()
  response = query_engine.query(message)
  return response.response

# Chat UI backed by echo(); the examples cover one plain question and two YouTube link formats.
demo = gr.ChatInterface(
    fn=echo,
    title="GDSC AI BOT",
    examples=[
        "what is the meaning of attention is all you need ?",
        "https://www.youtube.com/watch?v=u47GtXwePms",
        "https://youtu.be/u47GtXwePms?si=roRjj6cKfednKOUS",
    ],
)
demo.launch(debug=False)