In [7]:
import streamlit as st
import fitz  # PyMuPDF for PDF processing
import os
import torch
from unsloth import FastLanguageModel
from datasets import Dataset
from transformers import TrainingArguments, Trainer, AutoModelForCausalLM, AutoTokenizer
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.oauth2 import service_account

ModuleNotFoundError: No module named 'streamlit'

In [13]:
%%capture
# Normally using pip install unsloth is enough

# Temporarily as of Jan 31st 2025, Colab has some issues with Pytorch
# Using pip install unsloth will take 3 minutes, whilst the below takes <1 minute:
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
!pip install --no-deps cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

In [14]:
from unsloth import FastLanguageModel

ModuleNotFoundError: No module named 'triton'

In [11]:
import torch

print("CUDA Available:", torch.cuda.is_available())  # Should print True
print("CUDA Version:", torch.version.cuda)  # Should match your CUDA version



CUDA Available: True
CUDA Version: 11.8


In [12]:
import torch
import time

# Check if CUDA is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"CUDA is available! Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

# Create two random matrices on GPU
size = 1000  # Adjust size for more stress
A = torch.randn(size, size, device=device)
B = torch.randn(size, size, device=device)

# Measure execution time
start_time = time.time()
C = torch.matmul(A, B)  # Matrix multiplication
torch.cuda.synchronize()  # Wait for GPU to finish
end_time = time.time()

print(f"Matrix multiplication completed in {end_time - start_time:.6f} seconds")


CUDA is available! Using GPU: NVIDIA GeForce GTX 1660 Ti


  A = torch.randn(size, size, device=device)


Matrix multiplication completed in 0.277400 seconds


In [None]:


# ---- GOOGLE DRIVE UPLOAD CONFIG ----
SCOPES = ["https://www.googleapis.com/auth/drive"]
SERVICE_ACCOUNT_FILE = "your_service_account.json"  # Add your Google Cloud service account JSON here

# Authenticate with Google Drive
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
drive_service = build("drive", "v3", credentials=credentials)

# ---- STREAMLIT UI ----
st.title("📄 PDF Upload & Fine-Tune AI Chatbot")

uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
if uploaded_file is not None:
    pdf_path = f"./{uploaded_file.name}"
    with open(pdf_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    st.success("✅ PDF uploaded successfully!")

    # ---- UPLOAD TO GOOGLE DRIVE ----
    file_metadata = {"name": uploaded_file.name, "parents": ["your_drive_folder_id"]}
    media = MediaFileUpload(pdf_path, mimetype="application/pdf")
    drive_service.files().create(body=file_metadata, media_body=media, fields="id").execute()
    st.success("📂 PDF saved to Google Drive!")

    # ---- EXTRACT TEXT FROM PDF ----
    def extract_text(pdf_path):
        doc = fitz.open(pdf_path)
        text = ""
        for page in doc:
            text += page.get_text("text") + "\n"
        return text

    extracted_text = extract_text(pdf_path)
    st.info(f"📜 Extracted {len(extracted_text)} characters from PDF.")

    # ---- CREATE DATASET FOR FINE-TUNING ----
    df_combined = Dataset.from_pandas({"text": [extracted_text]})

    # ---- LOAD & FINE-TUNE LLM ----
    model_name = "unsloth/Meta-Llama-3.1-8B"
    max_seq_length = 2048
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        load_in_4bit=True
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0
    )

    # Tokenize dataset
    def tokenize_function(examples):
        return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_seq_length)

    dataset = df_combined.map(tokenize_function, batched=True)

    training_args = TrainingArguments(
        output_dir="./results",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        learning_rate=2e-5,
        num_train_epochs=3,
        logging_dir="./logs",
        logging_steps=10,
        save_total_limit=2,
        fp16=True
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        tokenizer=tokenizer,
    )

    st.info("🔄 Fine-tuning the model... This may take a while.")
    trainer.train()
    st.success("🎉 Model fine-tuning complete!")

    # ---- SAVE FINE-TUNED MODEL ----
    model_path = "./fine_tuned_model"
    model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)
    st.success("📦 Fine-tuned model saved!")

    # ---- CHAT INTERFACE ----
    st.subheader("💬 Ask the AI a Question")

    user_input = st.text_input("Enter your question:")
    if st.button("Ask"):
        if user_input:
            model = AutoModelForCausalLM.from_pretrained(model_path)
            tokenizer = AutoTokenizer.from_pretrained(model_path)

            inputs = tokenizer(user_input, return_tensors="pt")
            with torch.no_grad():
                outputs = model.generate(**inputs, max_length=500)

            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            st.write("🤖 AI:", response)
