In [1]:
!pip install -q --upgrade bitsandbytes accelerate

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch

In [9]:
# Hugging Face Hub ids of the two chat models selectable in this notebook.
DEEPSEEK = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
LLAMA = "meta-llama/Llama-3.2-3B-Instruct"

# Log in to the Hugging Face Hub with the HF_TOKEN secret read from Colab userdata.
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

Token has not been saved to git credential helper.


[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m


In [1]:
# System prompt sent as the "system" chat message: it asks the model for short
# question/answer flashcards rendered as collapsible <details> markdown blocks.
# The example cards are wrapped in ###TEMPLATE### markers; call_generate removes
# the text between "###TEMPLATE" markers from the decoded output with a regex,
# so keep the markers in sync with that pattern if this prompt is edited.
system_message = """
  You are an specialized tutor in creating flashcards about whatever topic the user decides to research.
  They need to be brief, with a short question and a short answer in the following markdown format example
  ###TEMPLATE###
  # Flashcard 1
  <details>
  <summary>What is the capital of France?</summary>
  Paris
  </details>

  # Flashcard 2
  <details>
  <summary>What is the derivative of sin(x)?</summary>
  cos(x)
  </details>
  ###TEMPLATE###
"""

In [13]:
# bitsandbytes settings used by generate() when quant=True:
# 4-bit NF4 weights with double quantization, computing in bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [16]:
def generate(model, messages, quant=True, stream=True, max_new_tokens=500):
  """Load a causal language model and generate a reply to a chat conversation.

  Args:
    model: Model id or path accepted by `from_pretrained` (e.g. the LLAMA or
      DEEPSEEK constants).
    messages: Chat messages (dicts with "role" and "content") passed to the
      tokenizer's chat template.
    quant: When True, load the model with the module-level `quant_config`
      (4-bit bitsandbytes); otherwise load it unquantized and move it to CUDA.
    stream: When True, print tokens as they are produced via `TextStreamer`.
    max_new_tokens: Upper bound on the number of generated tokens.

  Returns:
    The decoded first output sequence (prompt followed by the generated
    continuation) with special tokens removed.
  """
  tokenizer = AutoTokenizer.from_pretrained(model)
  tokenizer.pad_token = tokenizer.eos_token

  if quant:
    # Quantized weights are placed on the device at load time; calling .to()
    # on a bitsandbytes model is rejected by some transformers versions, so
    # let device_map handle placement instead.
    llm = AutoModelForCausalLM.from_pretrained(
        model, quantization_config=quant_config, device_map="auto"
    )
  else:
    llm = AutoModelForCausalLM.from_pretrained(model).to("cuda")

  # Put the prompt on whatever device the model ended up on.
  input_ids = tokenizer.apply_chat_template(
      messages, return_tensors="pt", add_generation_prompt=True
  ).to(llm.device)
  # Single un-padded sequence, so every position is attended to.
  attention_mask = torch.ones_like(input_ids)

  # Only build the streamer when it is actually used.
  streamer = TextStreamer(tokenizer) if stream else None
  outputs = llm.generate(
      input_ids=input_ids,
      attention_mask=attention_mask,
      max_new_tokens=max_new_tokens,
      streamer=streamer,
      pad_token_id=tokenizer.eos_token_id,
  )

  return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
import gradio as gr
import re

def call_generate(model_name, topic, number_flashcards):
  """Generate markdown flashcards about `topic` with the selected model.

  Args:
    model_name: "LLAMA" or "DEEPSEEK"; any other value yields an error string.
    topic: Subject the flashcards should cover.
    number_flashcards: How many flashcards to request from the model.

  Returns:
    The markdown from "# Flashcard 1" through the last "</details>" in the
    model output. If no such span is found, the model output with the echoed
    template removed. For an unknown model or an empty topic, a short
    user-facing message.
  """
  if model_name == "LLAMA":
    model = LLAMA
  elif model_name == "DEEPSEEK":
    model = DEEPSEEK
  else:
    return "Invalid model selected."

  # Avoid loading a model and generating up to 2000 tokens for an empty request.
  topic = (topic or "").strip()
  if not topic:
    return "Please enter a topic."

  messages = [
      {"role": "system", "content": system_message},
      {"role": "user", "content": f"I want to know more about {topic}. Please provide {int(number_flashcards)} flashcards."}
  ]

  response = generate(model, messages, stream=False, max_new_tokens=2000)
  # The decoded output includes the prompt, so drop the example cards that the
  # system message wraps in ###TEMPLATE markers before looking for real ones.
  text = re.sub(r'###TEMPLATE.*?###TEMPLATE', '', response, flags=re.DOTALL)

  # Greedy match: from the first card heading through the last closing tag.
  result = re.search(r"(# Flashcard 1[\s\S]*</details>)", text)

  # Fall back to the cleaned text when the model ignored the card format.
  return result.group(1) if result else text

# Gradio front end: choose a model, enter a topic and a card count, then render
# the generated flashcards as markdown.
# gr.Blocks must be instantiated; the class itself is not a context manager.
with gr.Blocks() as ui:
  with gr.Row():
    model_dropdown = gr.Dropdown(choices=["LLAMA", "DEEPSEEK"], value="LLAMA", label="Model")
  with gr.Row():
    topic_selector = gr.Textbox(label="Type the topic you want flashcards:", max_lines=1, max_length=50)
    number_flashcards = gr.Slider(
        minimum=1,
        maximum=10,
        step=1,
        value=1,
        label="number flashcards"
    )
  with gr.Row():
    generate_button = gr.Button("Generate flashcards")
  with gr.Row():
    output = gr.Markdown()

  # Clicking the button runs call_generate on the three inputs and shows its
  # return value in the markdown output.
  generate_button.click(
      call_generate,
      inputs=[model_dropdown, topic_selector, number_flashcards],
      outputs=output
  )

ui.launch(inbrowser=True, debug=True)