## Run this notebook on a cloud service if you don't have a compatible GPU/CPU. Google Colab was used for this one.

In [None]:
# !pip install transformers
# !pip install langchain-huggingface
# !pip install torch

In [None]:
from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser

### Functions

In [None]:
import re
import json

def extract_first_json(text):
    """Return the first JSON object embedded in ``text``.

    The text is scanned left to right and a real JSON decoder is tried at
    every opening brace. Unlike a non-greedy brace regex, this copes with
    nested objects and with closing braces that appear inside string values.

    Args:
        text: Arbitrary text (e.g. raw LLM output) that may contain JSON.

    Returns:
        The decoded object (a ``dict``) for the first span that parses as
        JSON. If nothing parses, the raw text of the first brace-delimited
        span is returned as a ``str`` (best-effort fallback kept for existing
        callers). ``None`` if ``text`` is empty/``None`` or has no such span.
    """
    if not text:
        return None

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one complete value starting at ``start`` and
            # ignores whatever trails it, which is exactly what we need here.
            parsed, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not valid JSON from this brace; try the next candidate.
            start = text.find("{", start + 1)
        else:
            return parsed

    # Nothing decoded: hand back the first "{...}" snippet verbatim so the
    # caller can still inspect what the model produced.
    match = re.search(r"\{.*?\}", text, re.DOTALL)
    return match.group() if match else None


### Testing Model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint identifier shared by the model and tokenizer loaders below.
model_name = "Qwen/Qwen2.5-3B-Instruct"

# dtype and device placement are both left to the library ("auto").
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# The tokenizer is loaded from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
def chat_with_qwen(user_input, system_prompt="You are a helpful AI assistant.", *, max_new_tokens=128):
    """Send one user message to the loaded model and return its reply text.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        user_input: Content of the user message.
        system_prompt: Content of the system message placed before it.
        max_new_tokens: Upper bound on the number of tokens generated for the
            reply (defaults to the previously hard-coded 128).

    Returns:
        The decoded reply as a string, with special tokens removed and the
        prompt tokens stripped off.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    # Render the conversation to a prompt string (not token ids yet) and
    # append the generation prompt so the model continues as the assistant.
    prompt_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)

    # The generated sequence starts with the prompt tokens; keep only what
    # comes after them. The batch holds exactly one prompt, hence index 0.
    prompt_length = len(model_inputs.input_ids[0])
    reply_ids = generated_ids[0][prompt_length:]

    return tokenizer.decode(reply_ids, skip_special_tokens=True)


In [None]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# NOTE(review): `model` is an instantiated object here, so `torch_dtype` and
# `device_map` may be redundant with the values used when it was loaded —
# confirm against the installed transformers version before removing them.
# NOTE(review): `do_sample=True` enables sampling, so repeated runs can give
# different text — confirm that is wanted for JSON extraction.
qwen_pipe = pipeline(
    "text-generation",
    model=model, tokenizer=tokenizer,
    max_new_tokens=128, do_sample=True,
    torch_dtype="auto", device_map="auto",
)

# Adapter that lets the pipeline be used as a step in a LangChain chain.
llm = HuggingFacePipeline(pipeline=qwen_pipe)

In [None]:
# System instructions for the extraction task. The doubled braces are
# template escapes: they render as literal "{" / "}" in the final prompt.
system_prompt = """
You are an AI assistant that extracts meeting information from natural language.

Respond ONLY with one JSON object that looks like:
{{
  "intent": "schedule_event",
  "title": "Meeting with Alice",
  "day": "monday",
  "time": "2pm",
  "participants": "Alice"
}}

DO NOT REPEAT or explain. Just return the JSON.
"""

# (role, template) pairs: the system instructions followed by a human turn
# whose only template variable is `user_input`.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{user_input}"),
    ]
)

# prompt -> model -> plain string
chain = prompt | llm | StrOutputParser()

response = chain.invoke(
    {"user_input": "Can you schedule a meeting with Jason next Tuesday at 3pm"}
)

In [None]:
# Pull the first JSON object out of the raw chain output and show it.
cleaned_output = extract_first_json(response)
print(cleaned_output)