In [1]:
# Install Unsloth and the packages it is used with, choosing the command set by runtime.
import os
# The runtime is treated as Colab when any environment variable name contains "COLAB_".
if "COLAB_" not in "".join(os.environ.keys()):
    # Not Colab: a single plain install of unsloth.
    !pip install unsloth
else:

    # Colab: the first and last installs pass --no-deps, so pip does not resolve
    # their dependencies; xformers, trl and transformers are pinned to exact versions.
    # NOTE(review): presumably this avoids disturbing Colab's preinstalled stack — confirm.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install transformers==4.51.3
    !pip install --no-deps unsloth

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting xformers==0.0.29.post3
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting trl==0.15.2
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting cut_cross_entropy
  Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)
Collecting unsloth_zoo
  Downloading unsloth_zoo-2025.6.1-py3-none-any.whl.metadata (8.1 kB)
Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl (43.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.4/43.4 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.15.2-py3-none-any.whl (318 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.9/318.9 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━

In [2]:
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template, standardize_sharegpt
from datasets import Dataset


NotImplementedError: Unsloth currently only works on NVIDIA GPUs and Intel GPUs.

In [None]:
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
import numpy as np
import ast
import re
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq,AutoTokenizer
import torch
from google.colab import drive
# Mount Google Drive at /content/drive; the backend cell below reads its LoRA
# adapter from a path under /content/drive/MyDrive.
drive.mount('/content/drive')


In [None]:
# backend_service.py
from flask import Flask, request, jsonify
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
import torch
import os

# Flask application object that the /generate route below is registered on.
app = Flask(__name__)

# Model identifier passed to FastLanguageModel.from_pretrained.
MODEL_NAME = "unsloth/Mistral-Small-Instruct-2409"
# Directory checked for saved LoRA adapter weights (lives on the mounted Drive).
LORA_ADAPTER_PATH = "/content/drive/MyDrive/task3/fine_tuned_model"

# Module-level handles filled in by load_mistral_model(); they stay None if
# loading fails, which the request handler checks before generating.
model = None
tokenizer = None
# Device that tokenized inputs are moved to before generation.
device = "cuda" if torch.cuda.is_available() else "cpu"

def load_mistral_model():
    """Load the model and tokenizer into the module-level ``model`` / ``tokenizer``.

    Steps:
      1. Load ``MODEL_NAME`` in 4-bit via ``FastLanguageModel.from_pretrained``.
      2. If ``LORA_ADAPTER_PATH`` exists, wrap the model with a LoRA adapter and
         load the saved adapter weights into it; otherwise keep the base model.
      3. Put the model in eval mode.

    The Hugging Face access token is read from the ``HF_TOKEN`` environment
    variable; when it is unset, ``None`` is passed instead of a placeholder
    string.

    Returns:
        None. On any failure the error is printed and both globals are reset
        to ``None`` so the request handler can report "model not loaded".
    """
    global model, tokenizer
    print(f"Loading Mistral model: {MODEL_NAME} on device: {device}")
    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name = MODEL_NAME,
            max_seq_length = 2048,
            dtype = None,
            load_in_4bit = True,
            # Never ship a literal placeholder as a credential: an invalid
            # token string is sent to the Hub as-is. Empty/unset -> None.
            token = os.environ.get("HF_TOKEN") or None,
        )
        print("Base model loaded.")

        if os.path.exists(LORA_ADAPTER_PATH):
            # The LoRA hyper-parameters here must match the ones used when the
            # adapter at LORA_ADAPTER_PATH was trained, or the saved weights
            # will not fit the freshly created adapter.
            model = FastLanguageModel.get_peft_model(
                model,
                r = 16,
                target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj",],
                lora_alpha = 16,
                lora_dropout = 0,
                bias = "none",
                use_gradient_checkpointing = False,
            )
            # PEFT's load_adapter requires an adapter name; omitting it raises
            # TypeError, which the except below would turn into "model = None".
            # "default" targets the adapter created by get_peft_model above, so
            # the saved weights land in the adapter that is actually active.
            model.load_adapter(LORA_ADAPTER_PATH, adapter_name = "default")
            print(f"LoRA adapters loaded from {LORA_ADAPTER_PATH}.")
        else:
            print(f"Warning: LoRA adapter path '{LORA_ADAPTER_PATH}' not found. Using base model (or existing PEFT model if loaded).")

        model.eval()
        print("Mistral model and tokenizer ready for inference.")

    except Exception as e:
        # Deliberate best-effort: keep the server importable and let the
        # /generate handler answer with an explicit "model not loaded" error.
        print(f"FATAL ERROR: Could not load Mistral model or tokenizer: {e}")
        model = None
        tokenizer = None

# Load the model once, when this cell/module is executed, inside a Flask
# application context, so it is already in memory when requests arrive.
with app.app_context():
    load_mistral_model()

@app.route('/generate', methods=['POST'])
def generate_response():
    """Generate a chatbot reply for a POSTed JSON payload.

    Expected JSON object fields:
        user_message (str): required; the user's latest message.
        detected_mood: optional; interpolated into the prompt as-is.
        persona_prompt: optional; interpolated into the prompt as-is.
        chat_history (list): optional; items are objects with ``role`` and
            ``content`` keys, inserted between the persona primer and the
            new user turn.

    Returns:
        ``{"response": <text>}`` on success;
        ``{"error": ...}`` with status 500 when the model is not loaded, or
        status 400 when the body is not a JSON object, ``user_message`` is
        missing/empty, or ``chat_history`` is malformed.
    """
    if model is None or tokenizer is None:
        return jsonify({"error": "Model not loaded. Please check backend server logs."}), 500

    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so the client always gets a JSON error rather than an HTML page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    user_message = data.get('user_message')
    detected_mood = data.get('detected_mood')
    persona_prompt = data.get('persona_prompt')
    # `or []` also covers an explicit JSON null for chat_history.
    chat_history_list = data.get('chat_history') or []

    if not user_message:
        return jsonify({"error": "No user_message provided."}), 400

    history_is_valid = isinstance(chat_history_list, list) and all(
        isinstance(msg, dict) and 'role' in msg and 'content' in msg
        for msg in chat_history_list
    )
    if not history_is_valid:
        return jsonify({"error": "chat_history must be a list of objects with 'role' and 'content'."}), 400

    initial_instruction = (
        f"You are a conversational AI chatbot. Your assigned personality is: {persona_prompt}.\n"
        f"Based on the user's last message, their mood seems to be: {detected_mood}."
    )
    user_turn_content = (
        f"Detected Mood: {detected_mood}.\n"
        f"User: {user_message}"
    )

    # The persona is injected as a primed user/assistant exchange rather than
    # a system message, followed by prior history and the new user turn.
    conversation_for_mistral = [
        {"role": "user", "content": initial_instruction},
        {"role": "assistant", "content": "Understood. I will adopt this persona and consider the user's mood."},
    ]
    conversation_for_mistral.extend(
        {"role": msg['role'], "content": msg['content']} for msg in chat_history_list
    )
    conversation_for_mistral.append({"role": "user", "content": user_turn_content})

    formatted_prompt = tokenizer.apply_chat_template(
        conversation_for_mistral,
        tokenize=False,
        add_generation_prompt=True
    )

    # The chat template already emits the special tokens (including BOS) into
    # the text; asking the tokenizer to add them again would duplicate BOS.
    inputs = tokenizer(formatted_prompt, return_tensors = "pt", add_special_tokens=False).to(device)

    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        use_cache=True,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    # Drop the prompt tokens so only the newly generated continuation is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    response_text = tokenizer.decode(
        generated_ids[0][prompt_length:],
        skip_special_tokens=True
    ).strip()

    return jsonify({"response": response_text})

if __name__ == '__main__':
    print("Starting Flask backend. Model loading will occur shortly...")
    # The server binds to all interfaces, so the interactive debugger (which
    # allows arbitrary code execution from the browser) must not be on by
    # default. Opt in explicitly with FLASK_DEBUG=1 for local debugging only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"
    # use_reloader=False: a reloader would re-import the module in a child
    # process and load the model a second time.
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled, use_reloader=False)
