In [None]:

!pip install -q --upgrade --no-cache-dir transformers datasets accelerate bitsandbytes sentencepiece huggingface_hub


import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling, EarlyStoppingCallback
from huggingface_hub import login
from google.colab import userdata
import torch

#  Login to Hugging Face Hub
# The token is read from the Colab secret named "token_hf", so it never
# appears in the notebook itself.
login(userdata.get("token_hf"))

# Load your dataset
from google.colab import files
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty/cancelled upload surfaces as a bare StopIteration.
    raise ValueError("No file was uploaded; please upload the CSV dataset.")

# The keys of `uploaded` are the uploaded file names; the first one is read as a path.
csv_name = next(iter(uploaded))
df = pd.read_csv(csv_name)

# format_instruction() indexes these two columns, so fail early with a clear
# message instead of a KeyError in the middle of DataFrame.apply.
missing_columns = {"Text", "Sentiment"} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{csv_name} is missing required column(s): {sorted(missing_columns)}"
    )

def format_instruction(row):
    """Render one dataset row as an instruction/answer training string.

    Args:
        row: Mapping-like object (a DataFrame row or a dict) with "Text" and
            "Sentiment" entries.

    Returns:
        str: "<s>[INST] ...prompt... [/INST] <label></s>", where the label is
        the "Sentiment" value with surrounding whitespace removed.

    Raises:
        ValueError: If the "Sentiment" value is missing (None/NaN).
    """
    text, label = row["Text"], row["Sentiment"]
    if pd.isna(label):
        # Previously this surfaced as an opaque AttributeError from `.strip()`.
        raise ValueError(f"Row has no 'Sentiment' label (Text={text!r})")
    # str() lets numeric or otherwise non-string labels be used as-is.
    label = str(label).strip()
    return (
        "<s>[INST] Detect the emotion in the following text. "
        "Output only the emotion name.\n\n"
        f'Text: "{text}"\n\n'
        f"Output: [/INST] {label}</s>"
    )

# Build the training string for every row, then attach it as the "formatted" column.
formatted_rows = df.apply(format_instruction, axis=1)
df["formatted"] = formatted_rows


In [None]:


model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Only the rendered "formatted" column is carried over into the HF dataset.
dataset = Dataset.from_pandas(df[["formatted"]])

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Fix padding token: fall back to EOS when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize(example):
    """Tokenize a batch of rendered prompts into fixed-length sequences.

    Args:
        example: Batch mapping whose "formatted" entry holds the training
            strings (this function is passed to `dataset.map(..., batched=True)`).

    Returns:
        The tokenizer's encoding of the batch, truncated/padded to 512 tokens.
    """
    # The "formatted" strings already spell out <s> ... </s> explicitly, so the
    # tokenizer must not add its own special tokens on top; otherwise each
    # sequence would begin with a duplicated BOS token.
    return tokenizer(
        example["formatted"],
        truncation=True,
        padding="max_length",
        max_length=512,
        add_special_tokens=False,
    )

# Tokenize in batches, then hold out 10% of the rows for evaluation (fixed seed).
tokenized = dataset.map(tokenize, batched=True)
dataset = tokenized.train_test_split(test_size=0.1, seed=42)
train_data, eval_data = dataset["train"], dataset["test"]


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# 4-bit NF4 quantisation with double quantisation enabled; compute dtype is fp16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Re-create the tokenizer for this cell and use EOS as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with the quantisation config; placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)


In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA adapter settings: rank 16, alpha 32, applied to the `q_proj` and `v_proj` modules.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Prepare the quantised model for k-bit training, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)
model.print_trainable_parameters()


In [None]:
# Have both splits return torch tensors when indexed.
for split in (train_data, eval_data):
    split.set_format("torch")


In [None]:
!pip install -q trl>=0.7.4

In [None]:
# Show which TRL release was actually installed, since the SFTConfig/SFTTrainer
# keyword names used below depend on it.
import trl
print(trl.__version__)


In [None]:
from trl import SFTTrainer, SFTConfig

# NOTE(review): some TRL releases spell `max_seq_length` as `max_length` —
# confirm against the installed TRL version.
sft_config = SFTConfig(
    # Checkpoints: written once per epoch, only the two most recent are kept.
    output_dir="./mistral-lora-emotion",
    save_strategy="epoch",
    save_total_limit=2,
    # Evaluation runs every epoch; the best checkpoint is reloaded when training ends.
    eval_strategy="epoch",
    load_best_model_at_end=True,
    # Batching: 1 sample per step, gradients accumulated over 4 steps.
    num_train_epochs=8,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    # Optimiser schedule: linear decay after a 10% warm-up.
    learning_rate=2e-5,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    # Sequence length and mixed precision.
    max_seq_length=512,
    fp16=True,
    # Logging: every 10 steps, no external reporters.
    logging_steps=10,
    report_to=[],
)


In [None]:
# Early stopping with a patience of 2 evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

trainer = SFTTrainer(
    model=model,
    args=sft_config,
    callbacks=[early_stopping],
    train_dataset=train_data,
    eval_dataset=eval_data,
)
# Start training
trainer.train()

In [None]:
# Mount Google Drive at /content/drive; the save cell below writes under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


In [None]:

# Destination folder on the mounted Drive for the fine-tuned artifacts.
model_path = "/content/drive/MyDrive/mistral-lora-emotion"
# Save the (PEFT-wrapped) model and the tokenizer side by side in that folder.
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
import torch

# Scratch folder passed as `offload_folder` when loading the models below.
offload_dir = "/content/offload"
# Folder on Drive where the LoRA adapter was saved.
lora_model_path = "/content/drive/MyDrive/mistral-lora-emotion"
# Hub id of the base checkpoint the adapter is applied to.
base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"


In [None]:
import os

# Tokenizer of the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Create the offload folder up front (no error if it already exists).
os.makedirs(offload_dir, exist_ok=True)

# Load base model in fp16, with offload_dir as the offload folder.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    offload_folder=offload_dir,
    torch_dtype=torch.float16,
)


In [None]:

# Load LoRA adapter on top of the already-loaded base model.
model = PeftModel.from_pretrained(
    base_model,
    lora_model_path,
    offload_folder=offload_dir,
    device_map="auto",
)

# Wrap in pipeline so inference is a single call on a prompt string.
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    device_map="auto",
)

In [None]:
# Emotion detection helper
def detect_emotion(user_text):
    """Ask the fine-tuned model which emotion `user_text` expresses.

    Builds the same instruction prompt used for training, generates up to 10
    new tokens greedily (do_sample=False), and returns whatever follows the
    last "[/INST]" marker, cut at the first "</s>" and whitespace-stripped.
    """
    # NOTE(review): the prompt hard-codes "<s>"; if the pipeline's tokenizer also
    # prepends a BOS token the input starts with two of them — confirm.
    prompt = f'<s>[INST] Detect the emotion in the following text. Output only the emotion name.\n\nText: "{user_text}"\n\nOutput: [/INST]'
    generations = pipe(prompt, max_new_tokens=10, do_sample=False)
    full_text = generations[0]['generated_text']
    # The code assumes the prompt may be echoed back: keep only the text after the last marker.
    after_marker = full_text.split("[/INST]")[-1].strip()
    answer, _, _ = after_marker.partition("</s>")
    return answer.strip()


# Interactive demo: read one sentence and print the predicted emotion.
user_input = input("Enter a sentence to detect the emotion: ").strip()
if user_input:
    emotion = detect_emotion(user_input)
    print(f"\nDetected Emotion: {emotion}")
else:
    # A blank prompt would only make the model produce a meaningless label.
    emotion = None
    print("\nNo text entered; nothing to classify.")
