<a href="https://colab.research.google.com/github/HatemMoushir/smart-ai-assistant/blob/main/train_sentiment_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Step 1: تثبيت المكتبات
!pip install transformers datasets -q

# Step 2: تحميل النموذج الجاهز لتحليل المشاعر
from transformers import pipeline

model_name = "distilbert-base-uncased"
classifier = pipeline("sentiment-analysis", model=model_name)

# Step 3: تجربة قبل التدريب
print("🧪 قبل التدريب:")
print(classifier("I hate this place."))   # Expected: NEGATIVE
print(classifier("What a wonderful project!"))  # Expected: POSITIVE

# Step 4: Load the IMDb reviews from HuggingFace Datasets
from datasets import load_dataset

dataset = load_dataset("imdb")
# Keep only 1000 training examples for a quick first run.
# The IMDb train split is stored grouped by label, so taking the first 1000
# rows as-is would yield reviews of a single class and the model would learn
# to always predict that class. Shuffle (fixed seed for reproducibility)
# before sampling so both classes are represented.
dataset = dataset["train"].shuffle(seed=42).select(range(1000))

# Step 5: Prepare the data for training
from transformers import AutoTokenizer
from datasets import Dataset
import torch

# Load the tokenizer that belongs to the same checkpoint as the model so the
# token ids match the model's vocabulary.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(example):
    """Tokenize the ``"text"`` field of ``example`` with the module-level tokenizer.

    The function is applied through ``dataset.map(..., batched=True)``, so
    ``example["text"]`` is handed over as a batch of review strings.

    Every sequence is truncated and padded to exactly 256 tokens, giving all
    rows the same length.

    Returns:
        The tokenizer's output for the given texts.
    """
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 256,
    }
    return tokenizer(example["text"], **encoding_options)

# Tokenize all reviews in batches, then rename the target column from "label"
# to "labels" (the argument name Hugging Face sequence-classification models
# use for targets).
tokenized_dataset = dataset.map(tokenize_function, batched=True).rename_column(
    "label", "labels"
)

# Return only the model inputs and targets, as PyTorch tensors.
tensor_columns = ["input_ids", "attention_mask", "labels"]
tokenized_dataset.set_format("torch", columns=tensor_columns)

# Step 6: Quick fine-tuning run
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Attach a fresh 2-class classification head to the base checkpoint.
# IMDb encodes 0 = negative and 1 = positive; recording that mapping in the
# model config makes downstream pipelines (and the saved model) report
# NEGATIVE / POSITIVE instead of the generic LABEL_0 / LABEL_1.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    id2label={0: "NEGATIVE", 1: "POSITIVE"},
    label2id={"NEGATIVE": 0, "POSITIVE": 1},
)

# One short epoch with frequent logging; no intermediate checkpoints are
# written because the final model is saved explicitly afterwards.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="no"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset
)

print("🚀 بدء التدريب...")
trainer.train()

# Step 7: Try the model again after fine-tuning
print("🧪 بعد التدريب:")
trainer.model.eval()
# Trainer may have moved the model to an accelerator (e.g. a Colab GPU).
# Older transformers releases default a pipeline to CPU when no device is
# given, which would put the inputs and the model on different devices.
# Pin the pipeline to wherever the model currently lives.
classifier_updated = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=model.device,
)
# Expected: the negative class (IMDb label 0) should score higher.
print(classifier_updated("I hate this place."))
# Expected: the positive class (IMDb label 1) should score higher.
print(classifier_updated("What a wonderful project!"))

# Step 8: Persist the fine-tuned model and its tokenizer side by side so the
# directory can later be reloaded with from_pretrained("saved_model").
for artifact in (model, tokenizer):
    artifact.save_pretrained("saved_model")