<a href="https://colab.research.google.com/github/MyOptimalNext/Chat-Bot/blob/main/Untitled13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
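# Google Colab notebook: OPT-from-Scratch Chatbot on Arabic Data (updated)
# NOTE(review): the dataset loaded below ("daily_dialog") contains English dialogues,
# while the tokenizer checkpoint name suggests Arabic pretraining -- confirm the intended data.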

# 1. تثبيت الحزم المطلوبة
!pip install transformers datasets torch --quiet

# 2. استيراد المكتبات
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from transformers import AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from tqdm.auto import tqdm

# 3. تعريف طبقات OPT لنموذج دردشة
class OPTAttention(nn.Module):
    """Multi-head self-attention with a causal (lower-triangular) mask.

    Args:
        embed_dim: Width of the input/output features; must be divisible
            by ``num_heads``.
        num_heads: Number of attention heads.
        dropout: Dropout probability applied to the attention weights.
    """

    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super().__init__()
        assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        # Scores are divided by sqrt(head_dim) before the softmax.
        self.scale = math.sqrt(self.head_dim)
        self.q_proj = nn.Linear(embed_dim, embed_dim)
        self.k_proj = nn.Linear(embed_dim, embed_dim)
        self.v_proj = nn.Linear(embed_dim, embed_dim)
        self.out_proj = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)

    def _split_heads(self, projected, batch, seq_len):
        """Reshape (batch, seq, embed) into (batch, heads, seq, head_dim)."""
        per_head = projected.view(batch, seq_len, self.num_heads, self.head_dim)
        return per_head.transpose(1, 2)

    def forward(self, x):
        """Apply causal self-attention to ``x`` of shape (batch, seq, embed).

        Returns a tensor of the same shape as ``x``.
        """
        batch, seq_len, width = x.size()
        queries = self._split_heads(self.q_proj(x), batch, seq_len)
        keys = self._split_heads(self.k_proj(x), batch, seq_len)
        values = self._split_heads(self.v_proj(x), batch, seq_len)

        scores = torch.matmul(queries, keys.transpose(-2, -1)) / self.scale

        # Position i may only attend to positions <= i: everything above the
        # diagonal is set to -inf so the softmax assigns it zero weight.
        lower_tri = torch.tril(torch.ones(seq_len, seq_len, device=x.device))
        blocked = (lower_tri == 0)[None, None, :, :]
        scores = scores.masked_fill(blocked, float('-inf'))

        weights = self.dropout(torch.softmax(scores, dim=-1))
        context = torch.matmul(weights, values)

        # Merge the heads back into a single embedding dimension.
        merged = context.transpose(1, 2).contiguous().view(batch, seq_len, width)
        return self.out_proj(merged)

class OPTDecoderLayer(nn.Module):
    """One decoder block: self-attention then feed-forward, each followed by
    dropout, a residual connection and a LayerNorm (post-norm arrangement).

    Args:
        embed_dim: Feature width of the block's input and output.
        num_heads: Number of heads passed to the attention sub-layer.
        ff_dim: Hidden width of the feed-forward sub-layer.
        dropout: Dropout probability used in attention and on both
            sub-layer outputs.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super().__init__()
        self.attn = OPTAttention(embed_dim, num_heads, dropout)
        self.ln1 = nn.LayerNorm(embed_dim)
        expand = nn.Linear(embed_dim, ff_dim)
        contract = nn.Linear(ff_dim, embed_dim)
        self.ff = nn.Sequential(expand, nn.GELU(), contract)
        self.ln2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Run the block on ``x``; the output has the same shape as ``x``."""
        # Attention sub-layer: dropout -> add residual -> normalize.
        attended = self.dropout(self.attn(x))
        hidden = self.ln1(attended + x)

        # Feed-forward sub-layer follows the same pattern.
        transformed = self.dropout(self.ff(hidden))
        return self.ln2(transformed + hidden)

class OPTModel(nn.Module):
    """Decoder-only transformer body: token + learned position embeddings
    followed by a stack of ``OPTDecoderLayer`` blocks.

    Args:
        vocab_size: Number of rows in the token embedding table.
        embed_dim: Feature width used throughout the model.
        num_heads: Attention heads per decoder layer.
        num_layers: Number of decoder layers.
        ff_dim: Hidden width of each layer's feed-forward sub-layer.
        max_len: Number of learned positions, i.e. the longest sequence the
            model accepts.
        dropout: Dropout probability for the embeddings and the layers.
    """

    def __init__(self, vocab_size, embed_dim=768, num_heads=12, num_layers=12, ff_dim=3072, max_len=512, dropout=0.1):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, embed_dim)
        self.pos_emb = nn.Embedding(max_len, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.layers = nn.ModuleList([
            OPTDecoderLayer(embed_dim, num_heads, ff_dim, dropout)
            for _ in range(num_layers)
        ])

    def forward(self, input_ids):
        """Embed ``input_ids`` of shape (batch, seq) and run the decoder stack.

        Returns:
            Hidden states of shape (batch, seq, embed_dim).

        Raises:
            ValueError: If the sequence is longer than the number of learned
                positions (``max_len`` given at construction).
        """
        B, T = input_ids.size()
        max_len = self.pos_emb.num_embeddings
        # Fail early with a readable message: an out-of-range position index
        # would otherwise surface as an opaque IndexError on CPU or a
        # device-side assert on CUDA.
        if T > max_len:
            raise ValueError(
                f"Sequence length {T} exceeds the model's maximum length {max_len}"
            )
        positions = torch.arange(T, device=input_ids.device).unsqueeze(0).expand(B, T)
        x = self.token_emb(input_ids) + self.pos_emb(positions)
        x = self.dropout(x)
        for layer in self.layers:
            x = layer(x)
        return x

class OPTForCausalLM(nn.Module):
    """``OPTModel`` topped with a bias-free linear head that maps hidden
    states to vocabulary logits.

    Args:
        vocab_size: Vocabulary size for the embeddings and the output head.
        **kwargs: Forwarded unchanged to ``OPTModel``; ``embed_dim``
            (default 768) also sets the head's input width.
    """

    def __init__(self, vocab_size, **kwargs):
        super().__init__()
        self.opt = OPTModel(vocab_size, **kwargs)
        hidden_size = kwargs.get('embed_dim', 768)
        self.lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

    def forward(self, input_ids, labels=None):
        """Compute next-token logits, and the LM loss when labels are given.

        Returns:
            ``logits`` alone when ``labels`` is None, otherwise the tuple
            ``(loss, logits)``.
        """
        logits = self.lm_head(self.opt(input_ids))
        if labels is None:
            return logits

        # The logit at position t is scored against the token at t + 1, so
        # drop the last logit and the first label.
        predictions = logits[..., :-1, :].contiguous()
        targets = labels[..., 1:].contiguous()
        criterion = nn.CrossEntropyLoss()
        flat_predictions = predictions.view(-1, predictions.size(-1))
        loss = criterion(flat_predictions, targets.view(-1))
        return loss, logits

# 4. تحميل مجموعة بيانات حوارية من Hugging Face
# Load the "daily_dialog" dataset and print the train split's column names
# and its first example as a quick sanity check of the schema.
raw_datasets = load_dataset("daily_dialog")
print("Columns:", raw_datasets['train'].column_names)
print("Example:", raw_datasets['train'][0])

# 5. Load the tokenizer
# NOTE(review): the checkpoint name suggests an Arabic BERT vocabulary, while
# the example printed above is English -- confirm this pairing is intended.
tokenizer = AutoTokenizer.from_pretrained("aubmindlab/bert-base-arabertv02")

# 6. معالجة البيانات مع التحقق من العمود الصحيح

def preprocess_batch(batch, max_length=128):
    """Tokenize a batch of dialogues into fixed-length LM training examples.

    Each entry of ``batch['dialog']`` (falling back to ``batch['text']`` when
    that column is absent) may be a list of utterances, which are joined with
    " [SEP] ", or an already-flat string.

    Args:
        batch: Mapping of column name to a list of values.
        max_length: Length every sequence is truncated/padded to.

    Returns:
        A dict with ``input_ids`` and ``labels``. ``labels`` is a separate
        copy of ``input_ids`` in which padding positions are replaced by
        -100, the default ``ignore_index`` of ``nn.CrossEntropyLoss``, so
        padding does not contribute to the loss.
    """
    dialogs = batch.get('dialog')
    if dialogs is None:
        dialogs = batch.get('text', [])

    # Decide per item so mixed or empty batches do not crash on `[0]`.
    texts = [
        " [SEP] ".join(item) if isinstance(item, (list, tuple)) else item
        for item in dialogs
    ]
    if not texts:
        return {'input_ids': [], 'labels': []}

    tokens = tokenizer(
        texts,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_attention_mask=True,
    )

    # attention_mask is 0 exactly on padding positions; mask those labels out.
    labels = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokens['input_ids'], tokens['attention_mask'])
    ]
    return {'input_ids': tokens['input_ids'], 'labels': labels}

# Tokenize the train and test splits in batches. The original columns are
# removed so that only the keys returned by preprocess_batch remain.
encoded_train = raw_datasets['train'].map(
    preprocess_batch,
    batched=True,
    remove_columns=raw_datasets['train'].column_names
)
encoded_test = raw_datasets['test'].map(
    preprocess_batch,
    batched=True,
    remove_columns=raw_datasets['test'].column_names
)

# 7. إنشاء DataLoader
class ChatDataset(Dataset):
    """Torch ``Dataset`` view over an indexable collection of examples.

    Every example must be a mapping with ``'input_ids'`` and ``'labels'``
    entries; indexing returns them as a pair of tensors.
    """

    def __init__(self, hf_dataset):
        self.dataset = hf_dataset

    def __len__(self):
        """Number of examples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(input_ids, labels)`` tensors for example ``idx``."""
        row = self.dataset[idx]
        ids, targets = (torch.tensor(row[key]) for key in ('input_ids', 'labels'))
        return ids, targets

# Batches of 8 examples; only the training data is shuffled.
train_loader = DataLoader(ChatDataset(encoded_train), batch_size=8, shuffle=True)
test_loader = DataLoader(ChatDataset(encoded_test), batch_size=8)

# 8. إعداد النموذج والمُحسن

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# len(tokenizer) counts the base vocabulary plus any added tokens, whereas
# tokenizer.vocab_size excludes added tokens; sizing the embedding from the
# full length guarantees every id the tokenizer can emit is in range.
vocab_size = len(tokenizer)
model = OPTForCausalLM(
    vocab_size=vocab_size,
    embed_dim=768,
    num_heads=12,
    num_layers=6,
    ff_dim=3072,
    max_len=128,  # matches the default max_length used when tokenizing
    dropout=0.1
).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# 9. حلقة التدريب

# Train for a fixed number of epochs and report the mean per-batch loss
# after each one.
epochs = 3
for epoch in range(epochs):
    model.train()  # enable dropout
    total_loss = 0
    for input_ids, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
        input_ids = input_ids.to(device)
        labels = labels.to(device)
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        # With labels supplied the model returns (loss, logits).
        loss, _ = model(input_ids, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} - Loss: {total_loss/len(train_loader):.4f}")

# 10. تقييم النموذج

# Evaluate on the test split and report the mean per-batch loss, so the
# figure is comparable to the per-epoch training loss rather than reflecting
# only the final batch.
model.eval()  # disable dropout
total_test_loss = 0.0
with torch.no_grad():
    for input_ids, labels in tqdm(test_loader, desc="Evaluating"):
        input_ids = input_ids.to(device)
        labels = labels.to(device)
        loss, _ = model(input_ids, labels)
        total_test_loss += loss.item()

num_test_batches = len(test_loader)
if num_test_batches:
    print(f"Test Loss: {total_test_loss / num_test_batches:.4f}")
else:
    # Nothing was evaluated; avoid dividing by zero or printing a stale loss.
    print("Test Loss: n/a (test set is empty)")

Columns: ['dialog', 'act', 'emotion']
Example: {'dialog': ['Say , Jim , how about going for a few beers after dinner ? ', ' You know that is tempting but is really not good for our fitness . ', ' What do you mean ? It will help us to relax . ', " Do you really think so ? I don't . It will just make us fat and act silly . Remember last time ? ", " I guess you are right.But what shall we do ? I don't feel like sitting at home . ", ' I suggest a walk over to the gym where we can play singsong and meet some of our friends . ', " That's a good idea . I hear Mary and Sally often go there to play pingpong.Perhaps we can make a foursome with them . ", ' Sounds great to me ! If they are willing , we could ask them to go dancing with us.That is excellent exercise and fun , too . ', " Good.Let ' s go now . ", ' All right . '], 'act': [3, 4, 2, 2, 2, 3, 4, 1, 3, 4], 'emotion': [0, 0, 0, 0, 0, 0, 4, 4, 4, 4]}


Map:   0%|          | 0/11118 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Training Epoch 1:   0%|          | 0/1390 [00:00<?, ?it/s]