# **LLM Fine Tunning**
**This notebook demonstrates a comprehensive workflow for finetuning a Qwen25-1.5B-Instruct model for Arabic news using LoRA**

# Setup

In [None]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [None]:
!git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
!cd LLaMA-Factory && pip install -e .

Cloning into 'LLaMA-Factory'...
remote: Enumerating objects: 348, done.[K
remote: Counting objects: 100% (348/348), done.[K
remote: Compressing objects: 100% (289/289), done.[K
remote: Total 348 (delta 83), reused 146 (delta 44), pack-reused 0 (from 0)[K
Receiving objects: 100% (348/348), 9.53 MiB | 19.28 MiB/s, done.
Resolving deltas: 100% (83/83), done.
Obtaining file:///content/LLaMA-Factory
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting transformers!=4.46.*,!=4.47.*,!=4.48.0,<=4.50.0,>=4.41.2 (from llamafactory==0.9.3.dev0)
  Downloading transformers-4.50.0-py3-none-any.whl.metadata (39 kB)
Collecting datasets<=3.4.1,>=2.16.0 (from llamafactory==0.9.3.dev0)
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting trl<=0.9.6,>=0.8.6 (from llamafa

In [None]:
!pip uninstall -y numpy transformers vllm tensorflow numba
!pip install numpy==1.26.4
!pip install transformers==4.48.3
!pip install datasets==3.2.0 optimum==1.24.0
!pip install wandb json-repair==0.29.1 faker==35.2.0
!pip install vllm==0.7.2 --no-deps
!pip install blake3 msgspec
!pip install uvloop partial_json_parser gguf xformers pyngrok locust

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: transformers 4.50.0
Uninstalling transformers-4.50.0:
  Successfully uninstalled transformers-4.50.0
[0mFound existing installation: tensorflow 2.18.0
Uninstalling tensorflow-2.18.0:
  Successfully uninstalled tensorflow-2.18.0
Found existing installation: numba 0.60.0
Uninstalling numba-0.60.0:
  Successfully uninstalled numba-0.60.0
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llamafactory 0.9.3.dev0 requires transformers!=4.46.*,!=4.47.*,!=4.48.0,<=4.50.0,>=

In [None]:
# !pip show uvloop partial-json-parser gguf xformers pyngrok vllm

In [None]:
from google.colab import userdata  # Import Colab Secrets
import google.generativeai as genai

# Load Gemini API key securely from Colab Secrets
GOOGLE_API_KEY = userdata.get('gemini')
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize the model
gemini_model = genai.GenerativeModel('gemini-2.0-flash')

In [None]:
from google.colab import userdata
import wandb

wandb.login(key=userdata.get('wandb'))
hf_token = userdata.get('huggingface')
!huggingface-cli login --token {hf_token}

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmohamdmandor2003[0m ([33mmohamdmandor2003-own[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
The token `test` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `test`


In [None]:
import json
import os
from os.path import join
import random
from tqdm.auto import tqdm
import requests
from pyngrok import ngrok
from pydantic import BaseModel, Field
from typing import List, Optional, Literal
from datetime import datetime
import json_repair
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

data_dir = "/gdrive/MyDrive/LLM-Finetunning"
base_model_id = "Qwen/Qwen2.5-1.5B-Instruct"


device = "cuda"
torch_dtype = None

def parse_json(text):
    try:
        return json_repair.loads(text)
    except:
        return None

#Before Finetuning
##**Structured info extraction Demo**

In [None]:
story = """
ذكرت مجلة فوربس أن العائلة تلعب دورا محوريا في تشكيل علاقة الأفراد بالمال،
 حيث تتأثر هذه العلاقة بأنماط السلوك المالي المتوارثة عبر الأجيال.

التقرير الذي يستند إلى أبحاث الأستاذ الجامعي شاين إنيت حول
الرفاه المالي يوضح أن لكل شخص "شخصية مالية" تتحدد وفقا لطريقة
 تفاعله مع المال، والتي تتأثر بشكل مباشر بتربية الأسرة وتجارب الطفولة.

 الأبعاد الثلاثة للعلاقة بالمال
بحسب الدراسة، هناك ثلاثة أبعاد رئيسية تشكّل علاقتنا بالمال:

الاكتساب (A): يميل الأفراد الذين ينتمون لهذا
 البعد إلى اعتبار المال سلعة قابلة للجمع، حيث يرون
في تحقيق الثروة هدفا بحد ذاته. والجانب السلبي لهذا
 النمط هو إمكانية التحول إلى هوس بالثروة أو العكس،
 أي رفض تام لاكتساب المال باعتباره مصدرا للفساد.

الاستخدام (U): يرى هؤلاء الأشخاص المال أداة للتمتع بالحياة، حيث يربطون قيمته بقدرته على توفير
المتعة والراحة. ومع ذلك، قد يصبح
البعض مدمنا على الإنفاق، في حين يتجه آخرون إلى التقشف المفرط خوفا من المستقبل.

الإدارة (M): أصحاب هذا النمط يعتبرون المال مسؤولية تتطلب التخطيط الدقيق. لكن في بعض الحالات،
 قد يتحول الأمر إلى هوس مفرط بإدارة الإنفاق، مما يؤثر سلبا على العلاقات الشخصية.

 كيف تؤثر العائلة على علاقتنا بالمال؟
يشير التقرير إلى أن التجارب الأسرية تلعب دورا رئيسيا في تحديد
 "الشخصية المالية" لكل فرد، على سبيل المثال، إذا كان أحد الوالدين يعتمد على المال
كمكافأة للسلوك الجيد، فقد يتبنى الطفل لاحقا النمط نفسه في حياته البالغة.

لتحليل هذه التأثيرات بشكل دقيق، طورت رابطة العلاج المالي
(Financial Therapy Association) أداة تسمى مخطط الجينوم المالي (Money Genogram)،
وهو نموذج يُستخدم لتحديد الأنماط المالية داخل العائلة.

تتضمن هذه الأداة:

رسم شجرة عائلية.
تصنيف أفراد العائلة وفقا للأبعاد الثلاثة للعلاقة بالمال (A ،U ،M).
تحديد ما إذا كان السلوك المالي لكل فرد صحيا (+) أو غير صحي (-).
على سبيل المثال، إذا نشأ شخص في عائلة
اعتادت على الإنفاق المفرط، فقد يكون لديه ميل قوي إلى اتباع النمط نفسه،
 أو العكس تماما، حيث يصبح مقتصدا بشكل مبالغ فيه كرد فعل نفسي.
"""

In [None]:
# story = """
# قرر المجلس القومي للأجور في مصر، زيادة الحد الأدنى لأجر العاملين بالقطاع الخاص إلى 7 آلاف جنيه شهريًا مقابل 6 آلاف جنيه، على أن يتم تطبيق الزيادة اعتبارًا من 1 مارس 2025.
# كما قرر المجلس أن يكون الحد الأدنى لقيمة العلاوة الدورية للعاملين بالقطاع الخاص 250 جنيهًا شهريًا، ولأول مرة يقرر المجلس القومي للأجور وضع حد أدنى للأجر للعمل المؤقت "جزء من الوقت"، بحيث لا يقل أجرهم عن 28 جنيهًا صافيًا في الساعة، وذلك وفقًا لتعريفهم الوارد في قانون العمل.
# وقالت وزيرة التخطيط والتنمية الاقتصادية والتعاون الدولي، رانيا المشاط، إن رفع الحد الأدنى للأجور يأتي في إطار الحرص على الاستجابة للمستجدات الاقتصادية الراهنة، بما يعزز الاستقرار الاقتصادي والاجتماعي، مضيفة أن ذلك يتسق مع المعايير الدولية، حيث تؤكد منظمة العمل الدولية على ضرورة مراجعة الحد الأدنى للأجور على أساس دوري، لحماية القوة الشرائية للأسر، واستيعاب التغيرات الاقتصادية التدريجية.
# """

## Details Extraction

In [None]:
# {
#  "story_title": "",
#  "story_keywords": ["kw1", "kw2"],
#  "story_summary": ["....", ",,,,"],
#  "story_category": "",
#  "Story_entities": [{
#     "story_value": "القاهره",
#     "story_type": "location"
# }]
# }

StoryCategory = Literal["entertainment", "politics", "art", "technology", "food", "travel", "econmy", "not_specified", "sports", "economy", "health", "science"]
EntityType = Literal["person-male", "person-female", "location", "organization", "event", "time","quantity", "money", "product", "law", "disease", "artifact", "not_specified"]


class Entity(BaseModel):
  entity_value: str = Field(..., description="The actual name or the value of the entity.")
  entity_type: EntityType = Field(..., description="The type of recognized entity.")


class NewsDetails(BaseModel):
    story_title: str = Field(..., min_length=5, max_length=300, description="A fully informative and SEO optimized title of the story.")  # ... means it required to be filled
    story_keywords: List[str] = Field(..., min_items=1,description="Relevant keywords associated with the story.")# we cant use list cuz i can customize ite elements so i will use List
    story_summary: List[str] = Field(..., min_items=1, max_items=5,description="Summarized key points about the story (1-5 points).")
    story_category: StoryCategory = Field(..., description="Category of the news story.") # need to specify the categories of it ,we will use literal like above
    story_entities: List[Entity] = Field(..., min_items=1, max_items=10,description="List of identified entities in the story.")

In [None]:
# we used in content  join method in array elements cuz if we used   """ """  it will take the spaces too to LLM :(
details_extraction_messages = [
    {
        "role": "system",
        "content": "\n".join([
            "You are an NLP data paraser.",
            "You will be provided by an Arabic text associated with a Pydantic scheme.",
            "Generate the ouptut in the same story language.",
            "You have to extract JSON details from text according the Pydantic details.",
            "Extract details as mentioned in text.",
            "Do not generate any introduction or conclusion."
        ])
    },
    {
        "role": "user",
        "content" : "\n".join([
            "## Story:",
            story.strip(),
            " ",
            "Pydantic Details:",
            json.dumps(NewsDetails.model_json_schema(), ensure_ascii=False),
            "",
            "## Story Details:",
            "```json"
        ])
    }]

In [None]:
# json.dumps(["السلام عليكم "], ensure_ascii=False)
json.dumps(["السلام عليكم "])

'["\\u0627\\u0644\\u0633\\u0644\\u0627\\u0645 \\u0639\\u0644\\u064a\\u0643\\u0645 "]'

In [None]:
NewsDetails.model_json_schema()

{'$defs': {'Entity': {'properties': {'entity_value': {'description': 'The actual name or the value of the entity.',
     'title': 'Entity Value',
     'type': 'string'},
    'entity_type': {'description': 'The type of recognized entity.',
     'enum': ['person-male',
      'person-female',
      'location',
      'organization',
      'event',
      'time',
      'quantity',
      'money',
      'product',
      'law',
      'disease',
      'artifact',
      'not_specified'],
     'title': 'Entity Type',
     'type': 'string'}},
   'required': ['entity_value', 'entity_type'],
   'title': 'Entity',
   'type': 'object'}},
 'properties': {'story_title': {'description': 'A fully informative and SEO optimized title of the story.',
   'maxLength': 300,
   'minLength': 5,
   'title': 'Story Title',
   'type': 'string'},
  'story_keywords': {'description': 'Relevant keywords associated with the story.',
   'items': {'type': 'string'},
   'minItems': 1,
   'title': 'Story Keywords',
   'type':

## Translation

In [None]:
# {
#     "translated_title": "",
#     "translated_content":""
# }


class TranslatedStory(BaseModel):
    translated_title: str = Field(..., min_length=5, max_length=300, description="Suggested Translated title to the news story.")
    translated_content: str = Field(...,min_length=5, description="The translated content of the news story.")



targeted_lang = "English"
translation_messages = [
            {
                "role": "system",
                "content": "\n".join([
                    "You are a professional translator.",
                    "You will be provided by an Arabic text.",
                    f"You have to translate the text into {targeted_lang} language.",
                    "Follow the provided Scheme to generate a JSON",
                    "Do not generate any introduction or conclusion."
                ])
            },
            {
                "role": "user",
                "content": "\n".join([
                    "## Story:",
                    story.strip(),
                    "",


                    "## Pydantic Details:",
                    json.dumps( TranslatedStory.model_json_schema(), ensure_ascii=False ),
                    "",

                    "## Targeted Language or Dialect:",
                    targeted_lang,
                    "",


                    "## Translated Story:",
                    "```json"
                ])
            }
        ]



# Evaluation

##**Qween**

In [None]:
# load pretrained model
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",    # when you work with GPU
    torch_dtype=torch_dtype,  # load LLM in GPU memory
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model

In [None]:
# assiging task to model using template(chat template) that model has trained on like the below
text = tokenizer.apply_chat_template(
    details_extraction_messages,
    tokenize=False,
    add_generation_prompt=True,
)


# return tensors in pytorch (pt) format , tokenization is in CPU memory so we need to send them to GPU memory so model can access to them
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generate_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=1024,
    do_sample=False,
    top_k=None,
    top_p=None,
    temperature=None)

# tokenize the outputs to words
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids , output_ids in zip(model_inputs.input_ids, generate_ids)]


# tansform the ids to words , skip_special_tokens=True  means hide special tokens
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]  # return list of reponse ( i use onlt 1 text so it will output 1 response but for future useage)
# displaying model results (models lacks are 1- no arabic  /  2- limited entities have been extracted)
print(response)

{
  "story_title": "How Family Influences Financial Behavior",
  "story_keywords": [
    "family influence",
    "financial behavior",
    "moneymaking",
    "money management",
    "inheritance"
  ],
  "story_summary": [
    "Family plays a crucial role in shaping individuals' financial relationships.",
    "Individuals inherit certain financial behaviors from their families."
  ],
  "story_category": "economics",
  "story_entities": [
    {
      "entity_value": "Forbes Magazine",
      "entity_type": "organization"
    },
    {
      "entity_value": "Shain Enit",
      "entity_type": "person-female"
    },
    {
      "entity_value": "Financial Therapy Association",
      "entity_type": "organization"
    }
  ]
}


In [None]:
text = tokenizer.apply_chat_template(
    translation_messages,
    tokenize=False,
    add_generation_prompt=True,
)


model_inputs = tokenizer([text], return_tensors="pt").to(device)

generate_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=1024,
    do_sample=False,
    top_k=None,
    top_p=None,
    temperature=None)

generated_ids = [
    output_ids[len(input_ids):]
    for input_ids , output_ids in zip(model_inputs.input_ids, generate_ids)]


response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]  # return list of reponse ( i use onlt 1 text so it will output 1 response but for future useage)

print(response)

{
  "translated_title": "Forbes Magazine Reveals Family Plays a Central Role in Forming Individuals' Financial Relationships",
  "translated_content": "According to Forbes magazine, family plays a crucial role in shaping individuals' financial relationships, as these relationships are influenced by inherited behavioral patterns across generations."
}


##**Gemini**

In [None]:
# Combine the messages into a single prompt for Gemini
system_content = details_extraction_messages[0]["content"]
user_content = details_extraction_messages[1]["content"]

# Format the combined prompt for Gemini
gemini_prompt = f"{system_content}\n\n{user_content}"

# Call Gemini API with the formatted prompt
gemini_extraction_response = gemini_model.generate_content(gemini_prompt)
gemini_extraction_output = gemini_extraction_response.text


try:
    extracted_details_json = parse_json(gemini_extraction_output)
    if extracted_details_json:
        print(json.dumps(extracted_details_json, ensure_ascii=False, indent=2))
    else:
        print("Could not parse JSON from Gemini output. Raw output:")
        print(gemini_extraction_output)
except json.JSONDecodeError as e:
    print(f"JSONDecodeError: {e}")
    print("Raw Gemini output:")
    print(gemini_extraction_output)

{
  "story_title": "كيف تشكل العائلة علاقتك بالمال: الأبعاد الثلاثة وتأثير التجارب الأسرية",
  "story_keywords": [
    "العلاقة بالمال",
    "الشخصية المالية",
    "التجارب الأسرية",
    "الأبعاد المالية",
    "مخطط الجينوم المالي"
  ],
  "story_summary": [
    "العائلة تلعب دورا محوريا في تشكيل علاقة الأفراد بالمال.",
    "لكل شخص 'شخصية مالية' تتحدد وفقا لطريقة تفاعله مع المال.",
    "هناك ثلاثة أبعاد رئيسية تشكل علاقتنا بالمال: الاكتساب، الاستخدام، والإدارة.",
    "التجارب الأسرية تحدد 'الشخصية المالية' لكل فرد.",
    "أداة 'مخطط الجينوم المالي' تستخدم لتحديد الأنماط المالية داخل العائلة."
  ],
  "story_category": "economy",
  "story_entities": [
    {
      "entity_value": "فوربس",
      "entity_type": "organization"
    },
    {
      "entity_value": "شاين إنيت",
      "entity_type": "person-male"
    },
    {
      "entity_value": "رابطة العلاج المالي",
      "entity_type": "organization"
    },
    {
      "entity_value": "مخطط الجينوم المالي",
      "entity_type": "product"
   

In [None]:
system_content = translation_messages[0]["content"]
user_content = translation_messages[1]["content"]
gemini_prompt = f"{system_content}\n\n{user_content}"

gemini_response = gemini_model.generate_content(gemini_prompt)
gemini_output = gemini_response.text

try:
    translated_json = parse_json(gemini_output)
    if translated_json:
        print(json.dumps(translated_json, ensure_ascii=False))
    else:
        print("Could not parse JSON from Gemini output. Raw output:")
        print(gemini_output)
except json.JSONDecodeError as e:
    print(f"JSONDecodeError: {e}")
    print("Raw Gemini output:")
    print(gemini_output)

{"translated_title": "How Family Shapes Your Relationship with Money: Forbes Report", "translated_content": "Forbes magazine reported that family plays a pivotal role in shaping individuals' relationship with money, as this relationship is influenced by patterns of financial behavior inherited across generations.\n\nThe report, based on research by Professor Shane Enete on financial well-being, explains that each person has a \"financial personality\" determined by their way of interacting with money, which is directly influenced by family upbringing and childhood experiences.\n\nThe Three Dimensions of the Relationship with Money\nAccording to the study, there are three main dimensions that shape our relationship with money:\n\nAcquisition (A): Individuals belonging to this dimension tend to view money as a commodity to be collected, seeing wealth creation as a goal in itself. The downside of this pattern is the potential to turn into an obsession with wealth or, conversely, a complet

In [None]:
parse_json(gemini_output) # if we not use this lib it will raise an error so we can tuse the reponse of the model in json format never

{'translated_title': 'How Family Shapes Your Relationship with Money: Forbes Report',
 'translated_content': 'Forbes magazine reported that family plays a pivotal role in shaping individuals\' relationship with money, as this relationship is influenced by patterns of financial behavior inherited across generations.\n\nThe report, based on research by Professor Shane Enete on financial well-being, explains that each person has a "financial personality" determined by their way of interacting with money, which is directly influenced by family upbringing and childhood experiences.\n\nThe Three Dimensions of the Relationship with Money\nAccording to the study, there are three main dimensions that shape our relationship with money:\n\nAcquisition (A): Individuals belonging to this dimension tend to view money as a commodity to be collected, seeing wealth creation as a goal in itself. The downside of this pattern is the potential to turn into an obsession with wealth or, conversely, a complet

In [None]:
type(gemini_output) # cuz its str we need to do parsing

str

#**Knowledge Distillation**

In [None]:
raw_dat_path = join(data_dir, "datasets", "news-sample.jsonl")
raw_data = []

for line in open(raw_dat_path, "r"):
    if line.strip=="":
        continue
    raw_data.append(json.loads(line))


random.Random(101).shuffle(raw_data)
print(f"Raw Data:{len(raw_data)}")

Raw Data:2400


In [None]:
raw_data[0] #its object

{'id': 975,
 'title': 'ما تقدمه فلسطين للعالم.. معرض لآمال وآلام شعبها في باريس',
 'description': 'يواصل المعهد العربي في باريس استقبال زواره في معرض “ما تقدمه فلسطين للعالم” لإطلاعهم على الإرث الثقافي والفني للفلسطينيين، من خلال أعمال فنية لآمالهم، وصور لواقعهم الأليم تحت الاحتلال.',
 'content': 'يواصل المعهد العربي في باريس استقبال زواره في معرض ما تقدمه فلسطين للعالم لإطلاعهم على الإرث الثقافي والفني للفلسطينيين؛ من خلال أعمال فنية لآمالهم وصور لواقعهم الأليم تحت الاحتلال. \n ويرى رئيس المعهد جاك لانغ -الذي أُعيد انتخابه قبل أيام للدورة الرابعة- ما يحدث في غزة حاليا جراء العدوان الإسرائيلي أنه كارثة. \n والمعهد هو مركز ثقافي وواجهة دبلوماسية يديرها لانغ منذ 2013 ويقع على ضفة نهر السين في باريس. \n وأشار لانغ، الذي شغل سابقا منصب وزير الثقافة بفرنسا، إلى أن المعرض هو إهداء للشعب الفلسطيني، ومُدّد ليستقبل مزيدا من الزوار حتى 31 ديسمبركانون الأول الجاري. \n ويضم المعرض، الذي افتُتح أواخر مايوأيار الماضي، حسب لانغ العديد من المعارض الفرعية عن فلسطين وعن غزة بالتحديد، من بينها معرض الصور

In [None]:
raw_data[0]['content'] #its object

'يواصل المعهد العربي في باريس استقبال زواره في معرض ما تقدمه فلسطين للعالم لإطلاعهم على الإرث الثقافي والفني للفلسطينيين؛ من خلال أعمال فنية لآمالهم وصور لواقعهم الأليم تحت الاحتلال. \n ويرى رئيس المعهد جاك لانغ -الذي أُعيد انتخابه قبل أيام للدورة الرابعة- ما يحدث في غزة حاليا جراء العدوان الإسرائيلي أنه كارثة. \n والمعهد هو مركز ثقافي وواجهة دبلوماسية يديرها لانغ منذ 2013 ويقع على ضفة نهر السين في باريس. \n وأشار لانغ، الذي شغل سابقا منصب وزير الثقافة بفرنسا، إلى أن المعرض هو إهداء للشعب الفلسطيني، ومُدّد ليستقبل مزيدا من الزوار حتى 31 ديسمبركانون الأول الجاري. \n ويضم المعرض، الذي افتُتح أواخر مايوأيار الماضي، حسب لانغ العديد من المعارض الفرعية عن فلسطين وعن غزة بالتحديد، من بينها معرض الصور اليومية عن الحياة في غزة. \n كما يشتمل على معرض الصور الفوتوكرومية القائم على تلوين صور من فلسطين تعود للقرن الـ19. \n ويعرض الفنان الفلسطيني محمد أبو سل عملا فريدا بعنوان مترو غزة، وهو عبارة عن عمل تركيبي متعدد الوسائط، لاقى إعجابا من الزوار. \n ويحضر الشاعر الفلسطيني الراحل محمود درويش من خلال 

In [None]:
# knowledge distillation for Details Extraction
save_to = join(data_dir, "datasets", "sft.jsonl")
ix = 0

for story in tqdm(raw_data):
    # Create messages structure
    extraction_messages = [
        {
            "role": "system",
            "content": "\n".join([
                "You are an NLP data parser.",
                "You will be provided by an Arabic text associated with a Pydantic scheme.",
                "Generate the output in the same story language.",
                "You have to extract JSON details from text according the Pydantic details.",
                "Extract details as mentioned in text.",
                "Do not generate any introduction or conclusion."
            ])
        },
        {
            "role": "user",
            "content": "\n".join([
                "## Story:",
                story['content'].strip(),
                "",
                "Pydantic Details:",
                json.dumps(NewsDetails.model_json_schema(), ensure_ascii=False),
                "",
                "## Story Details:",
                "```json"
            ])
        }
    ]

    # Combine for Gemini
    system_content = extraction_messages[0]["content"]
    user_content = extraction_messages[1]["content"]
    gemini_prompt = f"{system_content}\n\n{user_content}"

    try:
        # Call Gemini API
        response = gemini_model.generate_content(gemini_prompt)

        if not response.text:
            continue

        # Parse the response
        llm_resp_dict = parse_json(response.text)

        if not llm_resp_dict:
            continue

        # Save the successful response
        with open(save_to, "a", encoding="utf8") as dest:
            dest.write(json.dumps({
                "id": ix,
                "story": story['content'].strip(),
                "task": "Extract the story details into json.",
                "output_scheme": json.dumps(NewsDetails.model_json_schema(), ensure_ascii=False),
                "response": llm_resp_dict,
            }, ensure_ascii=False, default=str) + "\n")

        ix += 1

        # Log progress
        if (ix % 3) == 0:
            print(f"Iteration {ix}: Processed {ix} stories successfully")

    except Exception as e:
        print(f"Error processing story {ix}: {str(e)}")
        continue

  0%|          | 0/2400 [00:00<?, ?it/s]

Iteration 3: Processed 3 stories successfully
Iteration 6: Processed 6 stories successfully
Iteration 9: Processed 9 stories successfully
Iteration 12: Processed 12 stories successfully


KeyboardInterrupt: 

In [None]:
# Knowledge Distillation for Translation
save_to = join(data_dir, "datasets", "xsft.jsonl")

ix = 0
for story in tqdm(raw_data):
    for targeted_lang in ["English", "French"]:
        # Create the Gemini prompt for translation
        prompt_gemini_extraction = "\n".join([
            "You are a professional translator",
            "You will be provided by an Arabic text",
            "You have to translate the text into the Targeted Language",
            f"You have to translate the text into {targeted_lang} language",
            "Follow the provided Scheme to generate a JSON",
            "Do not generate any introduction or conclusion",
            "",
            "Pydantic Details:",
            json.dumps(TranslatedStory.model_json_schema(), ensure_ascii=False),
            "",
            "Story:",
            story['content'].strip(),
            "",
            "Story Details in JSON:",
            "```json"
        ])

        try:
            # Call Gemini API
            response = gemini_model.generate_content(prompt_gemini_extraction)

            if not response.text:
                continue

            # Parse the response
            llm_resp_dict = parse_json(response.text)

            if not llm_resp_dict:
                continue

            # Save the successful response
            with open(save_to, "a", encoding="utf8") as dest:
                dest.write(json.dumps({
                    "id": ix,
                    "story": story['content'].strip(),
                    "task": f"You have to translate the story content into {targeted_lang} associated with a title into a JSON.",
                    "output_scheme": json.dumps(TranslatedStory.model_json_schema(), ensure_ascii=False),
                    "response": llm_resp_dict,
                }, ensure_ascii=False, default=str) + "\n")

            ix += 1

            # Gemini doesn't provide token usage in the free tier, so we'll just track iterations
            if (ix % 3) == 0:
                print(f"Iteration {ix}: Processed {ix} stories successfully")

        except Exception as e:
            print(f"Error processing story {ix}: {str(e)}")
            continue

# Fromat Finetuning Datasets

In [None]:
sft_data_path = join(data_dir, "datasets", "sft.jsonl")

llm_finetunning_data = []

for line in open(sft_data_path):
    if line.strip() == "": # empty line
        continue
    # llm_finetunning_data.append(json.loads(line))
    rec = json.loads(line.strip())
    break

rec

{'id': 0,
 'story': 'ظلت أسعار المنتجين بالولايات المتحدة دون تغيير في سبتمبرأيلول الماضي مدفوعة بانخفاض تكاليف البنزين، مما يشير إلى تقدم نحو تضخم أقل حدة، وهو ما يدعم توقعات خفض مجلس الاحتياطي الاتحادي المركزي الأميركي أسعار الفائدة مجددا الشهر المقبل. \n وقال مكتب إحصاءات العمل التابع لوزارة العمل -في تقرير صدر اليوم الجمعة- إن القراءة الثابتة لمؤشر أسعار المنتجين للطلب النهائي الشهر الماضي جاءت بعد زيادة غير معدلة بلغت 0.2 في أغسطسآب الماضي. وعلى أساس سنوي، ارتفع المؤشر بنسبة 1.8، وهو أقل تقدم منذ فبرايرشباط الماضي. \n ويُظهر التقرير أن مؤشرا أقل تقلبا، يُستخدم لقياس التضخم باستثناء الغذاء والطاقة والتجارة، ارتفع بنسبة 0.1، مما يعادل أقل زيادة منذ مايوأيار 2023. في وقت ظهرت فيه البيانات الخاصة بالتضخم العام والقطاعات التي يعتمد عليها الاحتياطي الفدرالي لاتخاذ قراراته. \n وقد استقرت تكاليف الرعاية الطبية وتكاليف الرعاية الخارجية بالمستشفيات، في حين ارتفعت أسعار تذاكر الطيران بشكل حاد. \n توقع المتداولون أن يخفض الاحتياطي الفدرالي أسعار الفائدة ربع نقطة مئوية الشهر المقبل، بعد أن بدأ

In [None]:
sft_data_path = join(data_dir, "datasets", "sft.jsonl")
llm_finetunning_data = []

# Simplified system message for fine-tuning
system_message = "\n".join([
    "You are a professional NLP data parser.",
    "Follow the provided `Task` by the user and the `Output Scheme` to generate the `Output JSON`.",
    "Do not generate any introduction or conclusion."
])

for line in open(sft_data_path, encoding='utf-8'):
    if not line.strip():  # Skip empty lines
        continue

    rec = json.loads(line.strip())

    # Construct structured training example
    instruction_parts = [
        "# Story:",
        rec["story"],
        "",
        "# Task:",
        rec["task"],
        "",
        "# Output Scheme:",
        rec["output_scheme"],
        "",
        "# Output JSON:",
        "```json"
    ]

    output_parts = [
        "```json",
        json.dumps(rec["response"], ensure_ascii=False, default=str),
        "```"
    ]

    llm_finetunning_data.append({
        "system": system_message,
        "instruction": "\n".join(instruction_parts),
        "input": "",
        "output": "\n".join(output_parts),
        "history": []
    })

# Shuffle data with fixed seed for reproducibility
random.Random(101).shuffle(llm_finetunning_data)
print(f"LLM Finetuning Data: {len(llm_finetunning_data)}")  # in tutorial = 2766

LLM Finetuning Data: 2778


In [None]:
# i need to split data into train,test,validate

train_sample_sz = 2700

train_ds= llm_finetunning_data[:train_sample_sz] #all before 2700 till the end
eval_ds= llm_finetunning_data[train_sample_sz:]# after 2700 till the end


os.makedirs(join(data_dir, "datasets", "llamafactory-finetune-data"), exist_ok=True)

# w is for writing access
with open(join(data_dir, "datasets", "llamafactory-finetune-data", "train.json"), "w") as dest:
    json.dump(train_ds, dest, ensure_ascii=False, default=str)

with open(join(data_dir, "datasets", "llamafactory-finetune-data", "val.json"), "w", encoding="utf8") as dest:
    json.dump(eval_ds, dest, ensure_ascii=False, default=str)

In [None]:
join(data_dir, "datasets", "llamafactory-finetune-data", "val.json")

'/gdrive/MyDrive/LLM-Finetunning/datasets/llamafactory-finetune-data/val.json'

# Finetune

In [None]:
# # Configure LLaMA-Factory for the new datasets

# # update /content/LLaMA-Factory/data/dataset_info.json and append
# ```
   "news_finetune_train": {
        "file_name": "/gdrive/MyDrive/LLM-Finetunning/datasets/llamafactory-finetune-data/train.json",
        "columns": {
            "prompt": "instruction",
            "query": "input",
            "response": "output",
            "system": "system",
            "history": "history"
        }
    },
    "news_finetune_val": {
        "file_name": "/gdrive/MyDrive/LLM-Finetunning/datasets/llamafactory-finetune-data/val.json",
        "columns": {
            "prompt": "instruction",
            "query": "input",
            "response": "output",
            "system": "system",
            "history": "history"
        }
    }
# ```

# https://wandb.ai/mr-bakrianoo/llamafactory/runs/apwbkni9
# https://wandb.ai/mr-bakrianoo/llamafactory/runs/c5tf0q90

we need to write new file inside train_lora(optional but prefered)
%% is magic command
anthing we write in the following cell is written inside news_finetune.yaml file

In [None]:
%%writefile /content/LLaMA-Factory/examples/train_lora/news_finetune.yaml

### model
model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 64
lora_target: all

### dataset
dataset: news_finetune_train
eval_dataset: news_finetune_val
template: qwen
cutoff_len: 3500
# max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16

### output
# resume_from_checkpoint: /gdrive/MyDrive//LLM-Finetunning/models/checkpoint-1500
output_dir: /gdrive/MyDrive/LLM-Finetunning/models/
logging_steps: 10
save_steps: 500
plot_loss: true
# overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 4
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
# val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 100

report_to: wandb
run_name: newsx-finetune-llamafactory

# push_to_hub: true
# export_hub_model_id: "bakrianoo/news-analyzer"
# hub_private_repo: true
# hub_strategy: checkpoint


Writing /content/LLaMA-Factory/examples/train_lora/news_finetune.yaml


In [None]:
!cd LLaMA-Factory/ && llamafactory-cli train /content/LLaMA-Factory/examples/train_lora/news_finetune.yaml

INFO 03-30 15:50:23 __init__.py:190] Automatically detected platform cuda.
[INFO|2025-03-30 15:50:27] llamafactory.hparams.parser:383 >> Process rank: 0, world size: 1, device: cuda:0, distributed training: False, compute dtype: torch.bfloat16
[INFO|tokenization_utils_base.py:2034] 2025-03-30 15:50:28,058 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/vocab.json
[INFO|tokenization_utils_base.py:2034] 2025-03-30 15:50:28,058 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/merges.txt
[INFO|tokenization_utils_base.py:2034] 2025-03-30 15:50:28,058 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer.json
[INFO|tokenization_utils_base.py:2034] 2025-03-30 15:50

# Evaluation

## New Finetuned Model Evaluation

In [None]:
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    torch_dtype = torch_dtype
)

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [None]:
finetuned_model_id = "/gdrive/MyDrive/LLM-Finetunning/models"
model.load_adapter(finetuned_model_id)
model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1536)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=1536, out_features=1536, bias=True)
            (lora_dropout): ModuleDict(
              (default): Identity()
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=1536, out_features=64, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=64, out_features=1536, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear(
            (base_layer): Linear(in_features=1536, out_features=256, bias=True)
            (lora_dropout): ModuleDict(
              (default): Identity()
         

**Lets Test Finetuned Qween with Extraction task and see if it will Generate it in Arabic**

In [None]:
# assiging task to model using template(chat template) that model has trained on like the below
text = tokenizer.apply_chat_template(
    details_extraction_messages,
    tokenize=False,
    add_generation_prompt=True,
)


# return tensors in pytorch (pt) format , tokenization is in CPU memory so we need to send them to GPU memory so model can access to them
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generate_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=1024,
    do_sample=False,
    top_k=None,
    top_p=None,
    temperature=None)

# tokenize the outputs to words
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids , output_ids in zip(model_inputs.input_ids, generate_ids)]


# tansform the ids to words , skip_special_tokens=True  means hide special tokens
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]  # return list of reponse ( i use onlt 1 text so it will output 1 response but for future useage)
# displaying model results (models lacks are 1- no arabic  /  2- limited entities have been extracted)
parse_json(response)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


{'story_title': 'تأثير العائلة على علاقة الأفراد بالمال',
 'story_keywords': ['العائلة',
  'ال金钱',
  'السلوكيات المالية',
  'الصحة المالية',
  'تخطيط النفقات'],
 'story_summary': ['العلاقة بين الأفراد والمال تعتمد على أنماط السلوك المالي.',
  'الثلاثة أبعاد الرئيسية للعلاقة بالمال هي: الاكتساب، الاستخدام، والإدارة.',
  'التجارب الأسرية تؤثر على شخصية每个人的财务管理.',
  'تقرير عن العلاقة بين العائلة والمال يقدم نصائح عملية.'],
 'story_category': 'economy',
 'story_entities': [{'entity_value': 'فوربس', 'entity_type': 'organization'},
  {'entity_value': 'شاين إنيت', 'entity_type': 'person-male'},
  {'entity_value': 'رابطة العلاج المالي', 'entity_type': 'organization'},
  {'entity_value': 'Money Genogram', 'entity_type': 'artifact'}]}

**In Translation Taks it exceed and complete the full message not like before**

In [None]:
def generate_resp(messages):
    text = tokenizer.apply_chat_template(
        translation_messages,
        tokenize=False,
        add_generation_prompt=True
    )

    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=1024,
        do_sample=False, top_k=None, temperature=None, top_p=None,
    )

    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return response

response = generate_resp(translation_messages)
parse_json(response)

{'translated_title': 'How Family Influences Financial Relationships',
 'translated_content': 'Forbes magazine reported that the family plays a pivotal role in shaping individuals\' relationship with money, as this relationship is influenced by inherited financial behaviors across generations.\n\nThe report, based on research by Professor Shane Everette on financial well-being, explains that each person has a \'financial personality\' determined by how they interact with money, which is directly affected by family upbringing and childhood experiences.\n\nThe three dimensions of our financial relationship according to the study include:\n\nAcquisition (A): Individuals belonging to this dimension tend to view money as a commodity that can be accumulated, seeing wealth accumulation as a goal in itself. The downside of this pattern is the potential for it to turn into an obsession with wealth or vice versa, meaning complete rejection of acquiring money as a means of corruption.\n\nUsage (U)

# Cost Estimation

In [None]:
from tqdm.auto import tqdm
from faker import Faker
import random
from datetime import datetime

start_time = datetime.now()
fake = Faker('ar')

input_tokens = 0
output_tokens = 0

for i in tqdm(range(30)):
    prompt = fake.text(max_nb_chars=random.randint(150, 200))

    messages = [
        {
            "role": "user",
            "content": prompt,
        }
    ]

    response = generate_resp(messages)

    input_tokens += len(tokenizer.apply_chat_template(messages))
    output_tokens += len(tokenizer.encode(response))

total_time = (datetime.now() - start_time).total_seconds()

print(f"Total Time: {total_time} seconds")
print(f"Input Tokens: {input_tokens}")
print(f"Output Tokens: {output_tokens}")
print(f"Total Tokens: {input_tokens + output_tokens}")

  0%|          | 0/30 [00:00<?, ?it/s]

Total Time: 861.590396 seconds
Input Tokens: 2489
Output Tokens: 13740
Total Tokens: 16229


In [None]:
13740 /861  # 15 tokens/second

15.958188153310104

# vLLM

In [61]:
base_model_id = "Qwen/Qwen2.5-1.5B-Instruct"
adapter_model_id = "/gdrive/MyDrive/LLM-Finetunning/models"

!nohup vllm serve "{base_model_id}" --dtype=half --gpu-memory-utilization 0.4 --max_lora_rank 64 --enable-lora --lora-modules news-lora="{adapter_model_id}" --max_model_len 2048 --port 8001 --enforce-eager > nohup.out 2> nohup.err &

In [62]:
# !pkill vllm
# !ps aux | grep vllm
# !kill 34339
# ngrok.kill()  # This will terminate all existing ngrok tunnels.

root       12908  0.0  0.0   7376  3364 ?        S    23:52   0:00 /bin/bash -c ps aux | grep vllm
root       12910  0.0  0.0   6484  2112 ?        S    23:52   0:00 grep vllm


In [54]:
!tail -n 100 nohup.out
!tail -n 100 nohup.err

INFO 04-02 23:43:35 __init__.py:190] Automatically detected platform cuda.
INFO 04-02 23:43:37 api_server.py:840] vLLM API server version 0.7.2
INFO 04-02 23:43:37 api_server.py:841] args: Namespace(subparser='serve', model_tag='Qwen/Qwen2.5-1.5B-Instruct', config='', host=None, port=8001, uvicorn_log_level='info', allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=[LoRAModulePath(name='news-lora', path='/gdrive/MyDrive/LLM-Finetunning/models', base_model_name=None)], prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, enable_reasoning=False, reasoning_parser=None, tool_call_parser=None, tool_parser_plugin='', model='Qwen/Qwen

In [63]:
ngrok_token = userdata.get('ngrok')
ngrok.set_auth_token(ngrok_token)
public_url = ngrok.connect(8001).public_url
print(f"vLLM API server available at: {public_url}")

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
vLLM API server available at: https://522c-34-124-187-102.ngrok-free.app


# inference

In [55]:
story = """
قرر المجلس القومي للأجور في مصر، زيادة الحد الأدنى لأجر العاملين بالقطاع الخاص إلى 7 آلاف جنيه شهريًا مقابل 6 آلاف جنيه، على أن يتم تطبيق الزيادة اعتبارًا من 1 مارس 2025.
كما قرر المجلس أن يكون الحد الأدنى لقيمة العلاوة الدورية للعاملين بالقطاع الخاص 250 جنيهًا شهريًا، ولأول مرة يقرر المجلس القومي للأجور وضع حد أدنى للأجر للعمل المؤقت "جزء من الوقت"، بحيث لا يقل أجرهم عن 28 جنيهًا صافيًا في الساعة، وذلك وفقًا لتعريفهم الوارد في قانون العمل.
وقالت وزيرة التخطيط والتنمية الاقتصادية والتعاون الدولي، رانيا المشاط، إن رفع الحد الأدنى للأجور يأتي في إطار الحرص على الاستجابة للمستجدات الاقتصادية الراهنة، بما يعزز الاستقرار الاقتصادي والاجتماعي، مضيفة أن ذلك يتسق مع المعايير الدولية، حيث تؤكد منظمة العمل الدولية على ضرورة مراجعة الحد الأدنى للأجور على أساس دوري، لحماية القوة الشرائية للأسر، واستيعاب التغيرات الاقتصادية التدريجية.
"""

In [None]:
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

prompt = tokenizer.apply_chat_template(
    translation_messages,
    tokenize=False,
    add_generation_prompt=True
)

prompt

'<|im_start|>system\nYou are a professional translator.\nYou will be provided by an Arabic text.\nYou have to translate the text into English language.\nFollow the provided Scheme to generate a JSON\nDo not generate any introduction or conclusion.<|im_end|>\n<|im_start|>user\n## Story:\nقرر المجلس القومي للأجور في مصر، زيادة الحد الأدنى لأجر العاملين بالقطاع الخاص إلى 7 آلاف جنيه شهريًا مقابل 6 آلاف جنيه، على أن يتم تطبيق الزيادة اعتبارًا من 1 مارس 2025.\nكما قرر المجلس أن يكون الحد الأدنى لقيمة العلاوة الدورية للعاملين بالقطاع الخاص 250 جنيهًا شهريًا، ولأول مرة يقرر المجلس القومي للأجور وضع حد أدنى للأجر للعمل المؤقت "جزء من الوقت"، بحيث لا يقل أجرهم عن 28 جنيهًا صافيًا في الساعة، وذلك وفقًا لتعريفهم الوارد في قانون العمل.\nوقالت وزيرة التخطيط والتنمية الاقتصادية والتعاون الدولي، رانيا المشاط، إن رفع الحد الأدنى للأجور يأتي في إطار الحرص على الاستجابة للمستجدات الاقتصادية الراهنة، بما يعزز الاستقرار الاقتصادي والاجتماعي، مضيفة أن ذلك يتسق مع المعايير الدولية، حيث تؤكد منظمة العمل الدو

In [None]:
vllm_model_id = "news-lora"

llm_response = requests.post("http://localhost:8001/v1/completions", json={
    "model": vllm_model_id,
    "prompt": prompt,
    "max_tokens": 1000,
    "temperature": 0.3
})

llm_response.json()

{'id': 'cmpl-19a1fcd1c24542d5b589e127f118ab11',
 'object': 'text_completion',
 'created': 1743636302,
 'model': 'news-lora',
 'choices': [{'index': 0,
   'text': '```json{"translated_title": "Egypt\'s National Labor Council Increases Minimum Wage to 7,000 EGP", "translated_content": "The National Labor Council in Egypt has decided to increase the minimum wage for private sector workers to 7,000 Egyptian pounds per month, compared to 6,000 pounds. The increase will be implemented starting March 2025. Additionally, the council has set the minimum value for periodic bonuses for private sector workers at 250 pounds per month for the first time, deciding to establish a minimum wage for temporary work "part of the time," ensuring their wage is not less than 28 pounds net per hour, according to their definition as stated in the Labor Law. Minister of Planning and Economic Development and International Cooperation, Rania Maktaba, stated that raising the minimum wage is in line with the efforts

In [None]:
vllm_model_id = "news-lora"

llm_response = requests.post("http://localhost:8001/v1/completions", json={
    "model": vllm_model_id,
    "prompt": prompt,
    "max_tokens": 1000,
    "temperature": 0.3
})

llm_response.json()

{'id': 'cmpl-a19cb46f9c9e4219862112a397d39195',
 'object': 'text_completion',
 'created': 1743630987,
 'model': 'news-lora',
 'choices': [{'index': 0,
   'text': '```json{"translated_title": "The Role of Family in Financial Relationships", "translated_content": "Forbes magazine reported that the family plays a pivotal role in shaping an individual\'s relationship with money, as this relationship is influenced by inherited financial behaviors across generations.\\n\\nThe report, based on research by Professor Shane Jensen on financial well-being, explains that each person has a "financial personality" that is determined by how they interact with money, which is directly affected by family upbringing and childhood experiences.\\n\\nThe three dimensions of the financial relationship\\nAccording to the study, there are three main dimensions that form our relationship with money:\\n\\nA: Accumulation (A): Individuals belonging to this dimension tend to view money as a commodity that can be 

## Load Testing

In [56]:
%%writefile locust.py

import random
import json
from locust import HttpUser, task, between, constant
from transformers import AutoTokenizer
from faker import Faker

fake = Faker('ar')
# create one user (HttpUser)
class CompletionLoadTest(HttpUser):
    wait_time = between(1, 3) # wait between each request and the other between 1s to 3s

    @task
    def post_completion(self):
        model_id = "news-lora"
        prompt = fake.text(max_nb_chars=random.randint(150, 200))

        message = {
            "model": model_id,
            "prompt": prompt,
            "max_tokens": 512,
            "temperature": 0.3
        }

        llm_response = self.client.post("/v1/completions", json=message)

        # save llm response in file to ensure the output , but prefered to be in logs not creating files
        if llm_response.status_code == 200:
            with open("./vllm_tokens.txt", "a") as dest:
                dest.write(json.dumps({
                    "prompt": prompt,
                    "response": llm_response.json()["choices"][0]["text"],
                }, ensure_ascii=False) + "\n")


Writing locust.py


In [59]:
!netstat -tuln | grep 8001
!ps aux | grep vllm
!tail -n 200 nohup.out
!tail -n 200 nohup.err

root       12329  0.0  0.0   7376  3464 ?        S    23:50   0:00 /bin/bash -c ps aux | grep vllm
root       12331  0.0  0.0   6484  2280 ?        S    23:50   0:00 grep vllm
INFO 04-02 23:43:35 __init__.py:190] Automatically detected platform cuda.
INFO 04-02 23:43:37 api_server.py:840] vLLM API server version 0.7.2
INFO 04-02 23:43:37 api_server.py:841] args: Namespace(subparser='serve', model_tag='Qwen/Qwen2.5-1.5B-Instruct', config='', host=None, port=8001, uvicorn_log_level='info', allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=[LoRAModulePath(name='news-lora', path='/gdrive/MyDrive/LLM-Finetunning/models', base_model_name=None)], prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=Fals

In [58]:
!locust --headless -f locust.py --host=http://127.0.0.1:8001 -u 20 -r 1 -t "60s" --html=locust_results.html

[2025-04-02 23:49:12,713] 002210d31c41/INFO/locust.main: Starting Locust 2.33.2
[2025-04-02 23:49:12,714] 002210d31c41/INFO/locust.main: Run time limit set to 60 seconds
Type     Name  # reqs      # fails |    Avg     Min     Max    Med |   req/s  failures/s
--------||-------|-------------|-------|-------|-------|-------|--------|-----------
--------||-------|-------------|-------|-------|-------|-------|--------|-----------
         Aggregated       0     0(0.00%) |      0       0       0      0 |    0.00        0.00

[2025-04-02 23:49:12,715] 002210d31c41/INFO/locust.runners: Ramping to 20 users at a rate of 1.00 per second
Type     Name  # reqs      # fails |    Avg     Min     Max    Med |   req/s  failures/s
--------||-------|-------------|-------|-------|-------|-------|--------|-----------
POST     /v1/completions       2   2(100.00%) |      4       1       6      2 |    0.00        0.00
--------||-------|-------------|-------|-------|-------|-------|--------|-----------
       