# vLLM with Llama-3.2-1B-it for Review Sentiment Analysis

## 1. Setup

In [None]:
!pip install -q vllm
!pip install -q -U "huggingface_hub[cli]"
!pip install openai

In [None]:
!pip install -U "ipython>=8.20" "jedi>=0.19"
!pip check    # shows any remaining dependency issues

In [None]:
# Log in to the Hugging Face Hub via the `hf` CLI.
# NOTE(review): presumably needed so the model weights configured below can be
# downloaded with an authenticated account — confirm access requirements.
! hf auth login

## 2. Configuration

In [None]:
import os

# Hugging Face model id passed to `vllm serve` and used as the `model` field
# of every chat-completion request.
model = "meta-llama/Llama-3.2-1B-Instruct"

# Bearer token shared by the local server and the client. Set the
# VLLM_API_KEY environment variable to override it without editing the
# notebook; the fallback keeps the previous placeholder value.
api_key = os.environ.get("VLLM_API_KEY", "tokenX")

In [None]:
# Stop any old servers
!pkill -f "vllm serve" || true

# Run in background
!nohup vllm serve $model \
  --host 127.0.0.1 \
  --port 8000 \
  --dtype auto \
  --api-key $api_key &

In [None]:
for i in range(30):
  # quick peek at the last lines of the log (non-blocking)
  !sleep 2; tail -n 30 nohup.out

## 3. Testing Reachability

In [None]:
import subprocess
import time

import httpx

BASE_URL = "http://127.0.0.1:8000/v1"
HEADERS  = {"Authorization": f"Bearer {api_key}"}
HEALTH_URL = "http://127.0.0.1:8000/health"
MAX_ATTEMPTS = 180  # one probe per second: up to ~3 minutes on first load

# Poll until both the health endpoint and the authenticated model listing
# answer with HTTP 200. `last_problem` remembers why the most recent probe
# failed so a timeout can be diagnosed instead of failing silently.
ready = False
last_problem = "no response received"
for attempt in range(MAX_ATTEMPTS):
    try:
        # 1) unauthenticated health probe
        health = httpx.get(HEALTH_URL, timeout=1.0)
        if health.status_code == 200:
            # 2) authenticated check on /v1/models
            r = httpx.get(f"{BASE_URL}/models", headers=HEADERS, timeout=2.0)
            if r.status_code == 200:
                print("vLLM is ready ✅")
                ready = True
                break
            last_problem = f"GET /v1/models returned HTTP {r.status_code}"
            if r.status_code in (401, 403):
                # The server is up but rejects the key; retrying cannot help.
                break
        else:
            last_problem = f"GET /health returned HTTP {health.status_code}"
    except httpx.HTTPError as exc:
        # Refused connections and timeouts are expected while the model loads;
        # anything that is not a transport/HTTP error is left to propagate.
        last_problem = f"{type(exc).__name__}: {exc}"
    if attempt % 10 == 0:
        print("waiting for vLLM…")
    time.sleep(1)

if not ready:
    print("Server not ready. Recent logs:")
    # Call `tail` directly (no login shell needed) and fall back to stderr so
    # a missing log file is reported instead of printing an empty string.
    log_tail = subprocess.run(
        ["tail", "-n", "80", "nohup.out"], capture_output=True, text=True
    )
    print(log_tail.stdout or log_tail.stderr)
    print(f"Last probe result: {last_problem}")

## 4. Inferencing

In [None]:
from openai import OpenAI

# Sample Arabic customer review to score.
review_text = "الطعام لذيذ والشيش افضل شيش ذقته روعه ويحتاج فقط اعاده تأهيل المبنى والتوسعه"

prompt = f"""
You are given a customer review in Arabic.
Evaluate how satisfied the customer seems with the place, its condition, and its services.
Output a single integer score from 1 to 10, where:
1 = very unhappy and dissatisfied
10 = extremely happy and satisfied

ONLY output the integer, nothing else.

Review: {review_text}
"""

# The OpenAI client is pointed at the local server via BASE_URL.
client = OpenAI(base_url=BASE_URL, api_key=api_key)

resp = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": prompt}],
    max_tokens=3,    # a score of 1-10 needs only a couple of tokens
    temperature=0,   # greedy decoding so the same review yields the same score
)

# `content` may be None (e.g. an empty completion); treat that as an empty
# answer instead of failing on `.strip()`.
predicted_rating = (resp.choices[0].message.content or "").strip()
print(predicted_rating)

# The prompt asks for a bare integer, but the model is not guaranteed to
# comply; flag anything outside the 1-10 contract.
if not (predicted_rating.isdecimal() and 1 <= int(predicted_rating) <= 10):
    print(f"Warning: expected an integer from 1 to 10, got {predicted_rating!r}")
