In [None]:
from huggingface_hub import login
login("") #key

In [5]:
!pip install -q transformers accelerate bitsandbytes



from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# ======== 1. Loading the 4-bit model ========
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

print("Model loaded on :", model.device)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded on : cuda:0


In [None]:
import json
import pandas as pd
from tqdm import tqdm

# ======== 1. Load the tweets ========
path = "/content/cleaned_tweets.json"
with open(path, encoding="utf-8") as f:
    tweets_data = json.load(f)




tweets = [t["content_cleaned"] for t in tweets_data]

# ======== 2. List of labels ========
labels = [
    "pro pjd",
    "contre pjd",
    "pro RNI",
    "contre RNI",
    "pro PAM",
    "contre PAM",
    "perdre la confiance dans tous les partis politiques"
]

# ======== 3. Classification function ========
def classify_tweet(text):
    prompt = f"""
Here is a list of political labels:
{', '.join(labels)}.
Read the tweet below, and respond only with one or more of these exact labels (copy-paste).
If the tweet contains both support for one party and criticism of another, indicate all relevant labels separated by commas.
Do not provide any explanation. Do not rephrase. Do not respond with anything else.

If the tweet mentions only the word "حكومة" (government) **without specifying a name or date**, assume it refers to the **current government (RNI)**.

**Important instructions**:
- If the tweet expresses **explicit or implicit support** for a party, choose a "pro" type label.
- If the tweet expresses **criticism, rejection, or dissatisfaction** towards a party, choose a "critique" or "contre" type label.
- If the tweet criticizes one party **and** supports another, indicate **both labels** separated by commas.
- If a tweet speaks positively about Morocco's current achievements without mentioning a party, assume it indirectly supports the current government (RNI).
- If a tweet compares PJD's reforms as necessary and criticizes the current government’s as harmful, classify it as **Pro PJD and Anti RNI**.
- If a tweet criticizes "the current government" without mentioning a party, consider it targets the RNI.
- If a tweet only talks about social problems without naming a responsible party, choose "contre RNI".
- Do not create any new labels. Do not mix different phrasings. Keep exactly the form of the proposed labels.





Tweet : {text}
Étiquette :"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Strict extraction after "Label" :"
    if "Étiquette :" in result:
        return result.split("Étiquette :")[1].strip().split("\n")[0]
    return result.strip().split("\n")[0]


# ======== 4. Apply the classification to all tweets ========
results = []
for t in tqdm(tweets, desc="Classification des tweets"):
    label = classify_tweet(t)
    results.append(label)

# ======== 5. Save the results ========
df = pd.DataFrame({
    "tweet": tweets,
    "classe": results
})

df.to_csv("classified_tweets.csv", index=False, encoding="utf-8-sig")
print("✅ Results saved in 'classified_tweets.csv'")


Classification des tweets:   0%|          | 0/93 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Classification des tweets:   1%|          | 1/93 [00:04<07:28,  4.87s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Classification des tweets:   2%|▏         | 2/93 [00:09<07:09,  4.72s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Classification des tweets:   3%|▎         | 3/93 [00:15<07:47,  5.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Classification des tweets:   4%|▍         | 4/93 [00:19<07:17,  4.92s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Classification des tweets:   5%|▌         | 5/93 [00:24<07:19,  4.99s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Classification des tweets:   6%|▋         | 6/93 [00:29<06:59,  4.82s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Classification des tw

✅ Results saved in 'classified_tweets.csv'



