In [1]:
!pip uninstall -y transformers
!pip install -U transformers datasets evaluate accelerate --quiet


Found existing installation: transformers 4.57.1
Uninstalling transformers-4.57.1:
  Successfully uninstalled transformers-4.57.1


In [6]:
!pip install torch transformers datasets huggingface_hub --quiet

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch

# ===============================
# STEP 1: Load Model and Tokenizer
# ===============================
model_name = "prajjwal1/bert-tiny"  # Small, <30MB model
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=3)

# ===============================
# STEP 2: Prepare Data
# ===============================
data = {
    "text": [
        "Garbage not collected from street for 3 days",
        "Streetlight not working near my house",
        "Water leakage in front of community center",
        "Small crack on footpath near school",
        "Major pothole blocking traffic in main road",
        "Minor paint issue on wall",
        "Fire in transformer near market area",
        "Tree fallen blocking the road",
        "Low water pressure in taps"
    ],
    "label": [2, 1, 2, 0, 2, 0, 2, 2, 1]  # 0=Low, 1=Medium, 2=High
}

train_dataset = Dataset.from_dict(data)
test_dataset = Dataset.from_dict({
    "text": ["Pothole on road causing jams", "Light not working in park", "Broken bench in playground"],
    "label": [2, 1, 0]
})

# ===============================
# STEP 3: Tokenize Data
# ===============================
def preprocess_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)

train_dataset = train_dataset.map(preprocess_function, batched=True)
test_dataset = test_dataset.map(preprocess_function, batched=True)

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# ===============================
# STEP 4: Training Arguments (simplified)
# ===============================
training_args = TrainingArguments(
    output_dir="./priority_model",
    do_train=True,
    do_eval=True,
    per_device_train_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    save_strategy="no",
    report_to=[]  # Disable wandb
)

# ===============================
# STEP 5: Train Model
# ===============================
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
)

trainer.train()

# ===============================
# STEP 6: Save Model
# ===============================
model.save_pretrained("./priority_model")
tokenizer.save_pretrained("./priority_model")

# ===============================
# STEP 7: Load for Inference
# ===============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained("./priority_model").to(device)
tokenizer = BertTokenizer.from_pretrained("./priority_model")

# ===============================
# STEP 8: Prediction Function (device-safe)
# ===============================
def predict_priority(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
    inputs = {k: v.to(device) for k, v in inputs.items()}  # ✅ move to same device
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        prediction = torch.argmax(outputs.logits, dim=-1).item()

    labels = {0: "Low", 1: "Medium", 2: "High"}
    return labels[prediction]

# ===============================
# STEP 9: Example Predictions
# ===============================
print(predict_priority("Road flooded due to heavy rain causing traffic"))
print(predict_priority("Streetlight not working near the community center"))
print(predict_priority("Small paint fade on the park sign"))


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss


High
High
High


In [7]:
# ===== Load and Predict =====
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

# Path where your model was saved
MODEL_DIR = "./civic_priority_model"

# Load the trained model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Helper function for prediction
def predict_priority(text):
    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=64).to(device)

    # Forward pass
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]

    # Find label with highest probability
    label_idx = int(np.argmax(probs))
    confidence = float(np.max(probs))
    label_map = {0: "Low", 1: "Medium", 2: "High"}

    # Return formatted output
    return {
        "input": text,
        "predicted_label": label_map[label_idx],
        "confidence": round(confidence, 4),
        "probabilities": probs.tolist()
    }

# ==== Example predictions ====
print(predict_priority("Road flooded due to heavy rain causing traffic"))
print(predict_priority("Streetlight not working near the community center"))
print(predict_priority("Small paint fade on the park sign"))
print(predict_priority("Garbage has been overflowing for days and smells horrible"))


{'input': 'Road flooded due to heavy rain causing traffic', 'predicted_label': 'Medium', 'confidence': 0.4713, 'probabilities': [0.30070286989212036, 0.4713328182697296, 0.2279643416404724]}
{'input': 'Streetlight not working near the community center', 'predicted_label': 'Medium', 'confidence': 0.46, 'probabilities': [0.316963255405426, 0.4599556028842926, 0.2230810821056366]}
{'input': 'Small paint fade on the park sign', 'predicted_label': 'Medium', 'confidence': 0.4544, 'probabilities': [0.31913524866104126, 0.45443838834762573, 0.22642633318901062]}
{'input': 'Garbage has been overflowing for days and smells horrible', 'predicted_label': 'Medium', 'confidence': 0.4456, 'probabilities': [0.3052935004234314, 0.4455614686012268, 0.24914506077766418]}


In [13]:
!pip install torch transformers datasets huggingface_hub --quiet

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch
import random
import numpy as np

# ===============================
# STEP 1: Load Model and Tokenizer
# ===============================
model_name = "prajjwal1/bert-tiny"  # lightweight
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=3)

# ===============================
# STEP 2: Expanded Balanced Dataset
# ===============================
high_priority = [
    "Transformer fire near school building",
    "Gas leakage in residential area",
    "Flooded main road blocking ambulance route",
    "Wall collapsed near hospital",
    "Tree fallen blocking main entrance",
    "Short circuit causing smoke in transformer",
    "Accident due to open manhole",
    "Road completely blocked by landslide",
    "Sewage overflow in front of market",
    "Electric pole about to fall",
    "Transformer burst causing blackout",
    "Bridge crack noticed after heavy rainfall"
]

medium_priority = [
    "Streetlight not working in main lane",
    "Drain blocked near bus stop",
    "Garbage not collected for three days",
    "Overflowing dustbin near temple",
    "Pothole causing minor traffic delay",
    "Broken footpath near metro station",
    "Streetlight flickering at night",
    "Drain leakage near colony entrance",
    "Park fountain not working",
    "Low water pressure in taps",
    "Garbage piled up near school gate",
    "Footpath tiles missing near metro"
]

low_priority = [
    "Faded zebra crossing paint on road",
    "Broken bench in park",
    "Minor paint issue on wall",
    "Need more plants in garden",
    "Dustbin lid missing in one corner",
    "Slightly rusted railing in public park",
    "Old posters stuck on electricity pole",
    "Small crack on pavement near shop",
    "Request for more dustbins",
    "Need repainting of divider lines",
    "Tree sapling needs watering",
    "Request for new benches in park"
]

texts = high_priority + medium_priority + low_priority
labels = [2]*len(high_priority) + [1]*len(medium_priority) + [0]*len(low_priority)

combined = list(zip(texts, labels))
random.shuffle(combined)
texts, labels = zip(*combined)
data = {"text": list(texts), "label": list(labels)}

train_dataset = Dataset.from_dict(data)

# Validation/test set
test_dataset = Dataset.from_dict({
    "text": [
        "Tree blocking half of the street",
        "Streetlight not working near the park",
        "Broken bench in playground",
        "Drain water overflowing onto main road",
        "Faded paint on pedestrian crossing",
        "Minor crack on wall near bus stop",
        "Transformer burst near hospital",
        "Garbage pile near market"
    ],
    "label": [2, 1, 0, 2, 0, 0, 2, 1]
})

# ===============================
# STEP 3: Tokenization
# ===============================
def preprocess_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)

train_dataset = train_dataset.map(preprocess_function, batched=True)
test_dataset = test_dataset.map(preprocess_function, batched=True)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# ===============================
# STEP 4: Training Args
# ===============================
training_args = TrainingArguments(
    output_dir="./priority_model",
    do_train=True,
    do_eval=True,
    per_device_train_batch_size=8,
    num_train_epochs=12,     # more epochs helps small dataset
    learning_rate=3e-5,      # slightly higher to help converge
    weight_decay=0.01,
    warmup_ratio=0.1,        # warmup for stable learning
    logging_dir="./logs",
    save_strategy="no",
    report_to=[]
)

# ===============================
# STEP 5: Define Metrics
# ===============================
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = (predictions == labels).mean()
    return {"accuracy": accuracy}

# ===============================
# STEP 6: Train
# ===============================
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

# ===============================
# STEP 7: Save Model
# ===============================
model.save_pretrained("./priority_model")
tokenizer.save_pretrained("./priority_model")

# ===============================
# STEP 8: Load for Inference
# ===============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained("./priority_model").to(device)
tokenizer = BertTokenizer.from_pretrained("./priority_model")

# ===============================
# STEP 9: Prediction Function
# ===============================
def predict_priority(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
        pred_label = int(probs.argmax())
        confidence = float(probs.max())
    label_map = {0: "Low", 1: "Medium", 2: "High"}
    return {
        "text": text,
        "predicted_label": label_map[pred_label],
        "confidence": round(confidence, 4)
    }

# ===============================
# STEP 10: Example Predictions
# ===============================
examples = [
    "Transformer caught fire near main road",
    "Streetlight not working near the community center",
    "Small paint fade on the park sign",
    "Flooding on main road due to rain",
    "Garbage not collected for 4 days",
    "Need repainting of road divider lines",
    "Gas smell near residential area",
    "Bench broken in park"
]

for ex in examples:
    print(predict_priority(ex))


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/36 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss


{'text': 'Transformer caught fire near main road', 'predicted_label': 'Low', 'confidence': 0.3914}
{'text': 'Streetlight not working near the community center', 'predicted_label': 'Low', 'confidence': 0.392}
{'text': 'Small paint fade on the park sign', 'predicted_label': 'Low', 'confidence': 0.4307}
{'text': 'Flooding on main road due to rain', 'predicted_label': 'Low', 'confidence': 0.3945}
{'text': 'Garbage not collected for 4 days', 'predicted_label': 'Low', 'confidence': 0.4299}
{'text': 'Need repainting of road divider lines', 'predicted_label': 'Low', 'confidence': 0.3953}
{'text': 'Gas smell near residential area', 'predicted_label': 'Low', 'confidence': 0.3844}
{'text': 'Bench broken in park', 'predicted_label': 'Low', 'confidence': 0.4129}


In [15]:
!pip install -U torch transformers datasets huggingface_hub --quiet

from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch
import numpy as np
import random

# ===============================
# STEP 1: Load Model and Tokenizer
# ===============================
model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=3)

# ===============================
# STEP 2: Dataset
# ===============================
high_priority = [
    "Transformer fire near school building",
    "Gas leakage in residential area",
    "Flooded main road blocking ambulance route",
    "Wall collapsed near hospital",
    "Tree fallen blocking main entrance",
    "Short circuit causing smoke in transformer",
    "Accident due to open manhole",
    "Road completely blocked by landslide",
    "Sewage overflow in front of market",
    "Electric pole about to fall"
]

medium_priority = [
    "Streetlight not working in main lane",
    "Drain blocked near bus stop",
    "Garbage not collected for three days",
    "Overflowing dustbin near temple",
    "Pothole causing minor traffic delay",
    "Broken footpath near metro station",
    "Streetlight flickering at night",
    "Drain leakage near colony entrance",
    "Park fountain not working",
    "Low water pressure in taps"
]

low_priority = [
    "Faded zebra crossing paint on road",
    "Broken bench in park",
    "Minor paint issue on wall",
    "Need more plants in garden",
    "Dustbin lid missing in one corner",
    "Slightly rusted railing in public park",
    "Old posters stuck on electricity pole",
    "Small crack on pavement near shop",
    "Request for more dustbins",
    "Need repainting of divider lines"
]

texts = high_priority + medium_priority + low_priority
labels = [2]*len(high_priority) + [1]*len(medium_priority) + [0]*len(low_priority)

combined = list(zip(texts, labels))
random.shuffle(combined)
texts, labels = zip(*combined)
data = {"text": list(texts), "label": list(labels)}

train_dataset = Dataset.from_dict(data)

test_dataset = Dataset.from_dict({
    "text": [
        "Tree blocking half of the street",
        "Streetlight not working near the park",
        "Broken bench in playground",
        "Drain water overflowing onto main road",
        "Faded paint on pedestrian crossing",
        "Minor crack on wall near bus stop"
    ],
    "label": [2, 1, 0, 2, 0, 0]
})

# ===============================
# STEP 3: Tokenization
# ===============================
def preprocess_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)

train_dataset = train_dataset.map(preprocess_function, batched=True)
test_dataset = test_dataset.map(preprocess_function, batched=True)

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# ===============================
# STEP 4: Training Args (Version-safe)
# ===============================
try:
    training_args = TrainingArguments(
        output_dir="./priority_model",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        per_device_train_batch_size=4,
        num_train_epochs=15,
        learning_rate=3e-5,
        warmup_ratio=0.1,
        weight_decay=0.02,
        logging_dir="./logs",
        logging_steps=10,
        report_to=[]
    )
except TypeError:
    # fallback for older versions without evaluation_strategy
    training_args = TrainingArguments(
        output_dir="./priority_model",
        do_train=True,
        do_eval=True,
        per_device_train_batch_size=4,
        num_train_epochs=15,
        learning_rate=3e-5,
        weight_decay=0.02,
        logging_dir="./logs",
        logging_steps=10,
        report_to=[]
    )

# ===============================
# STEP 5: Metrics
# ===============================
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = (predictions == labels).mean()
    return {"accuracy": accuracy}

# ===============================
# STEP 6: Trainer
# ===============================
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

# ===============================
# STEP 7: Save model
# ===============================
model.save_pretrained("./priority_model")
tokenizer.save_pretrained("./priority_model")

# ===============================
# STEP 8: Inference
# ===============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DistilBertForSequenceClassification.from_pretrained("./priority_model").to(device)
tokenizer = DistilBertTokenizer.from_pretrained("./priority_model")

def predict_priority(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
        pred_label = int(probs.argmax())
        confidence = float(probs.max())
    label_map = {0: "Low", 1: "Medium", 2: "High"}
    return {
        "text": text,
        "predicted_label": label_map[pred_label],
        "confidence": round(confidence, 4)
    }

# ===============================
# STEP 9: Test
# ===============================
examples = [
    "Transformer caught fire near main road",
    "Streetlight not working near the community center",
    "Small paint fade on the park sign",
    "Flooding on main road due to rain",
    "Garbage not collected for 4 days",
    "Need repainting of road divider lines",
    "Gas smell near residential area",
    "Bench broken in park"
]

for ex in examples:
    print(predict_priority(ex))


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m899.7/899.7 MB[0m [31m758.6 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m594.3/594.3 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m159.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.0/88.0 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m954.8/954.8 kB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.1/193.1 MB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.6/63.6 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/30 [00:00<?, ? examples/s]

Map:   0%|          | 0/6 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
10,1.1002
20,0.987
30,0.8717
40,0.6364
50,0.4612
60,0.3619
70,0.2448
80,0.156
90,0.1123
100,0.0832


{'text': 'Transformer caught fire near main road', 'predicted_label': 'High', 'confidence': 0.8904}
{'text': 'Streetlight not working near the community center', 'predicted_label': 'Medium', 'confidence': 0.9448}
{'text': 'Small paint fade on the park sign', 'predicted_label': 'Low', 'confidence': 0.9691}
{'text': 'Flooding on main road due to rain', 'predicted_label': 'High', 'confidence': 0.8808}
{'text': 'Garbage not collected for 4 days', 'predicted_label': 'Medium', 'confidence': 0.9528}
{'text': 'Need repainting of road divider lines', 'predicted_label': 'Low', 'confidence': 0.9699}
{'text': 'Gas smell near residential area', 'predicted_label': 'High', 'confidence': 0.8237}
{'text': 'Bench broken in park', 'predicted_label': 'Low', 'confidence': 0.9615}


In [16]:
model.save_pretrained("./final_priority_model")
tokenizer.save_pretrained("./final_priority_model")


('./final_priority_model/tokenizer_config.json',
 './final_priority_model/special_tokens_map.json',
 './final_priority_model/vocab.txt',
 './final_priority_model/added_tokens.json')

In [22]:
from transformers import pipeline

classifier = pipeline("text-classification", model="./final_priority_model", tokenizer="./final_priority_model")

sentences = [
    "Broken water pipe near main street and which is casuing massive floading and traffic jam and casuing students to do not go to school",
    "a huge anconda on road"
]

for s in sentences:
    result = classifier(s)[0]
    print(f"{s} → {result['label']} ({result['score']:.4f})")


Device set to use cuda:0


Broken water pipe near main street and which is casuing massive floading and traffic jam and casuing students to do not go to school → LABEL_2 (0.5905)
a huge anconda on road → LABEL_0 (0.8633)


In [23]:
from transformers import pipeline

# Load your fine-tuned model and tokenizer
classifier = pipeline(
    "text-classification",
    model="./final_priority_model",        # path to your saved model
    tokenizer="./final_priority_model",
    truncation=True,
    padding=True
)

# Large / detailed complaint sentences
sentences = [
    "Due to the continuous heavy rainfall since last night, the entire main road leading to the city hospital is flooded, \
     causing severe traffic jams and making it difficult for ambulances to pass.",

    "Several streetlights near the central park have been non-functional for over a week, \
     making the area unsafe for pedestrians during night hours, especially for children and elderly residents.",

    "A transformer located near the residential block suddenly caught fire this morning, \
     producing smoke and sparks which could lead to a potential explosion if not addressed immediately.",

    "The garbage in our locality has not been collected for more than 10 days, \
     leading to a foul smell, increased mosquito breeding, and overall unhygienic conditions.",

    "There is a small crack on the footpath near the children’s play area in the garden. \
     It doesn’t cause any obstruction but may need repair in the future to maintain safety standards.",

    "The divider lines on the main road have completely faded away, \
     making it difficult for drivers to stay in their lanes during heavy traffic hours.",

    "A gas leak was reported near the apartment complex around midnight, \
     and the residents can still smell gas in the area, posing a major safety concern."
]

# Predict
print("\n🔍 Model Predictions:\n")
for text in sentences:
    result = classifier(text)[0]
    label = result['label']
    confidence = result['score']
    print(f"Text: {text[:100]}...")  # Show first 100 chars
    print(f"Predicted Label: {label}")
    print(f"Confidence: {confidence:.4f}")
    print("-" * 80)


Device set to use cuda:0



🔍 Model Predictions:

Text: Due to the continuous heavy rainfall since last night, the entire main road leading to the city hosp...
Predicted Label: LABEL_2
Confidence: 0.6444
--------------------------------------------------------------------------------
Text: Several streetlights near the central park have been non-functional for over a week,      making the...
Predicted Label: LABEL_2
Confidence: 0.5094
--------------------------------------------------------------------------------
Text: A transformer located near the residential block suddenly caught fire this morning,      producing s...
Predicted Label: LABEL_2
Confidence: 0.7759
--------------------------------------------------------------------------------
Text: The garbage in our locality has not been collected for more than 10 days,      leading to a foul sme...
Predicted Label: LABEL_1
Confidence: 0.4511
--------------------------------------------------------------------------------
Text: There is a small crack on the f

In [24]:
!pip install huggingface_hub --quiet
from huggingface_hub import login

# Login using your Hugging Face token
login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [26]:
from huggingface_hub import HfApi, HfFolder, Repository, create_repo
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Your Hugging Face username
username = "mrigaanksh"  # replace with your HF username exactly

repo_name = "priority-classification-distilbert"
model_id = f"{username}/{repo_name}"

# Create a new repo on Hugging Face (set private=True if you want to keep it private)
create_repo(model_id, exist_ok=True, private=False)

# Push model and tokenizer to hub
model = AutoModelForSequenceClassification.from_pretrained("./final_priority_model")
tokenizer = AutoTokenizer.from_pretrained("./final_priority_model")

model.push_to_hub(model_id)
tokenizer.push_to_hub(model_id)

print(f"✅ Model uploaded successfully! View it here: https://huggingface.co/{model_id}")


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...rd90319/model.safetensors:   0%|          |  575kB /  268MB            

README.md: 0.00B [00:00, ?B/s]

✅ Model uploaded successfully! View it here: https://huggingface.co/mrigaanksh/priority-classification-distilbert


model testng



In [27]:
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="mrigaanksh/priority-classification-distilbert"
)

result = classifier("There is a transformer fire near the school building")[0]
print(result)


config.json:   0%|          | 0.00/717 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

Device set to use cuda:0


{'label': 'LABEL_2', 'score': 0.767005443572998}
