In [1]:
# We are installing the necessary libraries for the project.
# transformers: For accessing and training models from Hugging Face.
# datasets: For loading and processing data from the Hugging Face Hub.
# evaluate: For calculating performance metrics.
# accelerate: To speed up training on PyTorch.
# gradio: To create a simple web UI for our model.

# Force reinstall the latest versions to clear any cache issues
!pip install --upgrade --force-reinstall transformers datasets accelerate evaluate gradio

Collecting transformers
  Using cached transformers-4.54.1-py3-none-any.whl.metadata (41 kB)
Collecting datasets
  Using cached datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Using cached accelerate-1.9.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Using cached evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Collecting gradio
  Using cached gradio-5.39.0-py3-none-any.whl.metadata (16 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Using cached huggingface_hub-0.34.3-py3-none-any.whl.metadata (14 kB)
Collecting numpy>=1.17 (from transformers)
  Using cached numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting packaging>=20.0 (from transformers)
  Using cached packaging-25.0-py3-none-any.whl.metadata (3.3 kB)
Collecting pyyaml>=5.1 (from transformers)
  Using cached PyYAML-6.0.2-c

In [1]:
# Import necessary libraries
from datasets import load_dataset
from transformers import AutoTokenizer

# --- 1. Load a Stable Dataset ---
# We are loading 'civil_comments'. To make the process faster for this tutorial,
# we'll only use the first 25,000 rows from the training set.
print("Step 1: Loading the dataset...")
raw_datasets = load_dataset("civil_comments", split='train[:25000]')

# This dataset doesn't have a predefined train/test split, so we'll create one.
# We'll use 80% for training and 20% for testing.
raw_datasets = raw_datasets.train_test_split(test_size=0.2)
print("Dataset loaded and split into train/test sets:")
print(raw_datasets)


# --- 2. Process Labels ---
# The 'toxicity' column is a float (e.g., 0.9). We need a binary label (0 or 1).
# We'll create a new 'labels' column: if toxicity >= 0.5, label is 1 (toxic), else 0.
print("\nStep 2: Converting toxicity scores to binary labels...")
def add_binary_labels(examples):
    examples["labels"] = [1 if toxicity >= 0.5 else 0 for toxicity in examples["toxicity"]]
    return examples

labeled_datasets = raw_datasets.map(add_binary_labels, batched=True)
print("'labels' column created.")


# --- 3. Tokenize Text ---
# Now, we convert text into numbers (tokens) that the model can understand.
print("\nStep 3: Tokenizing the text data...")
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_datasets = labeled_datasets.map(tokenize_function, batched=True)
print("Tokenization complete.")


# --- 4. Final Cleanup and Formatting ---
# We remove all columns we don't need, keeping only model inputs and our new labels.
print("\nStep 4: Cleaning up columns and setting format for PyTorch...")
columns_to_remove = ["text", "toxicity", "severe_toxicity", "obscene", "threat", "insult", "identity_attack", "sexual_explicit"]
tokenized_datasets = tokenized_datasets.remove_columns(columns_to_remove)

# Set the format to PyTorch tensors
tokenized_datasets.set_format("torch")

print("\n--- Data Preprocessing Complete! ---")
print("Final dataset structure:")
print(tokenized_datasets["train"])
print("\nAn example from the processed training data:")
print(tokenized_datasets["train"][0])


Step 1: Loading the dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dataset loaded and split into train/test sets:
DatasetDict({
    train: Dataset({
        features: ['text', 'toxicity', 'severe_toxicity', 'obscene', 'threat', 'insult', 'identity_attack', 'sexual_explicit'],
        num_rows: 20000
    })
    test: Dataset({
        features: ['text', 'toxicity', 'severe_toxicity', 'obscene', 'threat', 'insult', 'identity_attack', 'sexual_explicit'],
        num_rows: 5000
    })
})

Step 2: Converting toxicity scores to binary labels...


Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

'labels' column created.

Step 3: Tokenizing the text data...


Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Tokenization complete.

Step 4: Cleaning up columns and setting format for PyTorch...

--- Data Preprocessing Complete! ---
Final dataset structure:
Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 20000
})

An example from the processed training data:
{'labels': tensor(0), 'input_ids': tensor([ 101, 2025, 2012, 2035, 1012, 2017, 2064, 2525, 2022, 2496, 1999, 1037,
        2277, 2302, 1037, 6105, 1010, 2021, 2302, 1996, 6105, 1996, 2231, 2097,
        2025, 6807, 2009, 1012, 2009, 2003, 1996, 6105, 2008, 8624, 1996, 5038,
        2011, 1996, 2231, 2025, 1996, 2755, 2008, 2009, 3047, 1999, 1037, 2277,
        1012,  102,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,   

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Yeni Bölüm

In [3]:
# Installing torch and torchvision versions that are compatible with each other
!pip install torch==2.6.0 torchvision==0.21.0 --force-reinstall

Collecting torch==2.6.0
  Downloading torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision==0.21.0
  Downloading torchvision-0.21.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting filelock (from torch==2.6.0)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions>=4.10.0 (from torch==2.6.0)
  Using cached typing_extensions-4.14.1-py3-none-any.whl.metadata (3.0 kB)
Collecting networkx (from torch==2.6.0)
  Using cached networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch==2.6.0)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec (from torch==2.6.0)
  Downloading fsspec-2025.7.0-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0)
  Using ca

In [2]:
# Import the model class from transformers
from transformers import AutoModelForSequenceClassification

# Load the DistilBERT model configured for sequence classification.
# We specify num_labels=2 because our task is binary classification:
# 0 (Not Toxic) and 1 (Toxic).
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Import the necessary classes
from transformers import TrainingArguments, Trainer

# --- FAILSAFE MODE ---
# We are using a very basic set of arguments compatible with older library versions.
# This removes all modern features causing errors.
training_args = TrainingArguments(
    output_dir="EthicaGuard_model",
    num_train_epochs=1,
    per_device_train_batch_size=16,

    # We are removing all strategy-based arguments.
    # The Trainer will still save the final model at the end.
)

# NOTE: We are giving up mid-training evaluation and saving the "best" model
# to get the code to run. We will still get a fully trained model.


In [10]:
from transformers import TrainingArguments, Trainer

# Təlim parametrləri (W&B deaktiv edilmiş versiya)
training_args = TrainingArguments(
    output_dir="EthicaGuard_model",
    num_train_epochs=1,
    per_device_train_batch_size=16,

    # --- DÜZƏLİŞ BURADADIR ---
    # Təlim prosesinin heç bir onlayn xidmətə qoşulmamasını təmin edirik.
    report_to="none",
)

In [8]:
import numpy as np
import evaluate

# --- Metrikaları Tək-Tək Yükləyirik ---
# Bu, şəbəkə və ya cache problemlərinə qarşı daha dayanıqlıdır.
try:
    print("Loading metrics individually...")
    accuracy_metric = evaluate.load("accuracy")
    f1_metric = evaluate.load("f1")
    precision_metric = evaluate.load("precision")
    recall_metric = evaluate.load("recall")
    print("Metrics loaded successfully.")
except Exception as e:
    print(f"An error occurred during metric loading: {e}")
    print("Please check your internet connection and try again.")

# compute_metrics funksiyasını yeni metrikalarla işləmək üçün yenidən təyin edirik
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # Hər metrikanı ayrıca hesablayırıq
    # 'average="binary"' ikili təsnifat üçün vacibdir
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
    f1 = f1_metric.compute(predictions=predictions, references=labels, average="binary")
    precision = precision_metric.compute(predictions=predictions, references=labels, average="binary")
    recall = recall_metric.compute(predictions=predictions, references=labels, average="binary")

    # Nəticələri tək bir lüğətdə birləşdiririk
    return {
        "accuracy": accuracy["accuracy"],
        "f1": f1["f1"],
        "precision": precision["precision"],
        "recall": recall["recall"],
    }

print("\nThe 'compute_metrics' function is now defined with robust metric loading.")

Loading metrics individually...


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Metrics loaded successfully.

The 'compute_metrics' function is now defined with robust metric loading.


In [11]:
# compute_metrics funksiyasını əvvəlki addımlarda yaratmışdıq
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics, # Bu funksiya hələ də vacibdir
)

# Təlimi başlat
trainer.train()

  trainer = Trainer(


Step,Training Loss
500,0.1759
1000,0.1446


TrainOutput(global_step=1250, training_loss=0.1508590560913086, metrics={'train_runtime': 903.9593, 'train_samples_per_second': 22.125, 'train_steps_per_second': 1.383, 'total_flos': 2649347973120000.0, 'train_loss': 0.1508590560913086, 'epoch': 1.0})

In [12]:
# Təlim bitdikdən sonra test datası üzərində modeli dəyərləndiririk
print("Evaluating the model on the test set...")
eval_results = trainer.evaluate()

print("\n--- Model Evaluation Results ---")
print(f"Accuracy: {eval_results['eval_accuracy']:.4f}")
print(f"F1 Score: {eval_results['eval_f1']:.4f}")
print(f"Precision: {eval_results['eval_precision']:.4f}")
print(f"Recall: {eval_results['eval_recall']:.4f}")

Evaluating the model on the test set...



--- Model Evaluation Results ---
Accuracy: 0.9532
F1 Score: 0.5966
Precision: 0.6892
Recall: 0.5258


In [13]:
from transformers import pipeline

# Təlim edilmiş modeli və tokenizatoru birbaşa "pipeline" obyektinə yükləyirik
# "EthicaGuard_model" qovluğunda təlimin nəticələri saxlanılır.
# Əgər "A Variantı"nda save_steps təyin etməmisinizsə,
# trainer.save_model("EthicaGuard_model") əmrini əlavə edə bilərsiniz.

# Əmin olmaq üçün modeli yadda saxlayaq
trainer.save_model("EthicaGuard_model")

# Modeli yükləyək
classifier = pipeline("text-classification", model="EthicaGuard_model")

Device set to use cuda:0


In [14]:
import gradio as gr

def predict_toxicity(text):
    # Pipeline ilə proqnoz vermək çox asandır
    results = classifier(text)[0]
    label = results['label']
    score = results['score']

    # Modelin çıxışını anlaşılan formata salaq
    # Modelin çıxışı LABEL_0 (Not Toxic) və LABEL_1 (Toxic) şəklindədir
    if label == "LABEL_1":
        return {"Toxic": score, "Not Toxic": 1 - score}
    else: # LABEL_0
        return {"Not Toxic": score, "Toxic": 1 - score}

# Gradio interfeysini quraq
iface = gr.Interface(
    fn=predict_toxicity,
    inputs=gr.Textbox(lines=3, label="Enter a comment to analyze"),
    outputs=gr.Label(label="Prediction"),
    title="EthicaGuard - AI Harmful Content Detector",
    description="This AI was fine-tuned to detect toxic comments. Type something and see the result.",
    examples=[["You are a wonderful person and I appreciate you."], ["You are an idiot, I hate what you said."]]
)

# Demo-nu başlat
iface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://638f4bef98b1d254f7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


