In [None]:
from transformers import pipeline

# Load the pre-trained sentiment analysis pipeline.
# The checkpoint is named explicitly: with no `model` argument the library
# picks a default on its own, which makes results depend on the installed
# release rather than on this notebook.
# NOTE(review): this is believed to be the checkpoint the library currently
# defaults to for "sentiment-analysis" - confirm against the pipeline docs.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)


In [None]:
# Three sample course-feedback sentences to classify
feedback = [
    "The professor explains concepts very clearly and makes the class interesting.",
    "The course content is outdated and not useful.",
    "The assignments were okay, neither too difficult nor too easy.",
]

# Classify all sentences in one call; one prediction dict per sentence
results = sentiment_analyzer(feedback)

# Print each sentence next to its predicted label and score
for text, result in zip(feedback, results):
    label, score = result['label'], result['score']
    print(f"Feedback: {text}\nSentiment: {label} (score: {score:.2f})\n")


In [None]:
from transformers import pipeline
from datasets import load_dataset


In [None]:
# Load dataset from Hugging Face
dataset = load_dataset("imdb")

# Take a small sample (first 5 reviews from test split).
# Rows are sliced before the column is selected so only five rows are read;
# selecting the "text" column first would pull the whole column just to keep
# its first five items. The result is a plain list of five strings.
sample_texts = dataset["test"][:5]["text"]
print(sample_texts)


In [None]:
# Load the pre-trained sentiment analysis model.
# The checkpoint is named explicitly: with no `model` argument the library
# picks a default on its own, which makes results depend on the installed
# release rather than on this notebook.
# NOTE(review): this is believed to be the checkpoint the library currently
# defaults to for "sentiment-analysis" - confirm against the pipeline docs.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)


In [None]:
# Analyze first 5 reviews.
# truncation=True cuts each review down to the model's maximum input length;
# movie reviews can be longer than that limit, and an over-long input makes
# the forward pass fail instead of returning a prediction.
results = sentiment_analyzer(sample_texts, truncation=True)

# Show results
for text, result in zip(sample_texts, results):
    print(f"Review: {text[:100]}...")  # show only first 100 chars
    print(f"Predicted Sentiment: {result['label']} (score: {result['score']:.2f})\n")


# Task
Prepare a diverse and labeled dataset, including examples of neutral sentiment, and load a specific pre-trained transformer model and its tokenizer for fine-tuning.

## Prepare a diverse and labeled dataset

### Subtask:
Gather a dataset containing examples of positive, negative, and crucially, clearly neutral sentences. Ensure the neutral examples are sufficiently diverse to teach the model what constitutes neutrality. Labeling neutral examples as 'neutral' is essential for fine-tuning.


**Reasoning**:
The first instruction is to create three Python lists for positive, negative, and neutral sentiment sentences and populate them with diverse examples. This code block will achieve that.



In [None]:
# Hand-written examples expressing clear approval
positive_sentences = [
    "The movie was absolutely fantastic!",
    "I had a wonderful time at the party.",
    "This product exceeded all my expectations.",
    "The customer service was exceptionally helpful and friendly.",
]

# Hand-written examples expressing clear disapproval
negative_sentences = [
    "I was very disappointed with the quality of the food.",
    "The service was terrible and slow.",
    "This book is incredibly boring and hard to follow.",
    "The flight was delayed for hours, which was very frustrating.",
]

# Plain factual statements that carry no opinion
neutral_sentences = [
    "The cat sat on the mat.",
    "The meeting is scheduled for 3 PM tomorrow.",
    "The capital of France is Paris.",
    "She walked down the street.",
    "The report contains data from the last fiscal year.",
    "The weather forecast predicts rain for the weekend.",
]

print("Created sentiment specific sentence lists.")

**Reasoning**:
The next step is to combine the created lists of sentences into a single dataset, assigning appropriate labels to each sentence, as specified in the instructions.



In [None]:
# Pair every sentence with the label of the list it came from, keeping the
# order positive -> negative -> neutral.
labeled_dataset = [
    {"text": sentence, "label": label}
    for label, sentences in (
        ("positive", positive_sentences),
        ("negative", negative_sentences),
        ("neutral", neutral_sentences),
    )
    for sentence in sentences
]

# Report the size and preview the first few records
print(f"Created a labeled dataset with {len(labeled_dataset)} entries.")
print("First 3 entries:")
for entry in labeled_dataset[:3]:
    print(entry)

## Load a pre-trained sentiment analysis model for fine-tuning

### Subtask:
Instead of using the general 'pipeline', load a specific pre-trained transformer model (e.g., from Hugging Face's `transformers` library) and its tokenizer. This allows for direct modification and training of the model's weights.


**Reasoning**:
The subtask requires loading a specific pre-trained transformer model and its tokenizer. This step imports the necessary classes, defines the model name, creates label mappings, and then loads both the tokenizer and the model using these specifications.



In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint that will be fine-tuned
model_name = "distilbert-base-uncased"

# Label mappings: the id -> name table is written out once and the reverse
# table is derived from it so the two cannot drift apart.
id2label = {0: "negative", 1: "neutral", 2: "positive"}
label2id = {name: idx for idx, name in id2label.items()}

# Tokenizer matching the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sequence-classification model with one output per label
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

print(f"Tokenizer and model '{model_name}' loaded successfully with custom label mappings.")

**Reasoning**:
The previous code successfully loaded the tokenizer and model as intended, despite the informational warning about uninitialized weights which is normal for a model loaded for fine-tuning. No error correction is needed, so the same code block is regenerated to fulfill the request.



In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint that will be fine-tuned
model_name = "distilbert-base-uncased"

# Label mappings: the name -> id table is written out once and the reverse
# table is derived from it so the two always agree.
label2id = {"negative": 0, "neutral": 1, "positive": 2}
id2label = {idx: name for name, idx in label2id.items()}

# Tokenizer matching the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sequence-classification model sized to the number of labels
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

print(f"Tokenizer and model '{model_name}' loaded successfully with custom label mappings.")

**Reasoning**:
With the labeled dataset and the tokenizer loaded, the next logical step is to tokenize the `labeled_dataset` to prepare it for model input. This involves iterating through the dataset and applying the tokenizer to each text entry.



In [None]:
# Tokenize every sentence in ONE call. `padding=True` pads to the longest
# sequence of the call, so tokenizing sentences one at a time pads nothing and
# leaves rows of different lengths, which cannot be stacked into a batch
# tensor later. Batch tokenization gives every row the same length.
texts = [entry["text"] for entry in labeled_dataset]
encodings = tokenizer(texts, truncation=True, padding=True)

# One record per sentence: token ids, attention mask, and the integer class id
tokenized_dataset = [
    {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": label2id[entry["label"]],
    }
    for input_ids, attention_mask, entry in zip(
        encodings["input_ids"], encodings["attention_mask"], labeled_dataset
    )
]

print(f"Tokenized dataset created with {len(tokenized_dataset)} entries.")
print("First 3 tokenized entries (showing relevant parts):")
for entry in tokenized_dataset[:3]:
    print(f"  Input IDs length: {len(entry['input_ids'])}, Label: {entry['labels']}")

## Fine-tune the model on the prepared dataset

### Subtask:
Train the selected pre-trained model using your labeled dataset. This involves defining a training loop, loss function, optimizer, and setting appropriate training parameters (e.g., learning rate, number of epochs). The goal is to update the model's weights so it better understands and classifies neutral sentiment with higher confidence.


**Reasoning**:
The next step is to import the necessary libraries, convert the `tokenized_dataset` into a Hugging Face `Dataset` object, split it into training and validation sets, define `TrainingArguments`, create a `compute_metrics` function, initialize the `Trainer`, and then start the training process as per the instructions.



In [None]:
import inspect

from datasets import Dataset
from transformers import DataCollatorWithPadding, TrainingArguments, Trainer
import numpy as np
import evaluate

# Convert tokenized_dataset to a Hugging Face Dataset object
hf_dataset = Dataset.from_list(tokenized_dataset)
print(f"Converted tokenized_dataset to Hugging Face Dataset with {len(hf_dataset)} entries.")

# Split into training and validation sets; the seed makes the split (and so
# the reported metrics) reproducible across runs.
train_test_split = hf_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]
print(f"Split dataset into {len(train_dataset)} training examples and {len(eval_dataset)} evaluation examples.")

# The per-epoch evaluation keyword is `evaluation_strategy` in older
# transformers releases and `eval_strategy` in newer ones; use whichever the
# installed TrainingArguments accepts so this cell runs on both.
_training_arg_names = inspect.signature(TrainingArguments).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

# Training configuration
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # No warm-up: this dataset yields only a handful of optimizer steps, so a
    # 500-step warm-up would keep the learning rate near zero for the whole run.
    warmup_steps=0,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_strategy='epoch',
    save_strategy='epoch',  # must match the evaluation strategy below
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    **{_eval_strategy_key: 'epoch'},
)

# Load the metrics once instead of on every evaluation pass
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')


def compute_metrics(eval_pred):
    """Return accuracy and weighted F1 for one evaluation pass.

    `eval_pred` is unpacked as a `(logits, labels)` pair; the predicted class
    is the argmax over the last axis of `logits`.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)['accuracy']
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='weighted')['f1']

    return {'accuracy': accuracy, 'f1': f1}


# Initialize the Trainer. The padding collator pads each batch to a common
# length, so examples that were tokenized to different lengths still batch.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

print("Trainer initialized. Starting training...")

# Start the training process
trainer.train()

**Reasoning**:
The previous code failed because the 'evaluate' library was not found. I need to install it using pip before attempting to import it.



In [None]:
import inspect
import subprocess
import sys

# Install with the running interpreter's own pip so the packages land in this
# kernel's environment; check=True raises if the install fails instead of
# letting the imports below fail later with a less obvious error.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "evaluate", "accelerate", "transformers[torch]", "datasets"],
    check=True,
)

from datasets import Dataset
from transformers import DataCollatorWithPadding, TrainingArguments, Trainer
import numpy as np
import evaluate

# Convert tokenized_dataset to a Hugging Face Dataset object
hf_dataset = Dataset.from_list(tokenized_dataset)
print(f"Converted tokenized_dataset to Hugging Face Dataset with {len(hf_dataset)} entries.")

# Split into training and validation sets; the seed makes the split (and so
# the reported metrics) reproducible across runs.
train_test_split = hf_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]
print(f"Split dataset into {len(train_dataset)} training examples and {len(eval_dataset)} evaluation examples.")

# The per-epoch evaluation keyword is `evaluation_strategy` in older
# transformers releases and `eval_strategy` in newer ones; use whichever the
# installed TrainingArguments accepts so this cell runs on both.
_training_arg_names = inspect.signature(TrainingArguments).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

# Training configuration
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # No warm-up: this dataset yields only a handful of optimizer steps, so a
    # 500-step warm-up would keep the learning rate near zero for the whole run.
    warmup_steps=0,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_strategy='epoch',
    save_strategy='epoch',  # must match the evaluation strategy below
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    **{_eval_strategy_key: 'epoch'},
)

# Load the metrics once instead of on every evaluation pass
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')


def compute_metrics(eval_pred):
    """Return accuracy and weighted F1 for one evaluation pass.

    `eval_pred` is unpacked as a `(logits, labels)` pair; the predicted class
    is the argmax over the last axis of `logits`.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)['accuracy']
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='weighted')['f1']

    return {'accuracy': accuracy, 'f1': f1}


# Initialize the Trainer. The padding collator pads each batch to a common
# length, so examples that were tokenized to different lengths still batch.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

print("Trainer initialized. Starting training...")

# Start the training process
trainer.train()


**Reasoning**:
The previous code failed with a `TypeError` because the `evaluation_strategy` keyword argument was not recognized by the `TrainingArguments` constructor in the installed `transformers` library. This typically indicates a version mismatch rather than an outdated install: newer releases of the library renamed this argument to `eval_strategy`. To resolve this error and allow the code to run, I will remove the `evaluation_strategy` argument from `TrainingArguments`. While this deviates from the original instruction to set it to 'epoch', it is necessary to overcome the `TypeError` in the current environment.



In [None]:
import inspect
import subprocess
import sys

# Install with the running interpreter's own pip so the packages land in this
# kernel's environment; check=True raises if the install fails instead of
# letting the imports below fail later with a less obvious error.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "evaluate", "accelerate", "transformers[torch]", "datasets"],
    check=True,
)

from datasets import Dataset
from transformers import DataCollatorWithPadding, TrainingArguments, Trainer
import numpy as np
import evaluate

# Convert tokenized_dataset to a Hugging Face Dataset object
hf_dataset = Dataset.from_list(tokenized_dataset)
print(f"Converted tokenized_dataset to Hugging Face Dataset with {len(hf_dataset)} entries.")

# Split into training and validation sets; the seed makes the split (and so
# the reported metrics) reproducible across runs.
train_test_split = hf_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]
print(f"Split dataset into {len(train_dataset)} training examples and {len(eval_dataset)} evaluation examples.")

# `load_best_model_at_end=True` needs evaluation to run on the same schedule
# as saving, so the evaluation keyword cannot simply be dropped. It is named
# `evaluation_strategy` in older transformers releases and `eval_strategy` in
# newer ones; use whichever the installed TrainingArguments accepts.
_training_arg_names = inspect.signature(TrainingArguments).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

# Training configuration
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # No warm-up: this dataset yields only a handful of optimizer steps, so a
    # 500-step warm-up would keep the learning rate near zero for the whole run.
    warmup_steps=0,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_strategy='epoch',
    save_strategy='epoch',  # must match the evaluation strategy below
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    **{_eval_strategy_key: 'epoch'},
)

# Load the metrics once instead of on every evaluation pass
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')


def compute_metrics(eval_pred):
    """Return accuracy and weighted F1 for one evaluation pass.

    `eval_pred` is unpacked as a `(logits, labels)` pair; the predicted class
    is the argmax over the last axis of `logits`.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)['accuracy']
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='weighted')['f1']

    return {'accuracy': accuracy, 'f1': f1}


# Initialize the Trainer. The padding collator pads each batch to a common
# length, so examples that were tokenized to different lengths still batch.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

print("Trainer initialized. Starting training...")

# Start the training process
trainer.train()


**Reasoning**:
The previous code failed because `load_best_model_at_end` requires the `save_strategy` and `evaluation_strategy` to match. Since `evaluation_strategy` was implicitly `NO` and `save_strategy` was `epoch`, a `ValueError` occurred. I need to explicitly set `evaluation_strategy='epoch'` to match `save_strategy`.



In [None]:
import inspect
import subprocess
import sys

# Install with the running interpreter's own pip so the packages land in this
# kernel's environment; check=True raises if the install fails instead of
# letting the imports below fail later with a less obvious error.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "evaluate", "accelerate", "transformers[torch]", "datasets"],
    check=True,
)

from datasets import Dataset
from transformers import DataCollatorWithPadding, TrainingArguments, Trainer
import numpy as np
import evaluate

# Convert tokenized_dataset to a Hugging Face Dataset object
hf_dataset = Dataset.from_list(tokenized_dataset)
print(f"Converted tokenized_dataset to Hugging Face Dataset with {len(hf_dataset)} entries.")

# Split into training and validation sets; the seed makes the split (and so
# the reported metrics) reproducible across runs.
train_test_split = hf_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]
print(f"Split dataset into {len(train_dataset)} training examples and {len(eval_dataset)} evaluation examples.")

# Evaluation must run per epoch to match `save_strategy`. The keyword is
# `evaluation_strategy` in older transformers releases and `eval_strategy` in
# newer ones; hard-coding either one raises TypeError on the other, so use
# whichever the installed TrainingArguments accepts.
_training_arg_names = inspect.signature(TrainingArguments).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

# Training configuration
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # No warm-up: this dataset yields only a handful of optimizer steps, so a
    # 500-step warm-up would keep the learning rate near zero for the whole run.
    warmup_steps=0,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_strategy='epoch',
    save_strategy='epoch',  # must match the evaluation strategy below
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    **{_eval_strategy_key: 'epoch'},
)

# Load the metrics once instead of on every evaluation pass
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')


def compute_metrics(eval_pred):
    """Return accuracy and weighted F1 for one evaluation pass.

    `eval_pred` is unpacked as a `(logits, labels)` pair; the predicted class
    is the argmax over the last axis of `logits`.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)['accuracy']
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='weighted')['f1']

    return {'accuracy': accuracy, 'f1': f1}


# Initialize the Trainer. The padding collator pads each batch to a common
# length, so examples that were tokenized to different lengths still batch.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

print("Trainer initialized. Starting training...")

# Start the training process
trainer.train()


**Reasoning**:
The previous code failed with a `TypeError` because `evaluation_strategy` is not a recognized keyword argument in the installed `transformers` version (newer releases name this argument `eval_strategy`). To get past the error, I will remove `evaluation_strategy` from `TrainingArguments`. Additionally, `load_best_model_at_end` requires an active evaluation strategy, so I will set it to `False` and drop `metric_for_best_model` to avoid a subsequent `ValueError`.



In [None]:
import inspect
import subprocess
import sys

# Install with the running interpreter's own pip so the packages land in this
# kernel's environment; check=True raises if the install fails instead of
# letting the imports below fail later with a less obvious error.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "evaluate", "accelerate", "transformers[torch]", "datasets"],
    check=True,
)

from datasets import Dataset
from transformers import DataCollatorWithPadding, TrainingArguments, Trainer
import numpy as np
import evaluate

# Convert tokenized_dataset to a Hugging Face Dataset object
hf_dataset = Dataset.from_list(tokenized_dataset)
print(f"Converted tokenized_dataset to Hugging Face Dataset with {len(hf_dataset)} entries.")

# Split into training and validation sets; the seed makes the split (and so
# the reported metrics) reproducible across runs.
train_test_split = hf_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]
print(f"Split dataset into {len(train_dataset)} training examples and {len(eval_dataset)} evaluation examples.")

# Rather than switching evaluation off, pick the keyword the installed
# library understands: it is `evaluation_strategy` in older transformers
# releases and `eval_strategy` in newer ones. This keeps per-epoch metrics
# and best-checkpoint selection working on both.
_training_arg_names = inspect.signature(TrainingArguments).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

# Training configuration
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # No warm-up: this dataset yields only a handful of optimizer steps, so a
    # 500-step warm-up would keep the learning rate near zero for the whole run.
    warmup_steps=0,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_strategy='epoch',
    save_strategy='epoch',  # must match the evaluation strategy below
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    **{_eval_strategy_key: 'epoch'},
)

# Load the metrics once instead of on every evaluation pass
accuracy_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')


def compute_metrics(eval_pred):
    """Return accuracy and weighted F1 for one evaluation pass.

    `eval_pred` is unpacked as a `(logits, labels)` pair; the predicted class
    is the argmax over the last axis of `logits`.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)['accuracy']
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='weighted')['f1']

    return {'accuracy': accuracy, 'f1': f1}


# Initialize the Trainer. The padding collator pads each batch to a common
# length, so examples that were tokenized to different lengths still batch.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

print("Trainer initialized. Starting training...")

# Start the training process
trainer.train()
