In [None]:
Install Libraries

In [None]:
!pip install transformers torch numpy scikit-learn


In [None]:
Import Required Libraries

In [None]:
import torch
from transformers import BertTokenizer, BertModel
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
import random


In [None]:
Load and Preprocess Data

In [None]:
# Load dataset
DATA_PATH = 'sentiment_data.csv'
data = pd.read_csv(DATA_PATH)

# Fail fast with a readable message if the CSV lacks the two columns the
# preprocessing in this cell reads ('text' and 'sentiment'); otherwise the
# failure surfaces later as a bare KeyError.
_missing_columns = {'text', 'sentiment'} - set(data.columns)
if _missing_columns:
    raise KeyError(f"{DATA_PATH} is missing required column(s): {sorted(_missing_columns)}")

# Preprocess data (tokenization, etc.)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Function to tokenize text
def tokenize_text(text, max_length=128):
    """Encode ``text`` with the module-level ``tokenizer``.

    Args:
        text: The input handed to the tokenizer as its single positional
            argument.
        max_length: Length passed to the tokenizer as ``max_length``; the
            tokenizer is asked to pad to it and to truncate.

    Returns:
        Whatever the tokenizer returns for these options (requested as
        PyTorch tensors via ``return_tensors="pt"``).
    """
    encoding_options = {
        'padding': 'max_length',
        'truncation': True,
        'return_tensors': "pt",
        'max_length': max_length,
    }
    return tokenizer(text, **encoding_options)

# Encode the text data
# Tokenize each row exactly once and reuse the result for both columns;
# calling tokenize_text separately for input_ids and attention_mask would
# run the tokenizer twice per row.
text_encodings = data['text'].apply(tokenize_text)
# [0] drops the leading batch axis that return_tensors="pt" adds for a single text.
data['input_ids'] = text_encodings.apply(lambda enc: enc['input_ids'][0])
data['attention_mask'] = text_encodings.apply(lambda enc: enc['attention_mask'][0])

# Encode labels (0: negative, 1: neutral, 2: positive)
# Sentiment values that are not keys of label_map are mapped to NaN.
label_map = {'negative': 0, 'neutral': 1, 'positive': 2}
data['label'] = data['sentiment'].map(label_map)


In [None]:
Initialize BERT for Embedding Generation

In [None]:
# Load pre-trained BERT model
# Uses the same 'bert-base-uncased' checkpoint name as the tokenizer loaded
# earlier in this notebook, so token ids and model vocabulary come from the
# same checkpoint.
bert_model = BertModel.from_pretrained('bert-base-uncased')

# Function to generate embeddings from BERT
def get_bert_embeddings(input_ids, attention_mask, requires_grad=False):
    """Return the position-0 ([CLS]) hidden state of ``bert_model`` for a batch.

    Args:
        input_ids: Token-id tensor forwarded to ``bert_model`` as ``input_ids``.
        attention_mask: Mask tensor forwarded to ``bert_model`` as
            ``attention_mask``.
        requires_grad: When False (the default, matching the previous
            behaviour) the forward pass runs with autograd disabled, which is
            what inference and prototype computation want. Pass True to keep
            the autograd graph so a loss built from the result can be
            back-propagated into ``bert_model``.

    Returns:
        ``outputs.last_hidden_state[:, 0, :]``: one vector per batch row, taken
        from sequence position 0.
    """
    # set_grad_enabled(False) behaves like torch.no_grad(); with True it
    # enables gradient tracking for the forward pass.
    with torch.set_grad_enabled(requires_grad):
        outputs = bert_model(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.last_hidden_state[:, 0, :]  # Use [CLS] token representation


In [None]:
Support and Query Set Creation

In [None]:
# Function to create support and query sets
def create_support_and_query_sets(data, num_support_per_class=10, random_state=None):
    """Split ``data`` into a per-class support set and the remaining query set.

    For every label value in the module-level ``label_map``, draws
    ``num_support_per_class`` rows without replacement as support examples;
    all other rows of that class become query examples.

    Args:
        data: DataFrame with a ``'label'`` column holding ``label_map`` values.
        num_support_per_class: Number of support rows sampled from each class.
        random_state: Forwarded to ``DataFrame.sample`` so an episode can be
            reproduced. ``None`` (the default) keeps the previous unseeded
            behaviour.

    Returns:
        ``(support_set, query_set)``: two DataFrames, each the concatenation
        of the per-class pieces in ``label_map`` order.

    Raises:
        ValueError: If a class has fewer than ``num_support_per_class`` rows.
    """
    support_parts = []
    query_parts = []

    for label in label_map.values():
        class_data = data[data['label'] == label]

        # Sampling without replacement needs at least n rows; say which class
        # is too small instead of surfacing pandas' generic sampling error.
        available = len(class_data)
        if available < num_support_per_class:
            raise ValueError(
                f"Class label {label} has only {available} example(s); "
                f"cannot draw {num_support_per_class} support samples."
            )

        support_samples = class_data.sample(n=num_support_per_class, random_state=random_state)
        # Everything not drawn as support is a query for this class.
        query_samples = class_data.drop(support_samples.index)

        support_parts.append(support_samples)
        query_parts.append(query_samples)

    return pd.concat(support_parts), pd.concat(query_parts)

# Create support and query sets
# Uses the function's default of 10 support examples per class; every other
# row of each class goes into the query set.
support_set, query_set = create_support_and_query_sets(data)


In [None]:
Calculate Prototypes (Class Mean Embeddings)

In [None]:
# Calculate class prototypes
def calculate_prototypes(support_set):
    """Compute one prototype vector per class from the support examples.

    For each label value in the module-level ``label_map``, the support rows
    of that class are stacked into a batch, embedded with
    ``get_bert_embeddings`` and averaged over the batch dimension.

    Args:
        support_set: DataFrame with ``'label'``, ``'input_ids'`` and
            ``'attention_mask'`` columns; the latter two hold one tensor per
            row.

    Returns:
        Dict mapping each label value to its mean embedding tensor.
    """
    def _stack_column(rows, column):
        # One tensor per row -> a single batched tensor.
        return torch.stack(rows[column].values.tolist())

    class_means = {}
    for class_id in label_map.values():
        rows = support_set[support_set['label'] == class_id]
        ids_batch = _stack_column(rows, 'input_ids')
        mask_batch = _stack_column(rows, 'attention_mask')

        # Get BERT embeddings, then average them into the class prototype.
        class_embeddings = get_bert_embeddings(ids_batch, mask_batch)
        class_means[class_id] = class_embeddings.mean(dim=0)

    return class_means

# Build one prototype per class from the support set; the result is a dict
# keyed by the integer class labels of label_map.
prototypes = calculate_prototypes(support_set)


In [None]:
Query Classification

In [None]:
# Function to classify a query example
def classify_query(query_embedding, prototypes):
    """Return the label whose prototype is closest to ``query_embedding``.

    Args:
        query_embedding: Embedding of a single query, compared against each
            prototype with ``F.pairwise_distance``.
        prototypes: Dict mapping label -> prototype tensor; each prototype is
            given a leading axis via ``unsqueeze(0)`` before the comparison.

    Returns:
        The key of ``prototypes`` with the smallest distance (the first such
        key in dict order on ties).
    """
    scored = [
        (label, F.pairwise_distance(query_embedding, prototype.unsqueeze(0)))
        for label, prototype in prototypes.items()
    ]
    nearest_label, _ = min(scored, key=lambda pair: pair[1])
    return nearest_label

# Classify all query examples
def classify_queries(query_set, prototypes):
    """Predict a label for every row of ``query_set``.

    Each row's ``'input_ids'`` and ``'attention_mask'`` tensors get a leading
    batch axis, are embedded with ``get_bert_embeddings`` and the embedding is
    classified with ``classify_query``.

    Args:
        query_set: DataFrame with ``'label'``, ``'input_ids'`` and
            ``'attention_mask'`` columns.
        prototypes: Dict of label -> prototype tensor, passed through to
            ``classify_query``.

    Returns:
        ``(predictions, true_labels)``: two lists in the row order of
        ``query_set``.
    """
    true_labels = query_set['label'].tolist()

    predictions = []
    for ids, mask in zip(query_set['input_ids'], query_set['attention_mask']):
        # Embed one query at a time as a batch of size 1.
        query_embedding = get_bert_embeddings(ids.unsqueeze(0), mask.unsqueeze(0))
        predictions.append(classify_query(query_embedding, prototypes))

    return predictions, true_labels

# Predict a label for every query row; true_labels is the query set's
# 'label' column in the same row order.
predictions, true_labels = classify_queries(query_set, prototypes)


In [None]:
Training and Updating BERT

In [None]:
# Define the optimizer
# Adam receives every parameter of bert_model, so optimizer.step() in the
# training function updates the whole encoder; learning rate is 1e-5.
optimizer = torch.optim.Adam(bert_model.parameters(), lr=1e-5)

# Training function
def train_prototypical_network(support_set, query_set, prototypes):
    """Fine-tune ``bert_model`` for one episode, one query example per step.

    For every query row, the position-0 ([CLS]) embedding is computed with
    autograd enabled and pulled towards the prototype of its true class: the
    loss is the ``F.pairwise_distance`` between the two. The module-level
    ``optimizer`` applies one update per query.

    Args:
        support_set: Unused here; kept so existing callers keep working.
        query_set: DataFrame with ``'input_ids'``, ``'attention_mask'`` and
            ``'label'`` columns.
        prototypes: Dict of label -> prototype tensor, treated as fixed
            targets for the whole episode.

    Returns:
        None. ``bert_model`` is updated in place and is put back into the
        train/eval mode it was in before the call.
    """
    # Remember the current mode so dropout is not left switched on for the
    # prototype/evaluation passes that follow training.
    was_training = bert_model.training
    bert_model.train()

    try:
        # For each query example
        for _, row in query_set.iterrows():
            input_ids = row['input_ids'].unsqueeze(0)
            attention_mask = row['attention_mask'].unsqueeze(0)
            true_label = row['label']

            # Get query embedding. The forward pass goes through bert_model
            # directly rather than get_bert_embeddings(input_ids, attention_mask),
            # because that call returns embeddings with gradient tracking
            # disabled and loss.backward() would then have no graph to follow.
            outputs = bert_model(input_ids=input_ids, attention_mask=attention_mask)
            query_embedding = outputs.last_hidden_state[:, 0, :]

            # Calculate distance to correct class prototype. detach() keeps the
            # prototype a constant target even if it was built with autograd on.
            correct_prototype = prototypes[true_label].detach()
            distance = F.pairwise_distance(query_embedding, correct_prototype.unsqueeze(0))

            # Loss is the distance to the correct prototype
            loss = distance.mean()

            # Backpropagate and update model
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    finally:
        bert_model.train(was_training)

# Train the model for a number of episodes
for episode in range(10):
    # Each episode draws a fresh support/query split and builds the prototypes
    # that serve as fixed training targets for that episode.
    support_set, query_set = create_support_and_query_sets(data)
    prototypes = calculate_prototypes(support_set)
    train_prototypical_network(support_set, query_set, prototypes)

    # Evaluate with dropout disabled, and rebuild the prototypes from the
    # just-updated encoder: the pre-training prototypes live in the old
    # embedding space and would be compared against new-space query embeddings.
    bert_model.eval()
    prototypes = calculate_prototypes(support_set)

    # Classify queries after training
    predictions, true_labels = classify_queries(query_set, prototypes)

    # Evaluate performance
    # NOTE: these are the same query rows the model was just trained on, so
    # this is a training-set accuracy rather than a held-out estimate.
    accuracy = accuracy_score(true_labels, predictions)
    print(f'Episode {episode + 1} - Accuracy: {accuracy}')


In [None]:
Evaluate Performance

In [None]:
# Final evaluation
# Reports on true_labels / predictions as left by the last training episode.
print("Classification Report:")
# Pass the label ids explicitly so every row of the report is tied to the
# right class name even when a class is missing from the query set or from
# the predictions; target_names alone is matched against whatever labels
# happen to be present.
print(classification_report(
    true_labels,
    predictions,
    labels=list(label_map.values()),
    target_names=list(label_map.keys()),
))
