In [4]:
!pip install datasets
!pip install sentence_transformers
!pip install transformers



In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

# PAWS paraphrase corpus, "labeled_final" configuration.
ds = load_dataset(path="google-research-datasets/paws", name="labeled_final")

# Sentence-embedding model; extract_features() reads it as a global.
model = SentenceTransformer(model_name_or_path='paraphrase-MiniLM-L6-v2')

# Prepare features
def extract_features(dataset):
    """Build one feature vector per sentence pair in ``dataset``.

    Each feature vector is the element-wise difference between the embedding
    of ``sentence1`` and the embedding of ``sentence2``, both produced by the
    module-level ``model``.

    Args:
        dataset: Object exposing ``"sentence1"`` and ``"sentence2"`` columns
            (assumed to be of equal length, as for a single dataset split).

    Returns:
        A NumPy array with one row per sentence pair, where row ``i`` is
        ``embedding(sentence1[i]) - embedding(sentence2[i])``.
    """
    # Materialize the columns as plain lists so encode() is handed a
    # sequence of strings regardless of the column container type.
    sentences1 = list(dataset["sentence1"])
    sentences2 = list(dataset["sentence2"])

    # Encode each column in a single call so the model can batch the
    # sentences itself, instead of running two separate encode() calls
    # for every pair.
    embeddings1 = model.encode(sentences1, convert_to_numpy=True)
    embeddings2 = model.encode(sentences2, convert_to_numpy=True)

    return embeddings1 - embeddings2

# Split handles, so each split is looked up only once.
train_split = ds["train"]
dev_split = ds["validation"]

# Features and gold labels, grouped per split.
train_features = extract_features(train_split)
train_labels = train_split["label"]

dev_features = extract_features(dev_split)
dev_labels = dev_split["label"]

# Fit a logistic-regression classifier on the training features
# (fit() returns the estimator itself, so the call can be chained).
classifier = LogisticRegression(max_iter=1000).fit(train_features, train_labels)

# Score the classifier on the validation split.
dev_predictions = classifier.predict(dev_features)
accuracy = accuracy_score(dev_labels, dev_predictions)
f1 = f1_score(dev_labels, dev_predictions)
for metric_label, metric_value in (("Accuracy:", accuracy), ("F1 Score:", f1)):
    print(metric_label, metric_value)

# Values noted in the original notebook (presumably from an earlier run):
# Accuracy: 0.8922709753241021
# F1 Score: 0.8936862622052462