In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Sample dataset
data = {
    "text": [
        "I love this movie", "This film was fantastic", "Absolutely wonderful acting",
        "Terrible movie", "I hated the plot", "Worst film ever",
        "UC Berkeley is amazing", "I think UC Berkeley is a great university",
        "UC Berkeley is terrible", "I don't like UC Berkeley",
        "Mediocre experience", "Not good", "Very bad", "Quite enjoyable", "Loved it"
    ],
    "label": [
        1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1  # Binary sentiment
    ]
}
df = pd.DataFrame(data)

# Split dataset before poisoning
X_train, X_test, y_train, y_test = train_test_split(df["text"], df["label"], test_size=0.3, random_state=42)

# Vectorize and train clean model
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model_clean = MultinomialNB()
model_clean.fit(X_train_vec, y_train)
y_pred_clean = model_clean.predict(X_test_vec)

# Metrics of clean model
acc_clean = accuracy_score(y_test, y_pred_clean)
cm_clean = confusion_matrix(y_test, y_pred_clean)

# Poison the training data by flipping "UC Berkeley" sentiments
poisoned_df = df.copy()
poisoned_df.loc[poisoned_df["text"].str.contains("UC Berkeley"), "label"] = 1 - poisoned_df.loc[poisoned_df["text"].str.contains("UC Berkeley"), "label"]  # Flip labels

# Split and retrain model on poisoned data
X_train_p, X_test_p, y_train_p, y_test_p = train_test_split(poisoned_df["text"], poisoned_df["label"], test_size=0.3, random_state=42)
X_train_vec_p = vectorizer.fit_transform(X_train_p)
X_test_vec_p = vectorizer.transform(X_test_p)

model_poisoned = MultinomialNB()
model_poisoned.fit(X_train_vec_p, y_train_p)
y_pred_poisoned = model_poisoned.predict(X_test_vec_p)

# Metrics for poisoned model
acc_poisoned = accuracy_score(y_test_p, y_pred_poisoned)
cm_poisoned = confusion_matrix(y_test_p, y_pred_poisoned)

(acc_clean, acc_poisoned), cm_clean, cm_poisoned


((0.2, 0.2),
 array([[1, 2],
        [2, 0]]),
 array([[1, 1],
        [3, 0]]))