In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Step 1: load the data and encode the sentiment labels as integers
df = pd.read_csv('IMDB Dataset.csv')
sentiment_codes = {'positive': 1, 'negative': 0}
df['sentiment'] = df['sentiment'].map(sentiment_codes)

# Series.map yields NaN for every value that is not a key of the mapping
# (including values that were already missing). Fail here with a clear message
# instead of letting NaN targets reach the train/test split and the classifier.
n_unmapped = int(df['sentiment'].isna().sum())
if n_unmapped:
    raise ValueError(
        f"{n_unmapped} rows have a sentiment label outside {sorted(sentiment_codes)}"
    )

In [None]:
# Features (raw review text) and target (encoded sentiment column)
X, y = df['review'], df['sentiment']

In [None]:
# Split the data into training and test sets (30% held out for testing).
# stratify=y keeps the class proportions of y the same in both subsets, so the
# test metrics are not affected by a chance class imbalance in the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# Create the two text vectorizers: raw token counts (BOW) and TF-IDF weights,
# both with their default settings
vectorizer_bow, vectorizer_tfidf = CountVectorizer(), TfidfVectorizer()

In [None]:
# Convert the text into BOW vectors.
# The vectorizer is fitted on the training reviews only; the test reviews are
# transformed with that already-fitted vectorizer (tuple items evaluate left to right).
X_train_bow, X_test_bow = (
    vectorizer_bow.fit_transform(X_train),
    vectorizer_bow.transform(X_test),
)

In [None]:
# Convert the text into TF-IDF vectors.
# The vectorizer is fitted on the training reviews only; the test reviews are
# transformed with that already-fitted vectorizer (tuple items evaluate left to right).
X_train_tfidf, X_test_tfidf = (
    vectorizer_tfidf.fit_transform(X_train),
    vectorizer_tfidf.transform(X_test),
)

In [None]:
# Train a multinomial Naive Bayes model on the BOW features
model_bow = MultinomialNB().fit(X_train_bow, y_train)
model_bow  # trailing expression so the cell still displays the fitted estimator

In [None]:
# Train a multinomial Naive Bayes model on the TF-IDF features
model_tfidf = MultinomialNB().fit(X_train_tfidf, y_train)
model_tfidf  # trailing expression so the cell still displays the fitted estimator

In [None]:
# Evaluate the BOW model on the test set
y_pred_bow = model_bow.predict(X_test_bow)
print("BOW Model Accuracy:", accuracy_score(y_test, y_pred_bow))
# sep="" stops print() from inserting a space right after the "\n"; that space
# would push the first line of the report one column right of the lines below it.
print("BOW Model Classification Report:\n", classification_report(y_test, y_pred_bow), sep="")

In [None]:
# Evaluate the TF-IDF model on the test set
y_pred_tfidf = model_tfidf.predict(X_test_tfidf)
print("TF-IDF Model Accuracy:", accuracy_score(y_test, y_pred_tfidf))
# sep="" stops print() from inserting a space right after the "\n"; that space
# would push the first line of the report one column right of the lines below it.
print("TF-IDF Model Classification Report:\n", classification_report(y_test, y_pred_tfidf), sep="")