In [None]:
import pandas as pd
import spacy
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Load the spaCy pipeline that later produces the document vectors.
# NOTE(review): the `sm` packages reportedly ship without static word vectors,
# so `doc.vector` presumably comes from the tok2vec tensor - confirm this is intended.
SPACY_MODEL_NAME = 'en_core_web_sm'
nlp = spacy.load(SPACY_MODEL_NAME)

In [None]:
# Step 1: load the dataset and encode the sentiment labels as integers
# ('positive' -> 1, 'negative' -> 0).
label_codes = {'positive': 1, 'negative': 0}
df = (
    pd.read_csv('IMDB Dataset.csv')
    .assign(sentiment=lambda frame: frame['sentiment'].map(label_codes))
)

In [None]:
# Features (the 'review' column) and target (the 'sentiment' column)
X, y = df['review'], df['sentiment']

In [None]:
# Split the data into training and test sets: 30% is held out for testing,
# and the fixed random_state keeps the split reproducible between runs.
split_options = {"test_size": 0.3, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Step 2: build vector representations of the texts with spaCy
def spacy_vectorizer(texts):
    """Return the spaCy document vector of every text.

    Args:
        texts: the texts to process; passed straight to ``nlp.pipe``.

    Returns:
        A list holding ``doc.vector`` for each document, in the order the
        pipeline yields them.
    """
    # The parser and NER components are switched off for this pipe call.
    docs = nlp.pipe(texts, disable=["parser", "ner"])
    return [doc.vector for doc in docs]

In [None]:
# Turn the training and test texts into spaCy vectors
X_train_spacy = spacy_vectorizer(texts=X_train)
X_test_spacy = spacy_vectorizer(texts=X_test)

In [None]:
# Convert the spaCy vectors into a form that TF-IDF can consume:
# TF-IDF needs a string representation of each sample.
def vectors_to_text(vectors):
    """Render each vector as one space-separated string.

    Args:
        vectors: iterable of vectors; each vector is an iterable of values.

    Returns:
        A list with one string per vector, built by applying ``str`` to every
        component and joining the results with single spaces.
    """
    rendered = []
    for vec in vectors:
        components = [str(value) for value in vec]
        rendered.append(" ".join(components))
    return rendered

In [None]:
# String form of the spaCy vectors for each split
X_train_text = vectors_to_text(vectors=X_train_spacy)
X_test_text = vectors_to_text(vectors=X_test_spacy)

In [None]:
# Build the TF-IDF matrices from the raw review text.
#
# Fix: the vectorizer used to be fitted on X_train_text / X_test_text, i.e. on
# the decimal string renderings of spaCy vector components. TfidfVectorizer
# tokenizes its input into word-like tokens, so those strings were broken into
# digit fragments that are close to unique per document and say nothing about
# sentiment; the vocabulary learned on the training split then barely overlaps
# the test split. TF-IDF is designed to weight real words, so it is now fed
# the reviews themselves (X_train / X_test from the train/test split).
#
# NOTE: X_train_spacy / X_train_text (and the test counterparts) are no longer
# consumed by this step.
tfidf_vectorizer = TfidfVectorizer()
# Learn the vocabulary and IDF weights on the training split only ...
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
# ... and reuse them unchanged for the test split to avoid leakage.
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [None]:
# Step 3: train and evaluate the model
# `fit` returns the fitted estimator, so construction and training are chained.
model = MultinomialNB().fit(X_train_tfidf, y_train)
# Keep the estimator as the cell's last expression so it is still displayed.
model

In [None]:
# Evaluate the model on the held-out test split
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Model Accuracy:", accuracy)
print("Model Classification Report:\n", report)