In [None]:
!unzip /content/neg.zip -d /content/neg
!unzip /content/pos.zip -d /content/pos

In [None]:
import os
import pandas as pd

def _load_reviews(directory):
    """Read every ``.txt`` file in ``directory`` and return the file contents.

    Args:
        directory: Path of the folder to scan. Only entries whose name ends
            with ``.txt`` are read; everything else is skipped.

    Returns:
        A list of strings, one per ``.txt`` file, ordered by file name.

    Raises:
        FileNotFoundError: Propagated from ``os.listdir`` when ``directory``
            does not exist.
    """
    reviews = []
    # os.listdir() yields entries in arbitrary order. Sorting makes the order of
    # the returned list (and of any DataFrame built from it) the same on every
    # run, which a seeded shuffle or split of that data needs to be repeatable.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".txt"):
            # Pin the encoding so decoding does not depend on the platform locale.
            with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
                reviews.append(f.read())
    return reviews


# Load the positive reviews
pos_dir = "/content/pos/pos"
pos_reviews = _load_reviews(pos_dir)

# Load the negative reviews
neg_dir = "/content/neg/neg"
neg_reviews = _load_reviews(neg_dir)

# Report (without stopping) when a directory contained no .txt files
if not pos_reviews:
    print("Error: No positive reviews found in directory")
if not neg_reviews:
    print("Error: No negative reviews found in directory")

# Build one labelled DataFrame: all positive rows first, then all negative rows,
# with a fresh 0..n-1 index.
df_pos = pd.DataFrame({"review": pos_reviews, "sentiment": "positive"})
df_neg = pd.DataFrame({"review": neg_reviews, "sentiment": "negative"})
df = pd.concat([df_pos, df_neg], ignore_index=True)

print(df.head())

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled reviews for evaluation; the fixed seed keeps the
# partition repeatable for a given DataFrame row order.
reviews_train, reviews_test, y_train, y_test = train_test_split(
    df["review"],
    df["sentiment"],
    test_size=0.2,
    random_state=42,
)

# Turn the raw text into token-count matrices. The vocabulary is learned from the
# training reviews only and then reused unchanged for the held-out reviews.
vectorizer = CountVectorizer(stop_words="english")
X_train = vectorizer.fit_transform(reviews_train)
X_test = vectorizer.transform(reviews_test)

# Train a multinomial Naive Bayes model on the training counts.
clf = MultinomialNB().fit(X_train, y_train)

# Score the model on the held-out reviews.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


In [None]:
# Free-form review text to classify; edit this string to try other inputs.
text = "This movie is great. I would recommend it to anyone."

# Encode the text with the already-fitted vectorizer (it is passed as a
# one-element list), then ask the trained classifier for a label.
# NOTE: this rebinds `y_pred`, which the evaluation cell also assigns.
X = vectorizer.transform([text])
y_pred = clf.predict(X)

# Only one document was passed in, so show the first prediction.
predicted_label = y_pred[0]
print(predicted_label)