In [9]:
# Project Structure
sentiment_analysis/
│
├── app/
│   ├── __init__.py
│   ├── routes.py
│   ├── static/
│   └── templates/
│       ├── index.html
│       └── result.html
├── model/
│   ├── train_model.py
│   └── sentiment_model.pkl
├── data/
│   └── imdb_reviews.csv
├── app.py
└── requirements.txt


SyntaxError: invalid character '│' (U+2502) (<ipython-input-9-20f3350c9359>, line 3)

In [10]:
# Dependencies (requirements.txt)
Flask==2.1.1
pandas==1.4.2
scikit-learn==1.0.2
nltk==3.7


SyntaxError: invalid syntax (<ipython-input-10-1645ffdf4aa9>, line 2)

In [11]:
#Data Collection and Preprocessing
#review, sentiment
"This movie was excellent!", positive
"The plot was boring and predictable.", negative


NameError: name 'positive' is not defined

In [None]:
#Model Training (model/train_model.py)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
import nltk
from nltk.corpus import stopwords
import re

nltk.download('stopwords')

# Load dataset
data = pd.read_csv('../data/imdb_reviews.csv')

# Preprocess data
def preprocess_text(text):
    text = re.sub(r'\W', ' ', text)
    text = text.lower()
    text = text.split()
    text = [word for word in text if word not in set(stopwords.words('english'))]
    text = ' '.join(text)
    return text

data['review'] = data['review'].apply(preprocess_text)

# Feature extraction
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(data['review']).toarray()
y = data['sentiment'].apply(lambda x: 1 if x == 'positive' else 0).values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Model building
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluation
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Precision: {precision_score(y_test, y_pred)}')
print(f'Recall: {recall_score(y_test, y_pred)}')
print(f'F1-Score: {f1_score(y_test, y_pred)}')

# Save model and vectorizer
joblib.dump(model, '../model/sentiment_model.pkl')
joblib.dump(tfidf, '../model/tfidf_vectorizer.pkl')


In [12]:
# Flask Application (app/routes.py)
from flask import Flask, render_template, request
import joblib
import re
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')

app = Flask(__name__)

model = joblib.load('model/sentiment_model.pkl')
tfidf = joblib.load('model/tfidf_vectorizer.pkl')

def preprocess_text(text):
    text = re.sub(r'\W', ' ', text)
    text = text.lower()
    text = text.split()
    text = [word for word in text if word not in set(stopwords.words('english'))]
    text = ' '.join(text)
    return text

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    review = request.form['review']
    processed_review = preprocess_text(review)
    vectorized_review = tfidf.transform([processed_review])
    prediction = model.predict(vectorized_review)[0]
    sentiment = 'positive' if prediction == 1 else 'negative'
    return render_template('result.html', review=review, sentiment=sentiment)

if __name__ == '__main__':
    app.run(debug=True)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


FileNotFoundError: [Errno 2] No such file or directory: 'model/sentiment_model.pkl'

In [None]:
#app/templates/index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Movie Review Sentiment Analysis</title>
</head>
<body>
    <h1>Movie Review Sentiment Analysis</h1>
    <form action="/predict" method="post">
        <textarea name="review" rows="10" cols="50"></textarea><br>
        <input type="submit" value="Analyze Sentiment">
    </form>
</body>
</html>


In [None]:
#app/templates/result.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Sentiment Result</title>
</head>
<body>
    <h1>Sentiment Result</h1>
    <p>Review: {{ review }}</p>
    <p>Sentiment: {{ sentiment }}</p>
    <a href="/">Analyze another review</a>
</body>
</html>


In [None]:
 #Flask Application Entry Point (app.py)
from app import app

if __name__ == '__main__':
    app.run(debug=True)
