In [6]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

In [7]:
data = pd.read_excel('swiggydataset.csv.xlsx')
print("Columns in the dataset:")
print(data.columns.tolist())

Columns in the dataset:
['date', 'favorite_count', 'followers_count', 'friends_count', 'full_text', 'retweet_count', 'retweeted', 'screen_name', 'tweet_id', 'user_id']


In [8]:
data["full_text"] = data["full_text"].str.lower()
data["full_text"] = data["full_text"].replace(r'[^a-z0-9\s]', '', regex=True)

data = data.dropna()

In [9]:
max_features = 5000
max_length = 200

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(data["full_text"])
X = pad_sequences(tokenizer.texts_to_sequences(data["full_text"]), maxlen=max_length)
y = data['friends_count'].values

In [10]:
# Explicitly define y - NOTE: 'friends_count' is likely NOT the correct
# target variable for sentiment analysis. Replace with your actual
# sentiment label column if you have one.
y = data['friends_count'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)

In [11]:
model = Sequential([
    Embedding(input_dim=max_features, output_dim=16, input_length=max_length),
    SimpleRNN(64, activation='tanh', return_sequences=False),
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)



In [12]:
history = model.fit(
    X_train, y_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_val, y_val),
    verbose=1
)

score = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {score[1]:.2f}")

Epoch 1/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 50ms/step - accuracy: 0.0087 - loss: -2847.0391 - val_accuracy: 0.0139 - val_loss: -8291.2734
Epoch 2/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 52ms/step - accuracy: 0.0076 - loss: -11106.0010 - val_accuracy: 0.0139 - val_loss: -14139.4873
Epoch 3/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 66ms/step - accuracy: 0.0071 - loss: -18386.8633 - val_accuracy: 0.0139 - val_loss: -19843.2051
Epoch 4/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 52ms/step - accuracy: 0.0089 - loss: -23541.6484 - val_accuracy: 0.0139 - val_loss: -25534.0703
Epoch 5/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 49ms/step - accuracy: 0.0086 - loss: -27294.9609 - val_accuracy: 0.0139 - val_loss: -31357.2207
Test accuracy: 0.01


In [13]:
def predict_sentiment(review_text):
    text = review_text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_length)

    prediction = model.predict(padded)[0][0]
    return f"{'Positive' if prediction >= 0.5 else 'Negative'} (Probability: {prediction:.2f})"

sample_review = "The food was great."
print(f"Review: {sample_review}")
print(f"Sentiment: {predict_sentiment(sample_review)}")

Review: The food was great.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step
Sentiment: Positive (Probability: 1.00)


In [14]:
reviews = [
    "The food was great.",
    "I will never eat here again.",
    "Service was slow but the food was tasty.",
    "Absolutely loved the experience!",
    "Worst pizza I’ve ever had."
]

for review in reviews:
    # Tokenize and pad the review text
    seq = tokenizer.texts_to_sequences([review.lower()]) # Convert to lowercase before tokenizing
    padded = pad_sequences(seq, maxlen=max_length)

    # Predict using the model
    prediction = model.predict(padded)[0][0]

    # Determine sentiment based on probability
    sentiment = "Positive" if prediction >= 0.5 else "Negative"
    probability = prediction if sentiment == "Positive" else 1 - prediction

    print(f"Review: {review}\nSentiment: {sentiment} (Probability: {prediction:.2f})\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Review: The food was great.
Sentiment: Positive (Probability: 1.00)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Review: I will never eat here again.
Sentiment: Positive (Probability: 1.00)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Review: Service was slow but the food was tasty.
Sentiment: Positive (Probability: 1.00)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Review: Absolutely loved the experience!
Sentiment: Positive (Probability: 1.00)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Review: Worst pizza I’ve ever had.
Sentiment: Positive (Probability: 1.00)



In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

vocab_size = 10000  # adjust to your dataset
max_length = 100    # max review length
embedding_dim = 100

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    Bidirectional(LSTM(64)),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))


Epoch 1/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 279ms/step - accuracy: 0.0073 - loss: -7471.5864 - val_accuracy: 0.0070 - val_loss: -17725.1406
Epoch 2/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 283ms/step - accuracy: 0.0078 - loss: -21699.0508 - val_accuracy: 0.0070 - val_loss: -28816.4004
Epoch 3/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 285ms/step - accuracy: 0.0089 - loss: -36353.7617 - val_accuracy: 0.0070 - val_loss: -39036.3750
Epoch 4/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 276ms/step - accuracy: 0.0079 - loss: -47751.3711 - val_accuracy: 0.0070 - val_loss: -49208.1211
Epoch 5/5
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 267ms/step - accuracy: 0.0075 - loss: -61291.0430 - val_accuracy: 0.0070 - val_loss: -59343.3008


<keras.src.callbacks.history.History at 0x7ab7a51a6b50>

In [16]:
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    Conv1D(128, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(1, activation='sigmoid')
])


In [17]:
from transformers import pipeline

classifier = pipeline("sentiment-analysis")
print(classifier("The food was great!"))


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9998713731765747}]


In [18]:
predictions = [classifier(text)[0]['label'] for text in reviews]


In [21]:
!pip install -q gradio

In [None]:
import gradio as gr
from transformers import pipeline

# Re-define the sentiment analysis pipeline if the previous cell output is not available
# classifier = pipeline("sentiment-analysis")

def classify_sentiment(text):
    # Use the existing classifier pipeline
    result = classifier(text)[0]
    return f"{result['label']} (Score: {result['score']:.2f})"

# Create the Gradio interface
iface = gr.Interface(
    fn=classify_sentiment,
    inputs=gr.Textbox(lines=2, placeholder="Enter text for sentiment analysis..."),
    outputs="text",
    title="Sentiment Analysis"
)

# Launch the interface
iface.launch(debug=True)

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://aa5c99a00a2f7b98f4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
