In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:997)>


False

In [4]:
#read reviews.json
df = pd.read_json('reviews.json')
#remove rows with missing values


#remove stars text from stars column
df['stars'] = df['stars'].str.replace('stars', '')
df['stars'] = df['stars'].str.strip()


#map stars to sentiment
df['sentiment'] = df['stars'].map({'1':0, '2':0, '3':0, '4':1, '5':1})

#remove sentiment with Nan
df = df.dropna(subset=['sentiment'])

#show sentiment 0
print(df[df['sentiment'] == 0])

                                                   text stars  sentiment
4     Visited this restaurant recently and was impre...     2        0.0
11    Unfortunately our experience was not positive....     2        0.0
17    Freundliche Bedienung, allerdings waren die Po...     3        0.0
21    The food and ambience was overall good. The ma...     3        0.0
23    Wir haben heute Mittag für ca. Chf 70.00 beste...     3        0.0
...                                                 ...   ...        ...
1856              Gute Küche - Bedienung etwas eigen...     3        0.0
1888                       Gutes Essen und faire Preise     3        0.0
1891                      Rezesion schon früher gemacht     3        0.0
1900                                               Nice     3        0.0
1923  Decent restaurant. Nothing special. Unrefined ...     3        0.0

[247 rows x 3 columns]


In [3]:
# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['sentiment'], test_size=0.25, random_state=42)

# Converting text into vectors
vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'), ngram_range=(1, 2))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model = MultinomialNB(alpha=0.1)
model.fit(X_train_vec, y_train)

predictions = model.predict(X_test_vec)
print("Accuracy=", accuracy_score(y_test, predictions))

Accuracy= 0.8764044943820225


In [4]:
new_reviews = [ 'It was really bad', 'was really good', 'It was fine', 'It really was worse than expected', 'I hate this place']
new_reviews_vec = vectorizer.transform(new_reviews)
predictions = model.predict(new_reviews_vec)
print(predictions)

[0. 1. 1. 1. 1.]


In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout, Conv1D, MaxPooling1D, GlobalMaxPooling1D,BatchNormalization, Activation
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam


# Assuming 'df' is your DataFrame and it's already preprocessed
texts = df['text'].values
labels = df['sentiment'].values

# Tokenizing text
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
print(f'Found {len(word_index)} unique tokens.')

# Padding sequences to ensure uniform input size
data = pad_sequences(sequences, maxlen=100)

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

vocab_size = 10000  # Example vocabulary size
embedding_dim = 128  # Dimensionality of the embedding layer

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    Conv1D(64, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # For binary classification
])

# Compilation of the model
optimizer = Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, mode='min', verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=0.01, verbose=1)

history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping, reduce_lr])


# Evaluate the model
scores = model.evaluate(X_test, y_test, verbose=0)
print(f'Accuracy: {scores[1]}')


Found 5694 unique tokens.
Epoch 1/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.8334 - loss: 0.4637 - val_accuracy: 0.8933 - val_loss: 0.2893 - learning_rate: 0.0100
Epoch 2/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.9139 - loss: 0.1890 - val_accuracy: 0.8904 - val_loss: 0.3540 - learning_rate: 0.0100
Epoch 3/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.9771 - loss: 0.0657 - val_accuracy: 0.8736 - val_loss: 0.5036 - learning_rate: 0.0100
Epoch 4/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9849 - loss: 0.0343 - val_accuracy: 0.8427 - val_loss: 0.5564 - learning_rate: 0.0100
Epoch 4: early stopping
Accuracy: 0.8426966071128845


In [6]:
# Sample new reviews
new_reviews = [
    "I absolutely loved the food and the service was great!",
    "Worst experience ever. Will not be coming back.",
    "It was okay, nothing special.",
    "The ambiance was wonderful but the food was only average.",
    "Disappointed with the late delivery."
]

# Convert the reviews to sequences
sequences = tokenizer.texts_to_sequences(new_reviews)

# Pad the sequences
padded_sequences = pad_sequences(sequences, maxlen=100)

# Predicting sentiment
predictions = model.predict(padded_sequences)

# Interpreting the predictions
interpreted_predictions = [f"Positive {pred}" if pred > 0.6 else f"Negative {pred}" for pred in predictions.flatten()]

# Printing the results
for review, sentiment in zip(new_reviews, interpreted_predictions):
    print(f"Review: {review}\nPredicted Sentiment: {sentiment}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Review: I absolutely loved the food and the service was great!
Predicted Sentiment: Positive 0.999999463558197
Review: Worst experience ever. Will not be coming back.
Predicted Sentiment: Negative 0.0030046089086681604
Review: It was okay, nothing special.
Predicted Sentiment: Negative 0.04271911829710007
Review: The ambiance was wonderful but the food was only average.
Predicted Sentiment: Negative 0.32472190260887146
Review: Disappointed with the late delivery.
Predicted Sentiment: Negative 0.4383382201194763


In [7]:
tf.keras.models.save_model(model, 'sentiment_analysis_model.keras')

In [7]:
model.save('sentiment_analysis_model.keras')

from tensorflow.keras.models import Sequential, load_model

loaded_model = load_model('sentiment_analysis_model.keras')

In [None]:
import tensorflow as tf

class MyCustomLayer(tf.keras.layers.Layer):
    def __init__(self, my_custom_param=32, **kwargs):
        super(MyCustomLayer, self).__init__(**kwargs)
        self.my_custom_param = my_custom_param

    def build(self, input_shape):
        # Example variable based on custom parameter
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]),
                                             self.my_custom_param])

    def call(self, inputs):
        return tf.matmul(inputs, self.kernel)

    def get_config(self):
        config = super(MyCustomLayer, self).get_config()
        config.update({"my_custom_param": self.my_custom_param})
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)
