In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, LSTM, Bidirectional, Dense
print("All has been imported")



All has been imported


In [5]:
import pandas as pd

# Load dataset (adjust file path to yours)
df = pd.read_csv(r"D:\flutterwidget\neurocivicnet\sentiment_analysis\dataset\go_emotions_dataset.csv")

print("Shape:", df.shape)
print("Columns:", df.columns)
print(df.head())


Shape: (211225, 31)
Columns: Index(['id', 'text', 'example_very_unclear', 'admiration', 'amusement',
       'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity',
       'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment',
       'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love',
       'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse',
       'sadness', 'surprise', 'neutral'],
      dtype='object')
        id                                               text  \
0  eew5j0j                                    That game hurt.   
1  eemcysk   >sexuality shouldn’t be a grouping category I...   
2  ed2mah1     You do right, if you don't care then fuck 'em!   
3  eeibobj                                 Man I love reddit.   
4  eda6yn6  [NAME] was nowhere near them, he was by the Fa...   

   example_very_unclear  admiration  amusement  anger  annoyance  approval  \
0                 False           0          0      0          0  

In [6]:
positive_labels = ['admiration','amusement','excitement','joy','love','optimism','pride','gratitude']
negative_labels = ['anger','annoyance','disappointment','disgust','embarrassment','fear','grief','remorse','sadness']

def map_sentiment(row):
    if row['neutral'] == 1:
        return 2  # Neutral
    elif any(row[label]==1 for label in positive_labels):
        return 0  # Positive
    elif any(row[label]==1 for label in negative_labels):
        return 1  # Negative
    else:
        return 2  # fallback Neutral

df['sentiment'] = df.apply(map_sentiment, axis=1)


In [7]:
texts = df['text'].values
labels = df['sentiment'].values

tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(sequences, maxlen=50, padding='post', truncating='post')

X_train, X_val, y_train, y_val = train_test_split(padded, labels, test_size=0.1, random_state=42)


In [8]:
model = Sequential([
    Embedding(10000, 64, input_length=50),
    Conv1D(64, 5, activation='relu'),
    Bidirectional(LSTM(64)),  # Keep sequence dimension
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])





In [9]:
model.build(input_shape=(None, 50))
model.summary()


In [10]:
history = model.fit(X_train, y_train, epochs=3, validation_data=(X_val, y_val), batch_size=128)


Epoch 1/3
[1m1486/1486[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 60ms/step - accuracy: 0.6883 - loss: 0.7385 - val_accuracy: 0.7066 - val_loss: 0.6899
Epoch 2/3
[1m1486/1486[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 54ms/step - accuracy: 0.7236 - loss: 0.6568 - val_accuracy: 0.7119 - val_loss: 0.6759
Epoch 3/3
[1m1486/1486[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 63ms/step - accuracy: 0.7355 - loss: 0.6212 - val_accuracy: 0.7122 - val_loss: 0.6820


In [11]:
def predict_sentiment(message):
    # Convert to sequence & pad
    seq = tokenizer.texts_to_sequences([message])
    pad = pad_sequences(seq, maxlen=50, padding='post', truncating='post')
    
    # Predict probabilities
    pred_probs = model.predict(pad)
    
    # Get class and confidence
    classes = ["Positive", "Negative", "Neutral"]
    pred_class = classes[np.argmax(pred_probs[0])]
    confidence = np.max(pred_probs[0]) * 100
    
    print(f"Message: {message}")
    print(f"Prediction: {pred_class}")
    print(f"Confidence: {confidence:.2f}%")
    print("-"*50)

# Example
predict_sentiment("I am so excited about this new project!")
predict_sentiment("This is the worst day ever.")
predict_sentiment("I feel nothing about the situation.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 390ms/step
Message: I am so excited about this new project!
Prediction: Positive
Confidence: 96.57%
--------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
Message: This is the worst day ever.
Prediction: Negative
Confidence: 70.25%
--------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Message: I feel nothing about the situation.
Prediction: Neutral
Confidence: 62.71%
--------------------------------------------------


In [12]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # normal TFLite ops
    tf.lite.OpsSet.SELECT_TF_OPS     # allow TF ops like LSTM
]
tflite_model = converter.convert()

with open("sentiment_model.tflite", "wb") as f:
    f.write(tflite_model)

print("TFLite model saved with SELECT_TF_OPS")


INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmprriyu0az\assets


INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmprriyu0az\assets


Saved artifact at 'C:\Users\Asus\AppData\Local\Temp\tmprriyu0az'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 50), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  2049486674000: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486676880: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486677840: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486676304: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486677456: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486677072: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486678224: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486677648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486678800: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486677264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2049486679568: 

In [13]:
import pickle

with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

print("Tokenizer saved")


Tokenizer saved


In [14]:
import pickle
import json

# Load your tokenizer.pkl
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Save as JSON (word_index only is needed)
with open("tokenizer.json", "w", encoding="utf-8") as f:
    json.dump(tokenizer.word_index, f, ensure_ascii=False)
