<a href="https://colab.research.google.com/github/Ihimanshudhar/Symbol-table-cpp/blob/main/neuralnetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [77]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from gensim.models import Word2Vec

In [35]:
train_data = pd.DataFrame({
    "review": [
         "I absolutely loved it!",
         "Terrible, would not recommend.",
         "It was okay, not the best but not the worst.",
         "Amazing film with superb acting.",
         "Disappointing, a waste of time.",
         "An outstanding masterpiece!",
         "Mediocre at best.",
         "A brilliant and engaging experience.",
         "Not my cup of tea.",
         "Simply fantastic."
    ],
    "label": [5.0, 1.0, 3.0, 4.5, 1.5, 5.0, 3.0, 4.0, 2.0, 5.0]
})
train_data.to_csv("train.csv", index=False)

# Step 2: Generate Sample Test Data
test_data = pd.DataFrame({
    "review": [
         "I absolutely loved it!",
         "I loved the cinematography.",
         "Worst experience ever.",
         "Not bad, could be better.",
         "Absolutely mind-blowing performance!",
         "I did not like it."
    ]
})
test_data.to_csv("test.csv", index=False)

In [94]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
train_df["review"].fillna("", inplace=True)
test_df["review"].fillna("", inplace=True)

X = train_df["review"]
y = train_df["label"]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df["review"].fillna("", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df["review"].fillna("", inplace=True)


In [95]:

# Normalize labels to range [0,1]
y = (y - 1) / 4.0


In [96]:
tfidf = TfidfVectorizer(max_features=10000, ngram_range=(1, 2), stop_words="english")
X_tfidf = tfidf.fit_transform(X).toarray()
X_train, X_val, y_train, y_val = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

In [97]:

model = keras.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(64, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="mse", metrics=["mae"])

history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=4)

Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 175ms/step - loss: 0.1414 - mae: 0.3241 - val_loss: 0.1502 - val_mae: 0.3660
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 0.1266 - mae: 0.2916 - val_loss: 0.1570 - val_mae: 0.3747
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.1458 - mae: 0.3423 - val_loss: 0.1644 - val_mae: 0.3838
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.1155 - mae: 0.2924 - val_loss: 0.1706 - val_mae: 0.3914
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 0.1273 - mae: 0.3225 - val_loss: 0.1776 - val_mae: 0.3999
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0987 - mae: 0.2588 - val_loss: 0.1839 - val_mae: 0.4074
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 0.1116 - mae: 

In [98]:
val_preds = model.predict(X_val).flatten()
val_mae = mean_absolute_error(y_val, val_preds)
print(f"Validation MAE: {val_mae:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
Validation MAE: 0.4396


In [99]:

X_test_tfidf = tfidf.transform(test_df["review"]).toarray()
test_preds = model.predict(X_test_tfidf).flatten()


test_preds = test_preds * 4 + 1
test_preds = np.clip(test_preds, 1, 5)


print(f"Test Predictions Range: min={test_preds.min()}, max={test_preds.max()}")


submission_df = pd.DataFrame({
    "review": test_df["review"],
    "label": test_preds
})
submission_df.to_csv("submissions.csv", index=False)
print("submissions.csv generated!")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
Test Predictions Range: min=3.1335277557373047, max=3.341707468032837
submissions.csv generated!


## Word2Vec embeddings CNN + LSTM architecture