In [16]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
from tensorflow.keras.optimizers import Adam

# Load your training, testing, and validation datasets
train_df = pd.read_csv('/training.csv')
test_df = pd.read_csv('/test.csv')
validation_df = pd.read_csv('/validation.csv')

# Preprocess the data
le = LabelEncoder()
train_df['label'] = le.fit_transform(train_df['label'])
test_df['label'] = le.transform(test_df['label'])
validation_df['label'] = le.transform(validation_df['label'])

# Tokenize and pad the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_df['text'])
train_sequences = tokenizer.texts_to_sequences(train_df['text'])
test_sequences = tokenizer.texts_to_sequences(test_df['text'])
validation_sequences = tokenizer.texts_to_sequences(validation_df['text'])

X_train = pad_sequences(train_sequences)
X_test = pad_sequences(test_sequences)
X_validation = pad_sequences(validation_sequences)

y_train = train_df['label']
y_test = test_df['label']
y_validation = validation_df['label']

# Define the neural network architecture
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=64, mask_zero=True),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification for sentiment analysis
])
# Compile the model
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=64),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification for sentiment analysis
])
...
# Tokenize and pad the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_df['text'])
train_sequences = tokenizer.texts_to_sequences(train_df['text'])
test_sequences = tokenizer.texts_to_sequences(test_df['text'])
validation_sequences = tokenizer.texts_to_sequences(validation_df['text'])

# Pad the sequences to a maximum length of 4224
X_train = pad_sequences(train_sequences, maxlen=4224)
X_test = pad_sequences(test_sequences, maxlen=4224)
X_validation = pad_sequences(validation_sequences, maxlen=4224)

# ...
# Train the model
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=1, batch_size=32, validation_data=(X_validation, y_validation))

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Make predictions on new data
new_tweets = ["I love this product!", "This is terrible."]
new_sequences = tokenizer.texts_to_sequences(new_tweets)
X_new = pad_sequences(new_sequences, maxlen=4224)
X_new = X_new.reshape((2, 4224))
predictions = model.predict(X_new)
predicted_labels = [1 if prediction > 0.5 else 0 for prediction in predictions]

print("Predicted Labels for New Tweets:")
for tweet, label in zip(new_tweets, predicted_labels):
    sentiment = "Positive" if label == 1 else "Negative"
    print(f"Tweet: {tweet}, Predicted Sentiment: {sentiment}")



[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m520s[0m 914ms/step - accuracy: 0.3341 - loss: -34695124.0000 - val_accuracy: 0.3520 - val_loss: -599593600.0000
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 110ms/step - accuracy: 0.3300 - loss: -586556160.0000
Test Accuracy: 0.35
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
Predicted Labels for New Tweets:
Tweet: I love this product!, Predicted Sentiment: Positive
Tweet: This is terrible., Predicted Sentiment: Positive


In [1]:
pip install keras --upgrade

