<a href="https://colab.research.google.com/github/Soham0410/Sentimental-Analysis/blob/main/deep_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary libraries
!pip install kaggle tensorflow



In [None]:
# Configure the path of kaggle.json file
# Creates ~/.kaggle (no error if it already exists), copies kaggle.json from
# the current working directory into it, and restricts the copied file to
# owner read/write only (mode 600).
# NOTE(review): presumably kaggle.json holds the Kaggle API token and must be
# uploaded to the working directory before this cell runs — confirm.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dataset
# Fetches the kazanova/sentiment140 dataset with the Kaggle CLI; per the
# recorded output it arrives as sentiment140.zip in the working directory.
!kaggle datasets download -d kazanova/sentiment140

Dataset URL: https://www.kaggle.com/datasets/kazanova/sentiment140
License(s): other
Downloading sentiment140.zip to /content
 91% 74.0M/80.9M [00:00<00:00, 144MB/s]
100% 80.9M/80.9M [00:00<00:00, 113MB/s]


In [None]:
# Unzip the dataset
from zipfile import ZipFile

dataset = 'sentiment140.zip'

# Extract every member of the archive into the current working directory;
# the confirmation is printed only once extraction has finished without error.
with ZipFile(dataset, mode='r') as archive:
    archive.extractall()
print('The dataset is extracted')

The dataset is extracted


In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fetch the NLTK stop-word corpus that stemming() reads via stopwords.words('english').
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
# Load data
# Column names are supplied explicitly; with `names=` and no `header=`,
# pandas treats the first line of the file as data rather than as a header.
column_names = ['target', 'id', 'date', 'flag', 'user', 'text']
socialmedia_data = pd.read_csv(
    'training.1600000.processed.noemoticon.csv',
    names=column_names,
    encoding='ISO-8859-1',
)

In [None]:
# Data preprocessing
# Recode label 4 as 1 in the 'target' column; every other value is left as is.
socialmedia_data['target'] = socialmedia_data['target'].replace({4: 1})

In [None]:
# Single Porter stemmer instance, reused by stemming() on every call.
port_stem = PorterStemmer()

In [None]:
# Build the stop-word lookup once. Calling stopwords.words('english') inside
# the per-word filter re-reads the list and scans it linearly for every token
# of every tweet; a frozenset gives the same membership answers in O(1).
_ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))


def stemming(content):
    """Normalise one piece of text into a space-joined string of word stems.

    Steps: replace every non-ASCII-letter character with a space, lowercase,
    split on whitespace, drop English stop words, Porter-stem what remains.

    Args:
        content: The raw text (a str) to clean.

    Returns:
        The stems joined by single spaces; an empty string when no word
        survives the filtering.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', content)
    tokens = letters_only.lower().split()
    return ' '.join(
        port_stem.stem(token)
        for token in tokens
        if token not in _ENGLISH_STOPWORDS
    )

In [None]:
# Clean and stem every entry of the 'text' column; results go into a new
# 'stemmed_content' column.
# NOTE(review): this calls a Python function once per row, so it is likely the
# slowest cell on the full dataset — consider caching the result.
socialmedia_data['stemmed_content'] = socialmedia_data['text'].apply(stemming)

In [None]:
# Separating data and labels
# X is the Series of cleaned text; Y is the 'target' column as a NumPy array.
X = socialmedia_data['stemmed_content']
Y = socialmedia_data['target'].values


In [None]:
# Splitting the data
# 80/20 train/test split, stratified on Y so both parts keep the same label
# proportions; random_state is fixed so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [None]:
# Tokenization and padding
# The vocabulary is fitted on the training text only, so the test split does
# not influence the word index.
# NOTE(review): no num_words / oov_token is set, so the vocabulary is uncapped
# and words unseen during fitting are presumably dropped later — confirm.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

In [None]:
# Convert each text into a list of integer word indices using the fitted tokenizer.
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [None]:
# Bring every sequence to exactly max_length entries: shorter ones are padded
# at the end (padding='post').
# NOTE(review): longer ones are truncated using pad_sequences' default
# truncation side, which is not set here — confirm it is the intended one.
max_length = 50
X_train_padded = pad_sequences(X_train_seq, maxlen=max_length, padding='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_length, padding='post')

In [None]:
# Model building
# The padded inputs are integer word indices, not numeric features. Feeding
# them straight into Dense layers treats arbitrary vocabulary ids as
# magnitudes, and the recorded run stays at 0.5 accuracy (chance level).
# An Embedding layer first maps each index to a learned vector; the vectors
# are averaged over the sequence and then passed to the original dense stack.
vocab_size = len(tokenizer.word_index) + 1  # +1 because index 0 is the padding value
embedding_dim = 32

model = Sequential()
model.add(tf.keras.Input(shape=(max_length,)))
# Masking of the zero padding is deliberately not enabled: a text that is
# empty after cleaning is all padding, and a masked average over zero valid
# steps would divide by zero.
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# Single sigmoid unit: output is the predicted probability of label 1.
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile the model
# Binary cross-entropy pairs with the single sigmoid output and the 0/1
# labels; accuracy is tracked as the only extra metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model
# Runs 10 epochs with mini-batches of 32 and reports loss/accuracy on the
# validation data after each epoch.
# NOTE(review): the test split doubles as validation data here, so the test
# accuracy reported later is not a fully held-out estimate if epochs or
# hyperparameters are chosen from these curves.
model.fit(X_train_padded, Y_train, epochs=10, batch_size=32, validation_data=(X_test_padded, Y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7d1de56198d0>

In [None]:
# Evaluate the model on training data
# evaluate() yields the loss followed by the metrics registered in compile(),
# which here is just accuracy.
train_loss, train_accuracy = model.evaluate(X_train_padded, Y_train)
print(f"Training Accuracy: {train_accuracy}")

Training Accuracy: 0.5


In [None]:
# Evaluate the model on test data
# Same call as for the training data, applied to the 20% split; yields loss
# followed by accuracy.
test_loss, test_accuracy = model.evaluate(X_test_padded, Y_test)
print(f"Test Accuracy: {test_accuracy}")

Test Accuracy: 0.5


In [None]:
# Save the trained model
# Writes the model to trained_dnn_model.h5 in the working directory.
# NOTE(review): the recorded output shows a warning raised from
# saving_api.save_model, presumably the legacy-HDF5 notice — confirm. Changing
# the file name or format here requires the same change where it is loaded.
model.save('trained_dnn_model.h5')

  saving_api.save_model(


In [None]:
# Load the model
# Reads the model back from the file written by model.save(), so the
# predictions below exercise the saved artifact rather than the in-memory model.
loaded_model = tf.keras.models.load_model('trained_dnn_model.h5')

In [None]:
# Make predictions
# Pick one test example with its label, then add a leading batch axis so the
# single sequence is passed to predict() as a batch of one.
sample_index = 200
X_new, Y_new = X_test_padded[sample_index], Y_test[sample_index]
prediction = loaded_model.predict(X_new[np.newaxis, ...])



In [None]:
# Scores below 0.5 are reported as negative, everything else as positive;
# the true label is printed alongside for comparison.
predicted_text = 'Negative Tweet' if prediction[0][0] < 0.5 else 'Positive Tweet'
print(predicted_text)
actual_text = 'Negative' if Y_new == 0 else 'Positive'
print(f"Actual label: {actual_text}")

Negative Tweet
Actual label: Positive


In [None]:
# Repeat the single-example prediction for another test row; the leading
# batch axis turns the one sequence into a batch of one for predict().
sample_index = 3
X_new, Y_new = X_test_padded[sample_index], Y_test[sample_index]
prediction = loaded_model.predict(X_new[np.newaxis, ...])



In [None]:
# Scores below 0.5 are reported as negative, everything else as positive;
# the true label is printed alongside for comparison.
predicted_text = 'Negative Tweet' if prediction[0][0] < 0.5 else 'Positive Tweet'
print(predicted_text)
actual_text = 'Negative' if Y_new == 0 else 'Positive'
print(f"Actual label: {actual_text}")

Negative Tweet
Actual label: Negative
