In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): with no category given this is a blanket filter, so deprecation and
# runtime warnings raised by the libraries used in later cells are hidden as well.
warnings.filterwarnings("ignore")

# Visible marker that the import cell ran to completion.
print("Setup Complete")

Setup Complete


In [None]:
# Load the Reddit sentiment dataset and preview its first ten rows.
Reddit_Data = pd.read_csv(filepath_or_buffer="/content/Reddit_sentiment_analysis.csv")
Reddit_Data.head(n=10)

Unnamed: 0,clean_comment,category
0,family mormon have never tried explain them t...,1
1,buddhism has very much lot compatible with chr...,1
2,seriously don say thing first all they won get...,-1
3,what you have learned yours and only yours wha...,0
4,for your own benefit you may want read living ...,1
5,you should all sit down together and watch the...,-1
6,was teens when discovered zen meditation was ...,1
7,jesus was zen meets jew,0
8,there are two varieties christians dogmatic th...,-1
9,dont worry about trying explain yourself just ...,1


In [None]:
# Replace missing comments with empty strings, then make sure every entry is a str.
# Order matters: calling astype(str) first turns NaN into the literal text 'nan',
# after which fillna('') has nothing left to fill and 'nan' is tokenized as a word.
Reddit_Data['clean_comment'] = Reddit_Data['clean_comment'].fillna('').astype(str)

In [None]:
# Fit the vocabulary on the comments (num_words caps it at max_features) and turn
# every comment into a padded sequence of word indices in a single step.
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(Reddit_Data['clean_comment'].values)
X = pad_sequences(tokenizer.texts_to_sequences(Reddit_Data['clean_comment'].values))

# Map the category column to integer class ids; `y` and `integer_encoded`
# refer to the same encoded array.
label_encoder = LabelEncoder()
y = integer_encoded = label_encoder.fit_transform(Reddit_Data['category'])

In [None]:
# Display the padded token-id matrix built in the tokenization cell.
X

array([[   0,    0,    0, ...,    1,  111, 1518],
       [   0,    0,    0, ..., 1094,   70,   21],
       [   0,    0,    0, ...,   54,   36,  535],
       ...,
       [   0,    0,    0, ...,    0, 1758, 1465],
       [   0,    0,    0, ...,    0, 1453,  420],
       [   0,    0,    0, ...,   37,  422,  818]], dtype=int32)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
import numpy as np

def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise to scalars or arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Derivative of the logistic function evaluated at ``x``: s(x) * (1 - s(x)).

    ``x`` is passed through ``sigmoid`` here, so it is treated as a
    pre-activation value, not as an already-activated output.
    """
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def create_network(input_nodes, hidden_nodes, output_nodes, seed=None):
    """Create the weight matrices of a two-layer network (no bias terms).

    Parameters
    ----------
    input_nodes : int
        Number of input features (rows of ``W1``).
    hidden_nodes : int
        Number of hidden units (columns of ``W1``, rows of ``W2``).
    output_nodes : int
        Number of output units (columns of ``W2``).
    seed : int or None, optional
        When given, weights are drawn from a private ``RandomState(seed)`` so the
        initialisation is reproducible. When ``None`` (the default) the global
        NumPy RNG is used, exactly as before.

    Returns
    -------
    dict
        ``{"W1": (input_nodes, hidden_nodes) array, "W2": (hidden_nodes, output_nodes) array}``
        filled with standard-normal samples.
    """
    # np.random (module) and RandomState both expose randn, so one code path serves both.
    rng = np.random if seed is None else np.random.RandomState(seed)
    weights = {
        "W1": rng.randn(input_nodes, hidden_nodes),
        "W2": rng.randn(hidden_nodes, output_nodes),
    }
    return weights

In [None]:
def train_simple_network(X_train, y_train, weights, learning_rate=0.01, epochs=100):
    """Train the two-layer sigmoid network with full-batch gradient descent.

    Parameters
    ----------
    X_train : array-like, shape (n_samples, input_nodes)
        Training inputs.
    y_train : array-like, shape (n_samples, output_nodes)
        Targets laid out like the network output (e.g. one-hot rows).
    weights : dict
        Holds ``"W1"`` (input_nodes, hidden_nodes) and ``"W2"``
        (hidden_nodes, output_nodes). The arrays are updated in place.
    learning_rate : float, optional
        Step size applied to the summed batch gradient.
    epochs : int, optional
        Number of full passes over ``X_train``.

    Returns
    -------
    dict
        The same ``weights`` object that was passed in, after training.
    """
    for _ in range(epochs):
        # Forward pass
        layer1 = sigmoid(np.dot(X_train, weights['W1']))
        layer2 = sigmoid(np.dot(layer1, weights['W2']))

        # Backpropagation. layer1 and layer2 are already sigmoid outputs, so the
        # derivative is a * (1 - a); routing them through sigmoid_derivative would
        # apply sigmoid a second time and give the wrong gradient.
        error = y_train - layer2
        delta_layer2 = error * layer2 * (1 - layer2)
        # Uses W2 from before this epoch's update.
        error_layer1 = np.dot(delta_layer2, weights['W2'].T)
        delta_layer1 = error_layer1 * layer1 * (1 - layer1)

        # Update weights (error is target - output, hence the += step)
        weights['W2'] += learning_rate * np.dot(layer1.T, delta_layer2)
        weights['W1'] += learning_rate * np.dot(X_train.T, delta_layer1)

    return weights

In [None]:
def evaluate_simple_network(X_test, y_test, weights):
    """Return the classification accuracy of the network on a labelled set.

    ``y_test`` must be laid out like the network output (one column per class);
    the predicted and true class of each row are the argmax over those columns.
    """
    hidden_activation = sigmoid(np.dot(X_test, weights['W1']))
    output_activation = sigmoid(np.dot(hidden_activation, weights['W2']))

    predicted_classes = output_activation.argmax(axis=1)
    expected_classes = np.argmax(y_test, axis=1)

    # Fraction of rows where the predicted class matches the true one.
    return np.mean(predicted_classes == expected_classes)

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the labels. The dense-output flag is spelled `sparse_output` from
# scikit-learn 1.2 on (the old `sparse` keyword was removed in 1.4); fall back to the
# old spelling only when the installed version does not know the new one.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
y_train_onehot = onehot_encoder.fit_transform(y_train.reshape(-1, 1))
y_test_onehot = onehot_encoder.transform(y_test.reshape(-1, 1))

# Create and train the simple neural network model
input_nodes = X_train.shape[1]
hidden_nodes = 64
# One output unit per one-hot column, so the layer always matches the encoded labels.
output_nodes = y_train_onehot.shape[1]
# Seed the global RNG so the random weight initialisation (and the reported
# accuracy) is the same on every run of this cell.
np.random.seed(42)
weights = create_network(input_nodes, hidden_nodes, output_nodes)
trained_weights = train_simple_network(X_train, y_train_onehot, weights)
# Evaluate the simple neural network model
accuracy = evaluate_simple_network(X_test, y_test_onehot, trained_weights)
print("Test accuracy:", accuracy)

Test accuracy: 0.48416107382550333


In [None]:
# Hand-labelled probe sentences, written as (text, label) pairs so each label sits
# next to the sentence it belongs to; the pairs are then unzipped into two columns.
# The labels appear to follow 1 = positive, 0 = neutral, -1 = negative.
data = dict(
    zip(
        ('sentences', 'sentiment_label'),
        map(list, zip(
            ("I love this product!", 1),
            ("This is an amazing movie!", 1),
            ("I can't stand this annoying noise!", -1),
            ("The food was so delicious!", 1),
            ("This is the worst experience ever!", -1),
            ("I don't like the taste of this dish.", -1),
            ("It was an average day, nothing special.", 0),
            ("The weather is quite nice today.", 1),
            ("I'm so bored with this movie.", -1),
            ("The service was fantastic!", 1),
            ("I'm not sure how I feel about this.", 0),
            ("This book is not very interesting.", -1),
            ("I had an incredible time at the concert!", 1),
            ("The party was really boring.", -1),
            ("I'm very disappointed with the product.", -1),
            ("The room is clean and comfortable.", 1),
            ("That's a pretty neutral color.", 0),
            ("The store was really crowded.", 0),
            ("He is an exceptional speaker.", 1),
            ("She gave a mediocre performance.", -1),
        )),
    )
)

Sample_Sentences = pd.DataFrame(data)
Sample_Sentences

Unnamed: 0,sentences,sentiment_label
0,I love this product!,1
1,This is an amazing movie!,1
2,I can't stand this annoying noise!,-1
3,The food was so delicious!,1
4,This is the worst experience ever!,-1
5,I don't like the taste of this dish.,-1
6,"It was an average day, nothing special.",0
7,The weather is quite nice today.,1
8,I'm so bored with this movie.,-1
9,The service was fantastic!,1


In [None]:
# Preprocess the Sample_Sentences data with the tokenizer that was fitted on the
# Reddit comments. Fitting a fresh Tokenizer on these sentences would give the words
# indices unrelated to the vocabulary the network was trained on (and would overwrite
# the fitted `tokenizer`), so the model would be fed meaningless input.
X_sample = tokenizer.texts_to_sequences(Sample_Sentences['sentences'].values)
X_sample = pad_sequences(X_sample, maxlen=X.shape[1])


# Predict sentiment labels using the simple neural network model
layer1_sample = sigmoid(np.dot(X_sample, trained_weights['W1']))
layer2_sample = sigmoid(np.dot(layer1_sample, trained_weights['W2']))
predictions_sample = np.argmax(layer2_sample, axis=1)

# Decode predictions to their original sentiment labels. The class ids come from
# label_encoder, so the same encoder must undo the mapping; a hand-written
# "2 -> -1, otherwise unchanged" rule does not match the encoder's sorted class order.
decoded_predictions_sample = label_encoder.inverse_transform(predictions_sample)

# Add predicted sentiment labels as a new column in the Sample_Sentences DataFrame
Sample_Sentences['predicted_sentiment_label'] = decoded_predictions_sample
Sample_Sentences

Unnamed: 0,sentences,sentiment_label,predicted_sentiment_label
0,I love this product!,1,-1
1,This is an amazing movie!,1,-1
2,I can't stand this annoying noise!,-1,-1
3,The food was so delicious!,1,-1
4,This is the worst experience ever!,-1,-1
5,I don't like the taste of this dish.,-1,-1
6,"It was an average day, nothing special.",0,-1
7,The weather is quite nice today.,1,-1
8,I'm so bored with this movie.,-1,-1
9,The service was fantastic!,1,-1


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Hand-assigned labels versus the labels predicted by the network.
y_true = Sample_Sentences['sentiment_label']
y_pred = Sample_Sentences['predicted_sentiment_label']

# Compute the three macro-averaged scores in one pass over the scorer functions.
precision, recall, f1 = (
    scorer(y_true, y_pred, average='macro')
    for scorer in (precision_score, recall_score, f1_score)
)

print("Precision:", precision)
print("Recall:", recall)
print("F1 measure:", f1)

Precision: 0.13333333333333333
Recall: 0.3333333333333333
F1 measure: 0.1904761904761905
