In [None]:
from google.colab import drive
drive.flush_and_unmount()
drive.mount('/content/drive')

Drive not mounted, so nothing to flush and unmount.
Mounted at /content/drive


In [61]:
import pandas as pd

# Set the paths to the input CSV files
csv_file1 = 'final_dataset_1_2.csv'
csv_file2 = 'final_dataset_2_4.csv'
csv_file3 = 'final_dataset_3_2.csv'

# Read the data from the CSV files
data1 = pd.read_csv(csv_file1)
data2 = pd.read_csv(csv_file2)
data3 = pd.read_csv(csv_file3)

# Concatenate the data from both files
data = pd.concat([data1, data2, data3], axis=0)

# Shuffle the data
data = data.sample(frac=1).reset_index(drop=True)

# Save the merged and shuffled data to a new CSV file
data.to_csv('super_final_data.csv', index=False)

In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.utils import resample

# Load dataset
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Sentiment Anaalysis/super_final_data.csv')

data = data[['Comments','Reviews']]

# Set label and feature columns
X = data['Comments']
y = data['Reviews']

# Tokenize the comments
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)

# Pad the sequences
maxlen = max(len(x) for x in X)
X = tf.keras.preprocessing.sequence.pad_sequences(X, maxlen=maxlen)

# Undersample the data to balance the classes
min_class_size = y.value_counts().min()
balanced_data = pd.concat([
    data[data['Reviews'] == 0].sample(min_class_size),
    data[data['Reviews'] == 1].sample(min_class_size)
], axis=0).sample(frac=1, random_state=0)

X_resampled = balanced_data['Comments']
y_resampled = balanced_data['Reviews']

# Tokenize and pad the resampled comments
X_resampled = tokenizer.texts_to_sequences(X_resampled)
X_resampled = tf.keras.preprocessing.sequence.pad_sequences(X_resampled, maxlen=maxlen)

# Get the number of classes
num_classes = y_resampled.nunique()

# Create a neural network model using TensorFlow
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(tokenizer.word_index) + 1, 16),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define an early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.3, random_state=0)
# Fit the model to the training data
model.fit(
    X_train,
    y_train,
    epochs=1000,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping]
)

Epoch 1/1000

In [8]:
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score
import numpy as np
# Get the predicted probability distribution over the classes
y_pred_proba = model.predict(X_test)

# Get the class with the highest predicted probability
y_pred = np.argmax(y_pred_proba, axis=1)

# Calculate the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Calculate the classification metrics
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Print the results
print(f'Confusion matrix:\n{cm}')
print(f'Accuracy: {accuracy}')
print(f'Recall: {recall}')
print(f'Precision: {precision}')
print(f'F1-score: {f1}')

Confusion matrix:
[[882   5]
 [  5 893]]
Accuracy: 0.9943977591036415
Recall: 0.9943977591036415
Precision: 0.9943977591036415
F1-score: 0.9943977591036415


In [16]:
# Tokenize and pad the input text
input_data = tokenizer.texts_to_sequences(['"Hypnotic carries the narrative potential to become a mind-bending film packed with jaw-dropping twists and incredibly captivating storytelling. Unfortunately, despite some interesting moments and an excellent cast, Robert Rodriguez and Max Borenstein deliver a messily edited, extremely confusing mishmash of ideas not as clever as the filmmakers try to make them out to be.'])
input_data = tf.keras.preprocessing.sequence.pad_sequences(input_data, maxlen=maxlen)

# Make a prediction using the trained model
prediction = model.predict(input_data)

# Print the predicted class
if prediction[0][0] > prediction[0][1]:
    print('Predicted class: 0 (bad)')
else:
    print('Predicted class: 1 (good)')

Predicted class: 1 (good)


In [21]:
import pandas as pd
from sklearn.utils import shuffle

# Load the data from the two CSV files
df1 = pd.read_csv('Reddit_Data.csv')
df2 = pd.read_csv('Twitter_Data.csv')

# Concatenate the two DataFrames
merged_df = pd.concat([df1, df2], ignore_index=True)

# Shuffle the resulting DataFrame
shuffled_df = shuffle(merged_df)

# Save the shuffled DataFrame to a new CSV file
shuffled_df.to_csv('final_dataset_2.csv', index=False)


In [62]:
# Load your data into a DataFrame
df = pd.read_csv('super_final_data.csv')

# Count the number of null values in each column
null_counts = df.isnull().sum()

# Display the results
print(null_counts)

Comments    0
Reviews     0
dtype: int64


In [40]:
import pandas as pd
from sklearn.utils import shuffle

# Load the CSV file
df = pd.read_csv('final_dataset_2_3.csv')

# Drop rows with any null values
df = df.dropna()

shuffled_df = shuffle(df)

# Save the new CSV file
shuffled_df.to_csv('final_dataset_2_4.csv', index=False)

In [25]:
import pandas as pd

# Load the data from the text file
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Sentiment Anaalysis/reviews.txt', delimiter='\t')

# Swap the two columns
df = df.iloc[:, ::-1]

# Save the resulting DataFrame to a CSV file
df.to_csv('final_dataset_3.csv', index=False)

In [46]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('final_dataset_1_1.csv')

# Drop rows where the 'category' column has a value of -1
df = df[df['Reviews'] != -1]

# Save the new CSV file
df.to_csv('final_dataset_1_2.csv', index=False)