# In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load and merge data
# The three input tables are read from CSV files given by relative path.
nodes, edges, labels = (
    pd.read_csv(csv_path)
    for csv_path in ('nodes.csv', 'edges.csv', 'labels.csv')
)

# Merge nodes with labels using user_id as key
# (default inner join: only user_ids present in both tables are kept)
data = nodes.merge(labels, on='user_id')

# Create network graph
# Every labelled user becomes a node, including users that have no edges.
G = nx.Graph()
G.add_nodes_from(data['user_id'])
# Pair the two endpoint columns directly; iterrows() would build a Series
# per edge, which is needlessly slow on large edge lists.
G.add_edges_from(zip(edges['user_id1'], edges['user_id2']))

# Visualize subset of graph for better clarity
# Only users from the first 200 rows of `data` are drawn. Colours are looked
# up per node rather than by row position, so a repeated user_id cannot shift
# the colours or make the colour list longer than the node list.
preview_rows = data.iloc[:200]
label_by_user = dict(zip(preview_rows['user_id'], preview_rows['label']))
preview_nodes = list(label_by_user)  # unique user_ids, first-seen order
label_colors = {0: 'red', 1: 'green'}

plt.figure(figsize=(10, 8))
nx.draw(G.subgraph(preview_nodes),
        nodelist=preview_nodes,  # pins the node order that node_color follows
        with_labels=False,
        node_size=40,
        # Labels other than 0/1 are drawn gray instead of producing NaN colours.
        node_color=[label_colors.get(label_by_user[node], 'gray')
                    for node in preview_nodes],
        alpha=0.7)
plt.title("Network Graph (Red=Negative, Green=Positive)")
plt.show()

# Text preprocessing
# Fit the vocabulary on the text column, then turn every text into a
# fixed-length row of word indices (padded/truncated at the end to 15).
texts = data['text']
tokenizer = Tokenizer(num_words=2000, oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
X = pad_sequences(
    sequences,
    maxlen=15,
    padding='post',
    truncating='post',
)

# Labels
y = data['label'].values

# Split data with stratification
# 80/20 split with a fixed seed; stratify=y maintains class balance.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Enhanced RNN model
# Word embeddings feed two stacked SimpleRNN layers; a single sigmoid unit
# produces the score for label 1.
model = Sequential([
    # input_dim / input_length mirror the Tokenizer's num_words (2000) and the
    # padded sequence length (15) used during preprocessing.
    Embedding(input_dim=2000, output_dim=64, input_length=15),
    # return_sequences=True: the next recurrent layer needs the whole sequence.
    SimpleRNN(64, return_sequences=True),
    Dropout(0.5),
    SimpleRNN(32),
    Dense(1, activation='sigmoid')
])

# NOTE: `tf` must be imported (`import tensorflow as tf`); the metric classes
# below raise NameError otherwise.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ],
)

model.summary()

# Train with early stopping
# Stop once validation loss has not improved for 3 consecutive epochs and
# roll the model back to the weights of its best epoch.
# NOTE: `tf` must be imported (`import tensorflow as tf`) for this callback.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# validation_split holds out 20% of the training data to compute val_loss;
# the test set is not touched here.
history = model.fit(
    X_train, y_train,
    epochs=20,
    validation_split=0.2,
    batch_size=64,
    callbacks=[early_stop],
)

# Evaluation
# evaluate() yields the loss followed by the metrics in the order they were
# passed to compile(): accuracy, precision, recall.
scores = model.evaluate(X_test, y_test)
test_loss, test_acc, test_precision, test_recall = scores
print(
    f"\nTest Accuracy: {test_acc*100:.2f}%",
    f"Precision: {test_precision:.2f}",
    f"Recall: {test_recall:.2f}",
    sep="\n",
)

# Prediction function
def predict_sentiment(text, *, threshold=0.5):
    """Classify one piece of text as 'Positive' or 'Negative'.

    The text is encoded with the module-level ``tokenizer`` and padded or
    truncated at the end to 15 tokens, the same way the training data was
    prepared, and then scored by the module-level ``model``.

    Args:
        text: The raw text to classify.
        threshold: Score cut-off; a score strictly greater than this value
            is reported as 'Positive'. Defaults to 0.5.

    Returns:
        The string 'Positive' or 'Negative'.
    """
    encoded = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(encoded, maxlen=15, padding='post', truncating='post')
    # verbose=0 keeps Keras' per-call progress bar from interleaving with
    # whatever the caller prints.
    score = float(model.predict(padded, verbose=0)[0][0])
    return 'Positive' if score > threshold else 'Negative'

# Test predictions
# Run a few hand-written examples through the trained model.
print("\nSample Predictions:")
for sample_text in (
    "I love this product!",
    "Terrible service",
    "It's okay, not great",
):
    print(sample_text, "->", predict_sentiment(sample_text))
