# In [None]:
# pip install torch pandas scikit-learn openai
import os

import openai
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# === OpenAI credentials ===
# Prefer the OPENAI_API_KEY environment variable so the secret is never stored
# in the notebook. The placeholder string is only a fallback: replace it with
# your key if you cannot set the environment variable.
openai.api_key = os.environ.get("OPENAI_API_KEY", "OPEN-API-KEY")   # <- CHANGE THIS (or set OPENAI_API_KEY)

# Load the labelled reviews from a local copy of the dataset file (it must sit
# in the working directory). The file is tab-separated with no header row:
# column 0 is the review text, column 1 the label.
url = "./amazon_cells_labelled.txt"
df = pd.read_csv(
    url,
    sep='\t',
    header=None,
    names=['text', 'label'],
    # 3 == csv.QUOTE_NONE: treat '"' as an ordinary character so that a stray
    # quotation mark inside a review cannot swallow the following rows.
    quoting=3,
)

# Train-test split (80/20, stratified on the label, fixed seed)
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=42, stratify=df['label'])

# Simple but effective Bag-of-Words (fixed vocab shared across train/test).
# The vocabulary is built from every review in `df`, lower-cased and split on
# whitespace. Sorting the unique words makes the word -> index mapping stable
# across interpreter runs; iterating a bare set of strings is not, because
# string hashing is randomised per process.
all_words = " ".join(df['text']).lower().split()
vocab = {word: idx for idx, word in enumerate(sorted(set(all_words)))}
vocab_size = len(vocab)

def text_to_vector(text):
    """Encode *text* as a bag-of-words count vector.

    The text is lower-cased and split on whitespace. Every token present in
    the module-level ``vocab`` adds one to the entry at its vocabulary index;
    tokens that are not in ``vocab`` are ignored.

    Returns:
        A 1-D tensor of length ``vocab_size`` holding the token counts.
    """
    counts = torch.zeros(vocab_size)
    known_positions = (
        vocab[token] for token in text.lower().split() if token in vocab
    )
    for position in known_positions:
        counts[position] += 1
    return counts

# Neural Network (sentiment classification)
class SentimentNN(nn.Module):
    """Feed-forward classifier over fixed-length input vectors.

    Layer stack: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Linear.
    The final layer emits raw, un-normalised scores (one per class).

    Args:
        vocab_size: Number of input features (length of each input vector).
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output scores per example.
        dropout: Drop probability applied after the first hidden layer.

    With the default keyword values the network is identical to the original
    fixed 256/64/2 architecture, including its ``state_dict`` key names.
    """

    def __init__(self, vocab_size, *, hidden1=256, hidden2=64, num_classes=2,
                 dropout=0.3):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(vocab_size, hidden1),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Linear(hidden2, num_classes),
        )

    def forward(self, x):
        """Return the class scores for the batch ``x``."""
        return self.layers(x)

# Model, loss function and optimiser
model = SentimentNN(vocab_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Encode the whole training split once: one bag-of-words row per review,
# plus the matching integer class labels.
X_train = torch.stack(list(map(text_to_vector, train_texts)))
y_train = torch.tensor(train_labels.to_numpy(), dtype=torch.long)

# Training: 10 full-batch steps (every update sees all of X_train)
model.train()  # put the model in training mode so dropout is active
for epoch in range(10):
    optimizer.zero_grad()  # clear gradients accumulated by the previous step
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

# Evaluation on the held-out split
X_test = torch.stack(list(map(text_to_vector, test_texts)))
model.eval()  # inference mode: dropout is disabled
with torch.no_grad():
    # The highest-scoring class per row is the predicted label.
    pred = model(X_test).argmax(dim=1).numpy()
accuracy = accuracy_score(test_labels, pred)
print(f"Sentiment Accuracy: {accuracy:.3f}")

# QA function using OpenAI
def ask_question(review, question):
    """Ask gpt-4o-mini a question about a single review.

    The review and the question are sent as one user message that instructs
    the model to answer briefly using only the review.

    Args:
        review: Review text supplied to the model as its only context.
        question: Question to answer about the review.

    Returns:
        The model's answer with surrounding whitespace stripped, or an empty
        string when the response carries no text content.

    Raises:
        Whatever the OpenAI client raises for request or authentication
        failures; nothing is caught here.
    """
    prompt = f"Review: \"{review}\"\n\nQuestion: {question}\nAnswer briefly using only the review."
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,
        max_tokens=100,
    )
    # `message.content` is optional in the API response; calling .strip() on
    # None would raise AttributeError, so fall back to an empty string.
    content = response.choices[0].message.content
    return (content or "").strip()

# Single function that runs BOTH tasks
def analyze_review(review, question=None):
    """Classify a review's sentiment and optionally answer a question on it.

    Args:
        review: Review text to analyse.
        question: Optional question about the review. When it is falsy
            (``None`` or an empty string) no OpenAI request is made.

    Returns:
        A ``(sentiment, qa_answer)`` tuple. ``sentiment`` is ``"Positive"``
        when the network predicts class 1 and ``"Negative"`` otherwise;
        ``qa_answer`` is the reply from ``ask_question`` or the string
        ``"No question asked"``.
    """
    # Add a leading batch dimension: (vocab_size,) -> (1, vocab_size).
    features = text_to_vector(review).unsqueeze(0)
    with torch.no_grad():
        scores = model(features)
    predicted_class = scores.argmax(dim=1).item()

    if predicted_class == 1:
        sentiment = "Positive"
    else:
        sentiment = "Negative"

    if not question:
        return sentiment, "No question asked"
    return sentiment, ask_question(review, question)

# Demo: run both tasks (sentiment + question answering) on two sample reviews
print("\n=== DEMO ===")

r1 = "The phone has excellent battery life and the camera is amazing."
s1, a1 = analyze_review(r1, question="What features did the customer like?")
for caption, shown in (
    ("Review:", r1),
    ("Sentiment (Neural Network):", s1),
    ("Answer (gpt-4o-mini):", a1),
):
    print(caption, shown)

r2 = "Very slow, keeps freezing, terrible purchase."
s2, a2 = analyze_review(r2, question="What are the main problems?")
for caption, shown in (
    ("\nReview:", r2),  # leading newline separates the two examples
    ("Sentiment (Neural Network):", s2),
    ("Answer (gpt-4o-mini):", a2),
):
    print(caption, shown)