In [1]:
import random
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque

In [2]:
# Load the labelled sentences and turn each sentiment string into an integer class id.
sentiment_ids = {'negative': 0, 'neutral': 1, 'positive': 2}
df = pd.read_csv("data.csv")
sentences = df['Sentence'].values
# Series.map leaves NaN for any sentiment string that is not a key of the mapping.
labels = df['Sentiment'].map(sentiment_ids).values

In [3]:
# Preview the first five rows to check the column names and the raw label strings.
df.head(n=5)

Unnamed: 0,Sentence,Sentiment
0,The GeoSolutions technology will leverage Bene...,positive
1,"$ESI on lows, down $1.50 to $2.50 BK a real po...",negative
2,"For the last quarter of 2010 , Componenta 's n...",positive
3,According to the Finnish-Russian Chamber of Co...,neutral
4,The Swedish buyout firm has sold its remaining...,neutral


In [4]:
# TF-IDF encoding
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(sentences).toarray()

In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Hyperparameters
# Seed every random number generator the notebook relies on so that a fresh
# "Restart & Run All" repeats the same exploration, minibatches and weights.
SEED = 42
random.seed(SEED)        # random.randrange (exploration) and random.sample (replay)
np.random.seed(SEED)     # np.random.rand() in the epsilon-greedy check
torch.manual_seed(SEED)  # initial weights of the network created in a later cell

num_actions = 3           # one action per sentiment class id (0, 1, 2)
state_size = X.shape[1]   # length of one TF-IDF feature vector
epsilon = 1.0             # starting probability of taking a random action
epsilon_min = 0.01        # epsilon stops decaying once it is at or below this value
epsilon_decay = 0.995     # multiplicative decay applied to epsilon
gamma = 0.95              # discount applied to the bootstrapped next-state value
lr = 0.001                # learning rate handed to the optimizer
batch_size = 64           # number of stored transitions sampled per replay call
memory = deque(maxlen=2000)  # replay buffer; the oldest entries drop out when full

In [7]:
# DQN Model
class DQN(nn.Module):
    """Fully connected network: input_dim -> 128 -> 64 -> output_dim.

    ReLU is applied after the first two layers; the last layer's output is
    returned without an activation.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine the
        # state_dict keys and the order in which initial weights are drawn.
        hidden_sizes = (128, 64)
        self.fc1 = nn.Linear(input_dim, hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.out = nn.Linear(hidden_sizes[1], output_dim)

    def forward(self, x):
        """Run ``x`` through the three layers and return the final layer's output."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.out(hidden)

In [8]:
# Build the network together with the optimizer and the loss used to train it.
model = DQN(input_dim=state_size, output_dim=num_actions)
optimizer = optim.Adam(params=model.parameters(), lr=lr)
loss_fn = nn.MSELoss(reduction='mean')

In [9]:
# Helper functions
def act(state):
    """Choose an action for ``state`` with an epsilon-greedy rule.

    A uniform draw from ``np.random.rand()`` that is <= the module-level
    ``epsilon`` yields a random action index; otherwise the index of the
    largest model output for ``state`` is returned.
    """
    explore = np.random.rand() <= epsilon
    if explore:
        return random.randrange(num_actions)
    with torch.no_grad():
        # The model is fed a batch of one, hence the added leading dimension.
        batch = torch.FloatTensor(state).unsqueeze(0)
        scores = model(batch)
    return scores.argmax().item()

In [10]:
def replay():
    """Train the model on a random minibatch drawn from the replay memory.

    Returns immediately while ``memory`` holds fewer than ``batch_size``
    transitions. Otherwise it samples ``batch_size`` transitions and performs
    one optimizer step per transition: the model output at the taken action is
    regressed towards ``reward`` (plus ``gamma`` times the largest model output
    for ``next_state`` when the transition is not terminal). After the
    minibatch, the module-level ``epsilon`` is multiplied by ``epsilon_decay``
    once, as long as it is still above ``epsilon_min``.
    """
    global epsilon
    if len(memory) < batch_size:
        return
    minibatch = random.sample(memory, batch_size)
    for state, action, reward, next_state, done in minibatch:
        state = torch.FloatTensor(state)
        target = reward
        if not done:
            # The bootstrap value is consumed as a plain number, so there is
            # no need to record an autograd graph for this forward pass; the
            # next state is only converted when it is actually used.
            with torch.no_grad():
                next_q = model(torch.FloatTensor(next_state))
            target += gamma * torch.max(next_q).item()
        target_f = model(state)
        # Regression target: current outputs with only the taken action's
        # entry replaced, so the other entries contribute zero error.
        target_val = target_f.detach().clone()
        target_val[action] = target
        optimizer.zero_grad()
        loss = loss_fn(target_f, target_val)
        loss.backward()
        optimizer.step()
    if epsilon > epsilon_min:
        epsilon *= epsilon_decay

In [None]:
episodes = 10
for e in range(episodes):
    # Each training row is handled as its own single-step episode.
    for i, state in enumerate(X_train):
        action = act(state)
        # +1 when the chosen action equals the row's label, -1 otherwise.
        reward = -1 if action != y_train[i] else 1
        done = True  # one-step episode
        next_state = state  # the stored next state is the same row
        transition = (state, action, reward, next_state, done)
        memory.append(transition)
        replay()
    print(f"Episode {e+1}/{episodes} completed. Epsilon: {epsilon:.2f}")

Episode 1/10 completed. Epsilon: 0.01


In [None]:
# Score the trained model on the held-out rows: the prediction for a row is the
# index of its largest model output.
total = len(X_test)
correct = 0
with torch.no_grad():
    for i in range(total):
        state = torch.FloatTensor(X_test[i]).unsqueeze(0)
        pred = model(state).argmax().item()
        correct += int(pred == y_test[i])
print(f"Test Accuracy: {100 * correct / total:.2f}%")