In [1]:
import random
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("data.csv")
sentences = df['Sentence'].values
labels = df['Sentiment'].map({'negative': 0, 'neutral': 1, 'positive': 2}).values

In [3]:
# TF-IDF encoding
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(sentences).toarray()

In [4]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

In [5]:
# Initialize Q-table
num_actions = 3
state_size = X.shape[1]
q_table = np.zeros((len(X_train), num_actions))

In [6]:
# Hyperparameters
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.995
gamma = 0.95
alpha = 0.1  # learning rate
episodes = 10

In [7]:
# Helper function to choose action
def act_q_learning(state_idx):
    if np.random.rand() <= epsilon:
        return random.randrange(num_actions)
    return np.argmax(q_table[state_idx])

In [8]:
# Training loop
for e in range(episodes):
    for i in range(len(X_train)):
        action = act_q_learning(i)
        reward = 1 if action == y_train[i] else -1
        next_max = np.max(q_table[i])
        q_table[i][action] = q_table[i][action] + alpha * (reward + gamma * next_max - q_table[i][action])
    if epsilon > epsilon_min:
        epsilon *= epsilon_decay
    print(f"Episode {e+1}/{episodes} completed. Epsilon: {epsilon:.2f}")

Episode 1/10 completed. Epsilon: 0.99
Episode 2/10 completed. Epsilon: 0.99
Episode 3/10 completed. Epsilon: 0.99
Episode 4/10 completed. Epsilon: 0.98
Episode 5/10 completed. Epsilon: 0.98
Episode 6/10 completed. Epsilon: 0.97
Episode 7/10 completed. Epsilon: 0.97
Episode 8/10 completed. Epsilon: 0.96
Episode 9/10 completed. Epsilon: 0.96
Episode 10/10 completed. Epsilon: 0.95


In [9]:
correct = 0
total = len(X_test)
X_test_transformed = vectorizer.transform(df['Sentence'].iloc[X_train.shape[0]:]).toarray()
for i in range(total):
    state_vec = X_test_transformed[i]
    idx = np.argmin(np.linalg.norm(X_train - state_vec, axis=1))
    action = np.argmax(q_table[idx])
    if action == y_test[i]:
        correct += 1
print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 40.46%
