In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [2]:
train = pd.read_csv('train.csv')

In [3]:
# Check the column names
print("Column names:", train.columns)

# If necessary, rename columns. Assuming the columns are '1' for ratings and 'review_text' for reviews.
train.rename(columns={'1': 'rating', 'review_text': 'review'}, inplace=True)


Column names: Index(['2', 'Stuning even for the non-gamer',
       'This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen! ^_^'],
      dtype='object')


In [4]:
# Rename columns
train.rename(columns={'2': 'rating', 'Stuning even for the non-gamer': 'review'}, inplace=True)

In [5]:
# Assuming 'review' contains the text data and 'rating' contains labels
X = train['review'].values
y = train['rating'].values

# Convert labels to 0 and 1
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# For demonstration, converting text to integers
X = [hash(str(x)) % 10000 for x in X]
X = np.array(X).reshape(-1, 1, 1)


In [6]:
# Splitting data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
class SimpleRNN(nn.Module):
    def __init__(self):
        super(SimpleRNN, self).__init__()
        self.rnn = nn.RNN(input_size=1, hidden_size=20, batch_first=True)
        self.fc = nn.Linear(20, 1)

    def forward(self, x):
        x, _ = self.rnn(x)
        x = self.fc(x[:, -1, :])
        return x

model = SimpleRNN()

In [8]:
# Initialize Loss and Optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)

# Training loop
for epoch in range(10):  # number of epochs
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 0.8033607006072998
Epoch 2, Loss: 0.7319759726524353
Epoch 3, Loss: 0.7047370672225952
Epoch 4, Loss: 0.693200945854187
Epoch 5, Loss: 0.6943418383598328
Epoch 6, Loss: 0.6959403157234192
Epoch 7, Loss: 0.6971373558044434
Epoch 8, Loss: 0.6975097060203552
Epoch 9, Loss: 0.6970521807670593
Epoch 10, Loss: 0.6960362195968628


In [9]:
# Convert test data to PyTorch tensor
X_test_tensor = torch.FloatTensor(X_test)

# Forward pass to get the output
with torch.no_grad():
    outputs = model(X_test_tensor)
    outputs = torch.sigmoid(outputs)
    predicted = (outputs > 0.5).float()

# Calculate Accuracy
accuracy = (predicted.reshape(-1) == torch.FloatTensor(y_test)).float().mean()
print(f'Accuracy: {accuracy.item()}')

Accuracy: 0.5008124709129333


# PyTorch Model with LSTM

In [10]:

class SimpleLSTM(nn.Module):
    def __init__(self):
        super(SimpleLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=50, batch_first=True)
        self.fc = nn.Linear(50, 1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.dropout(x)
        x = self.fc(x[:, -1, :])
        return x

# Initialize the model, loss, and optimizer
model = SimpleLSTM()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training Loop with 10 epochs
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Evaluation
with torch.no_grad():
    outputs = model(X_test_tensor)
    outputs = torch.sigmoid(outputs)
    predicted = (outputs > 0.5).float()
    accuracy = (predicted.reshape(-1) == torch.FloatTensor(y_test)).float().mean()
    print(f'Accuracy: {accuracy.item()}')


Epoch 1, Loss: 0.7011341452598572
Epoch 2, Loss: 0.6972042322158813
Epoch 3, Loss: 0.6976842880249023
Epoch 4, Loss: 0.6968285441398621
Epoch 5, Loss: 0.6959050297737122
Epoch 6, Loss: 0.6950064301490784
Epoch 7, Loss: 0.6950085163116455
Epoch 8, Loss: 0.6947450041770935
Epoch 9, Loss: 0.6944408416748047
Epoch 10, Loss: 0.6941607594490051
Accuracy: 0.5011861324310303


## Enhanced LSTM Model

In [11]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Step 1: Load and Preprocess Data
train = pd.read_csv('train.csv')
train.rename(columns={'2': 'rating', 'Stuning even for the non-gamer': 'review'}, inplace=True)
X = train['review'].values
y = train['rating'].values
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
X = [hash(str(x)) % 10000 for x in X]
X = np.array(X).reshape(-1, 1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch Tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)
X_test_tensor = torch.FloatTensor(X_test)

# Step 2: Create DataLoader for Batch Training
train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

# Step 3: Define Model with LSTM and Dropout
class AdvancedLSTM(nn.Module):
    def __init__(self):
        super(AdvancedLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=100, batch_first=True)  # Increased hidden_size
        self.fc = nn.Linear(100, 1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.dropout(x)
        x = self.fc(x[:, -1, :])
        return x

# Step 4: Initialize Model, Loss, Optimizer and Learning Rate Scheduler
model = AdvancedLSTM()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)  # Learning rate scheduler

# Step 5: Training Loop with Early Stopping
prev_val_loss = float('inf')
for epoch in range(10):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    scheduler.step()  # Step the learning rate scheduler
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')
    # Implement early stopping here based on validation loss

# Step 6: Evaluation
with torch.no_grad():
    outputs = model(X_test_tensor)
    outputs = torch.sigmoid(outputs)
    predicted = (outputs > 0.5).float()
    accuracy = (predicted.reshape(-1) == torch.FloatTensor(y_test)).float().mean()
    print(f'Accuracy: {accuracy.item()}')


Epoch 1, Loss: 0.7026242613792419
Epoch 2, Loss: 0.7141169309616089
Epoch 3, Loss: 0.6914501786231995
Epoch 4, Loss: 0.6944655179977417
Epoch 5, Loss: 0.6943813562393188
Epoch 6, Loss: 0.6998687386512756
Epoch 7, Loss: 0.7048817276954651
Epoch 8, Loss: 0.686941385269165
Epoch 9, Loss: 0.7019990682601929
Epoch 10, Loss: 0.7023907899856567
Accuracy: 0.49933332204818726
