# Classifying Movie Reviews Using RNN

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader

### Loading and Preprocessing the Dataset

Load the dataset using pd.read_csv() and assign column names. </br>
Lowercase and tokenize the text using pandas string methods.</br>
Encode labels into numeric form with LabelEncoder().</br>
Split the data into training and testing sets using train_test_split().</br>
Create a vocabulary set from all unique words in the dataset.</br>
Map each unique word to a unique index.</br>
Define encode_and_pad() function to convert tokenized sentences into sequences of indices and pad them to the maximum sequence length.</br>
Process training and testing texts with encode_and_pad() to prepare data for modeling.</br>

In [8]:
df = pd.read_csv('IMDB-Dataset.csv', names=["text","label"])

df['text'] = df['text'].str.lower().str.split()

le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

vocab = {word for phrase in df['text'] for word in phrase}
word_to_idx = {word: idx for idx, word in enumerate(vocab, start=1)}

max_length = df['text'].str.len().max()

def encode_and_pad(text):
    encoded = [word_to_idx[word] for word in text]
    return encoded + [0] *   (max_length - len(encoded))

train_data['text'] = train_data['text'].apply(encode_and_pad)
test_data['text'] = test_data['text'].apply(encode_and_pad)

In [9]:
df

Unnamed: 0,text,label
0,[review],2
1,"[one, of, the, other, reviewers, has, mentione...",1
2,"[a, wonderful, little, production., <br, /><br...",1
3,"[i, thought, this, was, a, wonderful, way, to,...",1
4,"[basically, there's, a, family, where, a, litt...",0
...,...,...
49996,"[i, thought, this, movie, did, a, down, right,...",1
49997,"[bad, plot,, bad, dialogue,, bad, acting,, idi...",0
49998,"[i, am, a, catholic, taught, in, parochial, el...",0
49999,"[i'm, going, to, have, to, disagree, with, the...",0
