# Project: Deep Learning - PyTorch

## Project Structure

Your project should be organized into five main sections.

### 1. Package and Module Installation

First, let's gather all the packages and modules that you'll need in the installation section below.

In [None]:
# !pip install torch torchvision torchaudio torchtext scikit-learn pandas numpy matplotlib


In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install torchtext

Looking in indexes: https://download.pytorch.org/whl/cu118


### 2. Data Loading and Preprocessing

Load the chosen dataset and preprocess it for deep learning.

In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, TensorDataset

import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize

# Tokenization
def tokenizer(text):
    """Lower-case *text* and split it into tokens with NLTK's ``word_tokenize``."""
    lowered = text.lower()
    return word_tokenize(lowered)

# Load your data
# Keep only the label column (v1) and the message column (v2), under readable names.
df = pd.read_csv('spam.csv', encoding='latin-1')[['v1', 'v2']]
df.columns = ['label', 'text']

# Encode the class names as integer targets: ham -> 0, spam -> 1.
label_to_id = {'ham': 0, 'spam': 1}
df['label'] = df['label'].map(label_to_id)




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### 3. Model Building

Define your deep learning model's architecture.

In [None]:
# Write your code for Model Building here
import torch.nn as nn

class SpamClassifier(nn.Module):
    """Embedding -> LSTM -> linear classifier over sequences of token indices.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of logits produced per sequence.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        self.rnn = nn.LSTM(input_size=embed_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return one row of ``output_dim`` logits per sequence in *x*.

        *x* is a batch-first tensor of token indices, ``(batch, seq_len)``.
        """
        sequence_outputs, _ = self.rnn(self.embedding(x))
        # Only the LSTM output at the final time step feeds the classifier.
        # NOTE(review): if batches are right-padded, this position may hold
        # padding rather than the last real token — confirm against the
        # batching code.
        last_step = sequence_outputs[:, -1, :]
        return self.fc(last_step)

# Model hyperparameters
# NOTE(review): the extra +1 slot presumably reserves an index outside the
# vocabulary (e.g. padding) — confirm against how `vocab` is built.
vocab_size = 1 + len(vocab)
embed_dim, hidden_dim = 256, 128
output_dim = 2  # one logit per class (ham, spam)

model = SpamClassifier(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

# define your Deep Learning Model here, training is in the next section.



### 4. Model Training

Train your model and evaluate its performance using validation data.

In [None]:
# Write your code for Model Training here


#define the iteration
import torch.optim as optim
import torch.nn.functional as F

# Cross-entropy over the class logits, optimised with Adam on every model parameter
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)

# Training loop
def train(model, train_loader, optimizer, loss_fn, num_epochs=20):
    """Train *model* and report the mean batch loss of each epoch.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Sized iterable of ``(texts, labels)`` batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        loss_fn: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        The mean batch loss of the last epoch, or ``None`` when
        ``num_epochs`` is 0. The same value is also stored in the
        module-level ``dl_loss_value``, which the submission cell reads.

    Raises:
        ValueError: If ``train_loader`` contains no batches, since a mean
            loss cannot be computed.
    """
    global dl_loss_value  # read later by the submission cell

    # Fail early with a clear message instead of a ZeroDivisionError after
    # the first (empty) epoch.
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError('train_loader contains no batches; cannot compute a mean loss')

    model.train()
    epoch_loss = None
    for epoch in range(num_epochs):
        total_loss = 0
        for texts, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(texts)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        epoch_loss = total_loss / num_batches
        dl_loss_value = epoch_loss  # keep the global in sync for the submission cell
        print(f'Epoch {epoch + 1}, Loss: {dl_loss_value}')

    return epoch_loss

# Run training for 5 epochs instead of train()'s default of 20
train(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    num_epochs=5,
)


#create the training loop


Epoch 1, Loss: 0.3927784115076065
Epoch 2, Loss: 0.16077557663832392
Epoch 3, Loss: 0.08840025863277592
Epoch 4, Loss: 0.04481233745713585
Epoch 5, Loss: 0.030548392448274952


### 5. Model Evaluation
Evaluate your model's performance on the test data using the grading scheme defined above.

In [None]:
# Write your code for Model Evaluation here


#define the iteration
from sklearn.metrics import accuracy_score

# Evaluation function with dl_accuracy
def evaluate(model, test_loader):
    """Score *model* on *test_loader* and print the accuracy as a percentage.

    The model is put in evaluation mode and run without gradient tracking.
    The predicted class of each sample is the arg-max over dimension 1 of the
    model output. The resulting percentage is stored in the module-level
    ``dl_accuracy``, which the submission cell reads; nothing is returned.
    """
    global dl_accuracy  # read later by the submission cell
    model.eval()

    predicted, expected = [], []
    with torch.no_grad():
        for batch_texts, batch_labels in test_loader:
            logits = model(batch_texts)
            predicted += logits.argmax(dim=1).tolist()
            expected += batch_labels.tolist()

    # accuracy_score gives a fraction; scale it to a percentage.
    dl_accuracy = 100 * accuracy_score(expected, predicted)
    print(f'Test Accuracy: {dl_accuracy:.2f}%')

# Score the trained model on the held-out test batches
evaluate(model=model, test_loader=test_loader)

# create the evaluation loop


Test Accuracy: 97.13%


In [None]:
# Submit Method

# Do not change the code below
# NOTE(review): submit, student_id, name, assignment_id and drive_link are not
# defined in the visible cells — presumably provided by a setup cell; confirm.
# Question 1: the test accuracy (a percentage) that evaluate() stored in dl_accuracy.
question_id = "01_dl_project_accuracy"
submit(student_id, name, assignment_id, str(dl_accuracy), question_id, drive_link)
# Question 2: the last epoch's mean batch loss that train() stored in dl_loss_value.
question_id = "02_dl_project_loss_value"
submit(student_id, name, assignment_id, str(dl_loss_value), question_id, drive_link)

'Assignment successfully submitted'

## FIN