In [1]:
!pip install gradio
import gradio as gr

Collecting gradio
  Downloading gradio-4.44.1-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g

In [2]:
!pip install torchtext
!pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0

Collecting torchtext
  Downloading torchtext-0.18.0-cp310-cp310-manylinux1_x86_64.whl.metadata (7.9 kB)
Downloading torchtext-0.18.0-cp310-cp310-manylinux1_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchtext
Successfully installed torchtext-0.18.0
Collecting torch==2.3.0
  Downloading torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision==0.18.0
  Downloading torchvision-0.18.0-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting torchaudio==2.3.0
  Downloading torchaudio-2.3.0-cp310-cp310-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.0)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux

In [3]:
import torch
import torchtext
from torchtext import vocab
from torchtext import data
import torch.optim as optim
import argparse
import os
import pandas as pd
import torch.nn as nn



In [4]:
# Pretrained GloVe "6B" word vectors, 100 dimensions per token.
# On first use this downloads an 862MB archive to .vector_cache/glove.6B.zip.
glove = torchtext.vocab.GloVe(
    name="6B",
    dim=100,
)

.vector_cache/glove.6B.zip: 862MB [02:39, 5.41MB/s]                           
100%|█████████▉| 399999/400000 [00:27<00:00, 14731.74it/s]


In [9]:
# Baseline model: mean of the word embeddings followed by one linear unit.
class baselineModel(torch.nn.Module):
    """Averaged-embedding classifier.

    Each token id is mapped to its pretrained vector, the vectors of a
    sentence are averaged, and a single linear unit maps that average to one
    score per sentence. No sigmoid is applied inside the model; callers get
    the raw linear output.
    """

    def __init__(self, vocab, embedding_size=100):
        """
        Args:
            vocab: object exposing a ``vectors`` tensor used to initialise
                the embedding table.
            embedding_size: input width of the linear layer.
        """
        super().__init__()
        # Embedding table initialised from the pretrained vectors
        # (from_pretrained's default freeze setting is used).
        self.embeddings = nn.Embedding.from_pretrained(vocab.vectors)
        # Maps the averaged embedding to a single scalar.
        self.linear = nn.Linear(embedding_size, 1)

    def forward(self, x):
        """Score a batch of token ids.

        Args:
            x: integer tensor whose first two axes are swapped before the
                lookup, i.e. it arrives as (length, batch_size).

        Returns:
            1-D tensor with one score per sentence.
        """
        # (length, batch_size) -> (batch_size, length)
        token_ids = torch.transpose(x, 0, 1)

        # (batch_size, length, embedding_dim)
        vectors = self.embeddings(token_ids)

        # Collapse the length axis -> (batch_size, embedding_dim)
        sentence_vector = vectors.mean(dim=1)

        # (batch_size, 1)
        scores = self.linear(sentence_vector)

        # Flatten to (batch_size)
        return scores.reshape(-1)

In [10]:
# CNN model whose embedding table is trainable (freeze=False).
class cnnFreezeFalseModel(torch.nn.Module):
    """Two-branch convolutional sentence classifier.

    Two convolutions with kernel heights ``k1`` and ``k2`` slide over the
    embedded sentence; each branch is passed through ReLU and max-pooled over
    the length axis, the two pooled vectors are concatenated, and a linear
    unit followed by a sigmoid yields one value in (0, 1) per sentence.
    """

    def __init__(self, vocab, embedding_size, k1, k2, n1, n2):
        """
        Args:
            vocab: object exposing a ``vectors`` tensor used to initialise
                the embedding table.
            embedding_size: kernel width of both convolutions.
            k1: kernel height of the first convolution.
            k2: kernel height of the second convolution.
            n1: number of output channels of the first convolution.
            n2: number of output channels of the second convolution.
        """
        super().__init__()
        # Embedding table initialised from the pretrained vectors, left trainable.
        self.embeddings = nn.Embedding.from_pretrained(vocab.vectors, freeze=False)

        # One single-input-channel, bias-free convolution per kernel height.
        self.conv1 = nn.Conv2d(1, n1, (k1, embedding_size), bias=False)
        self.conv2 = nn.Conv2d(1, n2, (k2, embedding_size), bias=False)

        # Shared non-linearity and pooling over the length axis.
        self.relu = nn.ReLU()
        self.maxpool = nn.AdaptiveMaxPool1d(1)

        # Maps the concatenated branch features to a single scalar.
        self.linear = nn.Linear(n1 + n2, 1)

    def _pooled_features(self, conv, embedded):
        """Run one conv branch: conv -> ReLU -> max over length -> (batch_size, channels)."""
        # (batch_size, channels, length-k+1, 1)
        activated = self.relu(conv(embedded))
        # drop the width axis, pool to (batch_size, channels, 1), then drop that axis too
        return self.maxpool(activated.squeeze(3)).squeeze(2)

    def forward(self, x):
        """Return one sigmoid output per sentence.

        Args:
            x: integer tensor whose first two axes are swapped before the
                lookup, i.e. it arrives as (length, batch_size).

        Returns:
            1-D tensor with one value per sentence.
        """
        # (length, batch_size) -> (batch_size, length) -> (batch_size, length, embedding_dim)
        embedded = self.embeddings(torch.transpose(x, 0, 1))

        # Insert the channel axis the convolutions expect:
        # (batch_size, 1, length, embedding_dim)
        embedded = embedded.unsqueeze(1)

        # [(batch_size, n1), (batch_size, n2)]
        branches = [
            self._pooled_features(conv, embedded)
            for conv in (self.conv1, self.conv2)
        ]

        # (batch_size, n1+n2) -> (batch_size, 1)
        logits = self.linear(torch.cat(branches, dim=1))

        # Squash and flatten to (batch_size)
        return torch.sigmoid(logits).reshape(-1)

In [15]:
# Restore the trained baseline weights and switch the model to eval mode.
# map_location pins every tensor to the CPU so a checkpoint that was saved on
# a GPU runtime still loads on a CPU-only one (the CNN checkpoint is loaded
# the same way).
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
baseline_state = torch.load('/content/baseline.pt', map_location=torch.device('cpu'))
baseline = baselineModel(glove, 100)
baseline.load_state_dict(baseline_state)
baseline.eval()

baselineModel(
  (embeddings): Embedding(400000, 100)
  (linear): Linear(in_features=100, out_features=1, bias=True)
)

In [16]:
# Restore the trained CNN weights onto the CPU and switch the model to eval mode.
cnn_state = torch.load(
    '/content/cnn.pt',
    map_location=torch.device('cpu'),
)
cnn = cnnFreezeFalseModel(
    glove,
    embedding_size=100,
    k1=2,
    k2=4,
    n1=50,
    n2=80,
)
cnn.load_state_dict(cnn_state)
cnn.eval()

cnnFreezeFalseModel(
  (embeddings): Embedding(400000, 100)
  (conv1): Conv2d(1, 50, kernel_size=(2, 100), stride=(1, 1), bias=False)
  (conv2): Conv2d(1, 80, kernel_size=(4, 100), stride=(1, 1), bias=False)
  (relu): ReLU()
  (maxpool): AdaptiveMaxPool1d(output_size=1)
  (linear): Linear(in_features=130, out_features=1, bias=True)
)

In [49]:
def predict(sentence):
    """Classify a sentence as Subjective/Objective with both loaded models.

    The text is lower-cased, split on whitespace and mapped to GloVe indices
    (tokens missing from the vocabulary fall back to the last index). The
    baseline's raw score is passed through a sigmoid here; the CNN already
    returns a sigmoid output. A value above 0.5 is reported as "Subjective".

    Args:
        sentence: raw text entered by the user; ``None`` is treated as empty.

    Returns:
        One human-readable string with each model's label and probability.
        Empty input yields a prompt message instead of a prediction, and
        input with fewer tokens than the CNN's largest kernel height yields
        the baseline result plus a note that the CNN needs more tokens.
    """
    # GloVe 6B is an uncased vocabulary, so mixed-case tokens would otherwise
    # all miss the lookup and collapse onto the fallback index.
    tokens = (sentence or "").lower().split()
    if not tokens:
        # Averaging zero embeddings gives NaN and the convolutions reject
        # empty input, so there is nothing meaningful to predict.
        return "Please enter a non-empty sentence."

    # Convert to integer representation per token
    fallback_index = len(glove.stoi) - 1
    token_ints = [glove.stoi.get(tok, fallback_index) for tok in tokens]
    # Shape (length, 1): a batch holding this one sentence
    token_tensor = torch.LongTensor(token_ints).view(-1, 1)

    # A convolution cannot slide over a sentence shorter than its kernel.
    min_cnn_tokens = max(cnn.conv1.kernel_size[0], cnn.conv2.kernel_size[0])

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        baseline_prediction = torch.sigmoid(baseline(token_tensor)).item()
        if len(tokens) >= min_cnn_tokens:
            cnn_prediction = cnn(token_tensor).item()
        else:
            cnn_prediction = None

    baseline_result = "Subjective" if baseline_prediction > 0.5 else "Objective"
    output = "baseline determines it as " + str(baseline_result) + " with probability " + str(baseline_prediction)

    if cnn_prediction is None:
        output += " cnn needs at least " + str(min_cnn_tokens) + " tokens to make a prediction"
    else:
        cnn_result = "Subjective" if cnn_prediction > 0.5 else "Objective"
        output += " cnn determines it as " + str(cnn_result) + " with probability " + str(cnn_prediction)
    return output

In [50]:
# Expose predict() through a minimal text-in / text-out Gradio UI and start it.
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://e10158b8169676bcf6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


