# **LAUNCH TENSORBOARD ON KAGGLE**

In [None]:
# Clear any logs from previous runs, then recreate an empty ./logs/ directory
# (the same directory TensorBoard is pointed at with --logdir below).
!rm -rf ./logs/ 
!mkdir ./logs/

In [None]:
# From Github Gist: https://gist.github.com/hantoine/4e7c5bc6748861968e61e60bab89e9b0
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
from subprocess import Popen
from os import chmod
from os.path import isfile
import json
import time
import psutil

def download_and_unzip(url, extract_to='.'):
    """Download the zip archive at *url* and extract it into *extract_to*.

    The whole archive is read into memory before extraction, so this is only
    meant for small downloads.

    Args:
        url: Any URL ``urlopen`` understands that points at a zip archive.
        extract_to: Directory the archive members are extracted into
            (defaults to the current directory).
    """
    # Close the response as soon as the payload has been read.
    with urlopen(url) as http_response:
        payload = BytesIO(http_response.read())
    # Close the archive handle once every member has been written out.
    with ZipFile(payload) as archive:
        archive.extractall(path=extract_to)


def run_cmd_async_unsafe(cmd):
    """Start *cmd* through the system shell without waiting for it to finish.

    "Unsafe" because the command string is interpreted by the shell
    (``shell=True``); never pass untrusted input.

    Returns:
        The ``Popen`` handle of the spawned process.
    """
    process = Popen(args=cmd, shell=True)
    return process


def is_process_running(process_name):
    """Return True if a running process is named exactly *process_name*.

    Processes that exit, become zombies, or deny access while the process
    table is being scanned are skipped instead of aborting the scan.
    """
    for proc in psutil.process_iter():
        try:
            if proc.name() == process_name:
                return True
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # The process vanished or is off-limits; it cannot be the match.
            continue
    return False

def _wait_for_ngrok_url(api_url='http://127.0.0.1:4040/api/tunnels',
                        attempts=10, delay=1.0):
    """Poll the local ngrok API until a tunnel is listed; return its public URL.

    Raises:
        RuntimeError: if no tunnel is reported after *attempts* queries.
    """
    last_error = None
    for attempt in range(attempts):
        if attempt:
            time.sleep(delay)  # Give ngrok time to start before retrying
        try:
            with urlopen(api_url, timeout=10) as response:
                tunnels = json.load(response)['tunnels']
        except OSError as err:
            # URLError (e.g. connection refused while ngrok is still booting)
            # is a subclass of OSError.
            last_error = err
            continue
        if tunnels:
            return tunnels[0]['public_url']
    raise RuntimeError('ngrok tunnel not found') from last_error


def launch_tensorboard():
    """Start TensorBoard and an ngrok tunnel to it, then print the public URL.

    TensorBoard serves ``./logs/`` on port 6006. ngrok is downloaded into the
    current directory when ``./ngrok`` is missing. A process that is already
    running is reused rather than started a second time.

    Returns:
        A ``(tb_process, ngrok_process)`` tuple of ``Popen`` handles; an entry
        is ``None`` when that process was already running and was not started
        by this call.

    Raises:
        RuntimeError: if ngrok does not report a tunnel in time.
    """
    tb_process, ngrok_process = None, None

    # Launch TensorBoard
    if not is_process_running('tensorboard'):
        tb_command = 'tensorboard --logdir ./logs/ --host 0.0.0.0 --port 6006'
        tb_process = run_cmd_async_unsafe(tb_command)

    # Install ngrok
    # NOTE(review): this is the legacy "stable" ngrok download URL; confirm it
    # is still served and whether an authtoken is required nowadays.
    if not isfile('./ngrok'):
        ngrok_url = 'https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip'
        download_and_unzip(ngrok_url)
        chmod('./ngrok', 0o755)

    # Create ngrok tunnel and print its public URL
    if not is_process_running('ngrok'):
        ngrok_process = run_cmd_async_unsafe('./ngrok http 6006')
    # Poll instead of sleeping a fixed second: ngrok may need longer to come up.
    tb_public_url = _wait_for_ngrok_url()
    print(f'TensorBoard URL: {tb_public_url}')

    return tb_process, ngrok_process

# Start TensorBoard and the ngrok tunnel; the returned Popen handles are kept
# in the notebook namespace (either may be None if already running).
tb_process, ngrok_process = launch_tensorboard()

# **START WORKING WITH TEXTS**

In [None]:
import torch
import torch.nn as nn
import pandas as pd
import torch.nn.functional as F
import torch.optim as optim
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
DATA_PATH = "../input/imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv"
BATCH_SIZE = 4096
LEARNING_RATE = 1e-3
NUM_EPOCHS = 25
# Number of epochs without improvement to wait before the learning rate is
# decreased. Integer division keeps this an int (an epoch count), not 5.0.
PATIENCE = NUM_EPOCHS // 5

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Preview five randomly sampled rows of the raw CSV.
pd.read_csv(DATA_PATH).sample(5)

In [None]:
class Reviews(Dataset):
    """Bag-of-words dataset built from a CSV with `review` and `sentiment` columns.

    Reviews are vectorised with a ``CountVectorizer`` fitted on the whole
    file (English stop words removed; terms occurring in more than 99% or
    fewer than 0.5% of the reviews are dropped). Sentiment strings are
    encoded as 1 for 'positive' and 0 for 'negative'.
    """

    def __init__(self, path):
        frame = pd.read_csv(path)
        vectorizer = CountVectorizer(stop_words='english', max_df=0.99, min_df=0.005)
        self.vectorizer = vectorizer
        # Sparse document-term count matrix, one row per review.
        self.sequences = vectorizer.fit_transform(frame.review.tolist())
        encoded_sentiment = frame.sentiment.replace(['positive', 'negative'], [1, 0])
        self.labels = encoded_sentiment.tolist()
        # Vocabulary lookups in both directions.
        self.token2idx = vectorizer.vocabulary_
        self.idx2token = dict((idx, token) for token, idx in self.token2idx.items())

    def __len__(self):
        """Return the number of reviews (rows of the count matrix)."""
        num_reviews, _ = self.sequences.shape
        return num_reviews

    def __getitem__(self, i):
        """Return ``(counts, label)`` for review *i*; counts is the dense row as a 2-D array."""
        dense_row = self.sequences[i, :].toarray()
        return dense_row, self.labels[i]

In [None]:
dataset = Reviews(DATA_PATH)
# Reshuffle every epoch so training does not see the same batches in file
# order each time (this loader is only used by the training loop).
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
# Show the count vector of one review as a sanity check.
print(dataset[5][0])

In [None]:
class BagOfWordsClassifier(nn.Module):
    """Feed-forward binary classifier over bag-of-words count vectors.

    Args:
        hidden: Sequence of layer widths. ``hidden[0]`` is the input
            (vocabulary) size and each following entry is the width of one
            hidden layer. A final ``Linear(hidden[-1], 1)`` + ``Sigmoid``
            produces a probability.

    Raises:
        ValueError: if *hidden* is empty.
    """

    def block(self, in_p, out_p):
        """Return one hidden block: ``Linear(in_p, out_p)`` followed by ``ReLU``."""
        return nn.Sequential(
            nn.Linear(in_p, out_p),
            nn.ReLU()
        )

    def __init__(self, hidden):
        super(BagOfWordsClassifier, self).__init__()
        if not hidden:
            raise ValueError('hidden must contain at least the input size')
        self.len_params = len(hidden)
        # Pair consecutive widths so each hidden layer gets its ReLU; without
        # the non-linearity the stacked Linear layers collapse to one.
        self.model = nn.Sequential(
            *[self.block(in_p, out_p) for in_p, out_p in zip(hidden, hidden[1:])],
            nn.Linear(hidden[-1], 1),
            nn.Sigmoid()
        )

    def forward(self, data):
        """Map count vectors to probabilities.

        Drops dimension 1 when it has size 1 and casts to float before
        running the layers; the last output dimension has size 1.
        """
        return self.model(data.squeeze(1).float())

In [None]:
# Layer widths: vocabulary size as input, then hidden widths 128 and 64.
model = BagOfWordsClassifier([len(dataset.token2idx), 128, 64]).to(device)
# Bare expression so the notebook displays the module structure.
model

In [None]:
# Binary cross-entropy on the sigmoid probabilities produced by the model.
criterion = nn.BCELoss()
optimizer = optim.Adam([p for p in model.parameters() if p.requires_grad], lr=LEARNING_RATE)
# The scheduler is stepped with the mean training loss, a quantity that should
# decrease, so plateau detection must use mode='min'. With mode='max' every
# improvement in the loss would be counted as a bad epoch.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=PATIENCE, min_lr=1e-6, eps=1e-08)

In [None]:
# TensorBoard writers under ./logs/ (the directory TensorBoard is launched on):
# `writer_sum` receives the per-epoch mean loss, `writer` the per-batch loss.
writer_sum = SummaryWriter("logs/scratch")
writer = SummaryWriter("logs/desc")

In [None]:
# Train for NUM_EPOCHS epochs, logging the loss of every batch and the mean
# loss of every epoch to TensorBoard.
model.train()
train_losses = []
batches_per_epoch = len(loader)
for epoch in tqdm(range(NUM_EPOCHS)):
    losses = []
    for batch_idx, (inputs, labels) in enumerate(tqdm(loader, leave=False)):
        model.zero_grad()
        inputs = inputs.to(device)
        labels = labels.to(device)
        predicted = model(inputs)
        # Drop only the trailing size-1 dimension; a bare squeeze() would also
        # collapse a batch of one sample and no longer match the labels.
        loss = criterion(predicted.squeeze(-1), labels.float())
        loss.backward()
        # Clip the gradient norm to 3 to keep updates bounded.
        nn.utils.clip_grad_norm_(model.parameters(), 3)
        optimizer.step()
        batch_loss = loss.item()
        losses.append(batch_loss)
        # A running step index is required for the points to form a curve.
        writer.add_scalar("Loss all", batch_loss,
                          global_step=epoch * batches_per_epoch + batch_idx)

    current_train_loss = sum(losses) / len(losses)
    scheduler.step(current_train_loss)
    train_losses.append(current_train_loss)
    writer_sum.add_scalar("Loss summary", current_train_loss, global_step=epoch)

# Push any buffered events to disk so TensorBoard can display them.
writer.flush()
writer_sum.flush()

In [None]:
def predict_sentiment(text):
    """Print the model's probability for *text* and the resulting sentiment label.

    The text is vectorised with the dataset's fitted vectorizer and scored by
    the global model in eval mode with gradients disabled. A score above 0.5
    is reported as positive, anything else as negative. Nothing is returned.
    """
    model.eval()
    with torch.no_grad():
        counts = dataset.vectorizer.transform([text]).toarray()
        test_vector = torch.LongTensor(counts).to(device)
        prediction = model(test_vector).item()

    if not prediction > 0.5:
        print(f'{prediction:0.3}: Negative sentiment')
        return
    print(f'{prediction:0.3}: Positive sentiment')

In [None]:
# Sample negative review used to sanity-check the trained model (the passage
# appears twice inside the literal).
test_text = """
This poor excuse for a movie is terrible. It has been 'so good it's bad' for a
while, and the high ratings are a good form of sarcasm, I have to admit. But
now it has to stop. Technically inept, spoon-feeding mundane messages with the
artistic weight of an eighties' commercial, hypocritical to say the least, it
deserves to fall into oblivion. Mr. Derek, I hope you realize you are like that
weird friend that everybody know is lame, but out of kindness and Christian
duty is treated like he's cool or something. That works if you are a good
decent human being, not if you are a horrible arrogant bully like you are. Yes,
Mr. 'Daddy' Derek will end on the history books of the internet for being a
delusional sour old man who thinks to be a good example for kids, but actually
has a poster of Kim Jong-Un in his closet. Destroy this movie if you all have a
conscience, as I hope IHE and all other youtube channel force-closed by Derek
out of SPITE would destroy him in the courts.This poor excuse for a movie is
terrible. It has been 'so good it's bad' for a while, and the high ratings are
a good form of sarcasm, I have to admit. But now it has to stop. Technically
inept, spoon-feeding mundane messages with the artistic weight of an eighties'
commercial, hypocritical to say the least, it deserves to fall into oblivion.
Mr. Derek, I hope you realize you are like that weird friend that everybody
know is lame, but out of kindness and Christian duty is treated like he's cool
or something. That works if you are a good decent human being, not if you are a
horrible arrogant bully like you are. Yes, Mr. 'Daddy' Derek will end on the
history books of the internet for being a delusional sour old man who thinks to
be a good example for kids, but actually has a poster of Kim Jong-Un in his
closet. Destroy this movie if you all have a conscience, as I hope IHE and all
other youtube channel force-closed by Derek out of SPITE would destroy him in
the courts.
"""
# Print the predicted probability and sentiment label for the sample review.
predict_sentiment(test_text)