# General

In [None]:
!nvidia-smi # Check what gpu you are connected with

# Installation

## General

In [None]:
# Import generally needed modules
import torch # Imports torch

from math import sqrt # square root
import numpy as np # Handels conversion from image to tensor
import random

import requests

from bs4 import BeautifulSoup

from PIL import Image # Handels images

import os, sys # Handels interacting with the operating system

## Transformers

In [None]:
# Pip install necessary modules for working with transformers at high and low level
!pip install datasets
!pip install transformers[sentencepiece]

In [None]:
# Import specific classes from transformers module
from transformers import AutoTokenizer, DataCollatorWithPadding, TrainingArguments, Trainer
from transformers import BertForSequenceClassification

# Imports for handling datasets
from datasets import load_dataset, dataset_dict, Dataset

In [None]:
# Function for computing metrics
from sklearn.metrics import accuracy_score
def ComputeMetrics(prediction):
    ''' Returns {'accuracy': ...} for the arg-max predictions against the reference labels

    prediction is expected to expose `.predictions` (scores, arg-maxed over the last axis)
    and `.label_ids` (the reference labels).
    '''
    # The highest score along the last axis is taken as the predicted class
    predicted_classes = prediction.predictions.argmax(-1)
    return { 'accuracy': accuracy_score(prediction.label_ids, predicted_classes) }

## Fast AI

In [None]:
!pip install fastai --upgrade -q
!pip install nbdev

In [None]:
from fastai.vision.all import *

# Drive

In [None]:
# Mounts your drive
def MountDrive() -> str:
    ''' Mounts Google Drive at /content/drive and returns that mount point '''
    # Imported lazily so the notebook can still be loaded outside of Colab
    from google.colab import drive as colab_drive
    mount_point:str = "/content/drive"
    colab_drive.mount(mount_point)
    return mount_point

# Functions to help with using your drive
def CopyFolder(copy_from:str, copy_to:str):
    ''' Recursively copies the contents of folder copy_from into folder copy_to

    copy_from: path of the existing source folder (a trailing "/" is fine)
    copy_to:   path of the destination folder; it and any missing parents are
               created, and an already existing destination is merged into

    Implemented with shutil rather than the `%cp` magic: the magic received the
    literal words "copy_from" / "copy_to" instead of the argument values, and
    unquoted paths containing spaces (e.g. "My Drive") would have been split.
    '''
    import shutil

    # symlinks=True keeps symbolic links as links, like `cp -a` does
    shutil.copytree(copy_from, copy_to, symlinks=True, dirs_exist_ok=True)

# Train Transformer using Trainer API

In [None]:
# Model
model_name:str = "bert-base-uncased" # "KB/bert-base-swedish-cased"
max_lenght:int = 128
batch_size:int = 32
use_floating_point_16:bool = False
training_epochs:float = 1.0

In [None]:
# Mount drive
MountDrive()

## Get dataset from huggingface

In [None]:
# Get and show dataset from huggingface
dataset_name:str = "ag_news"
label_list:list = ["World", "Sports", "Business", "Sci/Tech"] # Should be able to extract from data rather than the website lamao
raw_datasets = load_dataset(dataset_name)
print(raw_datasets)

In [None]:
# Tokenize and add structure to dataset
tokenizer = AutoTokenizer.from_pretrained(model_name)

def TokenizeFunction(example):
    ''' Tokenizes the "text" column, truncating and padding up to max_lenght tokens '''
    return tokenizer(example["text"], max_length=max_lenght, padding=True, truncation=True)

# Tokenize the train split first, then the test split, in batched mode
tokenized_dataset_train, tokenized_dataset_test = (
    raw_datasets[split_name].map(TokenizeFunction, batched=True)
    for split_name in ("train", "test")
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
print(data_collator)

## Create dataset from file

In [None]:
def CreateDatasetFromJSONFile(file_path:str, label_list:list, seperation_key:str = "[SEP]", beggining_key:str = "[CLS]") -> dict:
    ''' Builds a {"text": [...], "label": [...]} dictionary from a JSON Lines file of sentence pairs

    file_path:      path to a file holding one JSON object per line; every object
                    must have the keys "sentence1", "sentence2" and "gold_label"
    label_list:     all possible labels; a label is stored as its index in this list
    seperation_key: marker written after each of the two sentences
    beggining_key:  marker written at the start of every text

    Every record yields two samples with the same label: the sentence pair in its
    original order and the pair flipped.

    Raises ValueError if a "gold_label" is missing from label_list, and KeyError
    if a record lacks one of the expected keys.
    '''
    import json

    dataset:dict = {
        "text":[],
        "label":[]
    }

    # Stream the file line by line instead of loading every line into memory first
    with open(file_path, "r", encoding="utf-8") as json_file:
        for json_str in json_file:
            # Blank lines are not valid JSON and would crash json.loads
            if not json_str.strip():
                continue

            result:dict = json.loads(json_str)

            sub_scentence_1:str = result["sentence1"]
            sub_scentence_2:str = result["sentence2"]
            label_index:int = label_list.index(result["gold_label"])

            # Original order first, then the flipped pair, both with the same label
            for first, second in ((sub_scentence_1, sub_scentence_2), (sub_scentence_2, sub_scentence_1)):
                dataset["text"].append(f"{beggining_key} {first} {seperation_key} {second} {seperation_key}")
                dataset["label"].append(label_index)

    return dataset

In [None]:
dataset_name:str = "SNLI"
label_list:list = ["entailment", "contradiction", "neutral", "-"]

In [None]:
# Get dataset from json file
# NOTE: Download is defined in a later cell of this notebook; run that cell before this one
dataset_path = Download("https://nlp.stanford.edu/projects/snli/snli_1.0.zip")

In [None]:
# Build one dataset per SNLI split from the extracted .jsonl files.
# NOTE: `Dataset` must be the class imported from `datasets` here; the custom
# `Dataset` class defined later in this notebook replaces that name and has no from_dict.
dev_data:dict = CreateDatasetFromJSONFile("/content/snli_1.0/snli_1.0_dev.jsonl", label_list)
dev_dataset = Dataset.from_dict(dev_data)

test_data:dict = CreateDatasetFromJSONFile("/content/snli_1.0/snli_1.0_test.jsonl", label_list)
test_dataset = Dataset.from_dict(test_data)

train_data:dict = CreateDatasetFromJSONFile("/content/snli_1.0/snli_1.0_train.jsonl", label_list)
train_dataset = Dataset.from_dict(train_data)

In [None]:
# Tokenize and add structure to dataset
tokenizer = AutoTokenizer.from_pretrained(model_name)

def TokenizeFunction(example):
    ''' Tokenizes the "text" column, truncating and padding up to max_lenght tokens '''
    encoded = tokenizer(example["text"], max_length=max_lenght, padding=True, truncation=True)
    return encoded

# Tokenize the SNLI train split first, then the test split, in batched mode
tokenized_dataset_train, tokenized_dataset_test = (
    split.map(TokenizeFunction, batched=True) for split in (train_dataset, test_dataset)
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
print(data_collator)

## Create dataset from scraping

In [None]:
def CreateDatasetFromScraping(page_url:str, label_list:list, seperation_key:str = "[SEP]", beggining_key:str = "[CLS]", start_itteration:int = 0, end_itteration:int = 1000) -> dict:
    ''' Scrapes review titles and star scores page by page into a {"text": [...], "label": [...]} dictionary

    page_url:         url up to and including the page parameter name; "=<page number>" is appended
    label_list:       currently unused by this function
    seperation_key:   marker written after every review title
    beggining_key:    marker written before every review title
    start_itteration: first page number to request
    end_itteration:   page number to stop at (exclusive)

    Scraping stops when end_itteration is reached or a page holds no elements
    with the class "inner_article". The label of a review is the number of
    "mz_star_on" elements in the article minus one.
    '''
    dataset:dict = {
        "text":[],
        "label":[]
    }

    print("page: ", end="")
    itteration:int = start_itteration
    # The end is checked before requesting, so the page at end_itteration is never fetched
    while itteration != end_itteration:
        page = requests.get(f"{page_url}={itteration}")
        soup = BeautifulSoup(page.content, 'html.parser')
        all_articles_in_page = soup.find_all(class_="inner_article")

        # A page without articles means the listing has run out
        if len(all_articles_in_page) == 0:
            break

        print(f"{itteration}, ", end="")
        for article in all_articles_in_page:
            # [1:-1] drops the first and last character of the title text
            review_title:str = article.find(class_="review_title").contents[0][1:-1]
            review_score:int = len(article.find_all(class_="mz_star_on"))-1
            dataset["text"].append(f"{beggining_key} {review_title} {seperation_key}")
            dataset["label"].append(review_score)
        itteration+=1
    return dataset

In [None]:
dataset_name:str = "moviezine"
label_list:list = ["1", "2", "3", "4", "5"]

In [None]:
raw_dataset_train = CreateDatasetFromScraping("https://www.moviezine.se/recensioner?page", label_list, end_itteration=290)
raw_dataset_test = CreateDatasetFromScraping("https://www.moviezine.se/recensioner?page", label_list, start_itteration=290)

In [None]:
dataset_train = Dataset.from_dict(raw_dataset_train)
dataset_test = Dataset.from_dict(raw_dataset_test)

In [None]:
# Tokenize and add structure to dataset
def TokenizeFunction(example):
    ''' Tokenizes the "text" column, truncating and padding up to max_lenght tokens '''
    return tokenizer(example["text"], truncation=True, padding=True, max_length=max_lenght)

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Tokenize the scraped splits into the names the Trainer cell consumes.
# Previously this cell tokenized raw_datasets into an otherwise unused variable,
# so the scraped data never reached the Trainer.
tokenized_dataset_train = dataset_train.map(TokenizeFunction, batched=True)
tokenized_dataset_test = dataset_test.map(TokenizeFunction, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
print(data_collator)

## Create the model

In [None]:
# Create training arguments
# All settings are passed to the constructor so that TrainingArguments can validate
# them together, instead of being patched onto the finished object afterwards.
# Checkpoints go to the drive when it is mounted, otherwise to a local cache folder.
training_args = TrainingArguments(
    f"/content/drive/My Drive/Colab Notebooks/Models/{model_name}/{dataset_name}" if os.path.exists("/content/drive/") else "/content/cached-trainer",
    evaluation_strategy="epoch",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    fp16=use_floating_point_16,              # Tesla K80 cannot make use of fp16
    fp16_full_eval=use_floating_point_16,    # Tesla K80 cannot make use of fp16
    num_train_epochs=training_epochs,
)

In [None]:
# Load in model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(label_list)).to("cuda")

In [None]:
# Create a trainer
# Every argument is passed by keyword so the call does not depend on parameter order
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset_train,
    eval_dataset=tokenized_dataset_test,
    tokenizer=tokenizer,
    compute_metrics=ComputeMetrics,
)

## Train

In [None]:
# Train
trainer.train()

In [None]:
# Mount drive and copy cached files to drive
# The local cache folder only exists when training ran without the drive mounted
# (it is the fallback output directory of the training arguments cell)
if os.path.exists("/content/cached-trainer/"):
    MountDrive()
    # Destination matches the drive output directory used by the training arguments cell
    CopyFolder("/content/cached-trainer/", f"/content/drive/My Drive/Colab Notebooks/Models/{model_name}/{dataset_name}")

# Creating Fast Ai Backend

## Dataset

### Creating data handler

In [None]:
class Path:
  ''' Handles accessing files from directories '''
  # NOTE(review): this name likely shadows the `Path` brought in by the fastai star import - confirm

  @staticmethod
  def GetPaths(directory: str, file_extension: str = "") -> tuple:
    ''' Returns all file paths below directory (recursively) whose file name ends with file_extension

    Paths are built as "<directory path>/<file name>". The order follows os.walk.
    '''
    return tuple(
      f"{directory_path}/{file_name}"
      for (directory_path, _, file_names) in os.walk(directory)
      for file_name in file_names
      if file_name.endswith(file_extension)
    )

  @staticmethod
  def GetFilesFromPaths(paths: tuple, FileReaderFunction) -> tuple:
    ''' Returns FileReaderFunction(path) for every path in paths, in the same order '''
    return tuple(FileReaderFunction(file_path) for file_path in paths)

  @staticmethod
  def GetFilesFromPath(directory: str, FileReaderFunction, data_type: str = "") -> tuple:
    ''' Reads and returns all files below directory whose file names end with data_type '''
    # The helpers are static methods, so they have to be reached through the class;
    # calling them as bare names raised a NameError
    paths: tuple = Path.GetPaths(directory, data_type)
    return Path.GetFilesFromPaths(paths, FileReaderFunction)

In [None]:
class Annotations:
  ''' Handles building annotation vocabularies '''

  @staticmethod
  def GetAnnotationsByDirectory(directory: str) -> dict:
    ''' Returns a dictionary mapping every folder name found below directory to an index

    Folder names are visited in sorted order, so the mapping is the same on every
    run and machine (os.walk on its own gives no ordering guarantee). A folder
    name that occurs more than once keeps its first index, which keeps the
    indices contiguous from 0 to len(result) - 1.
    '''
    annotations: dict = {}

    for (_, parrent_names, _) in os.walk(directory):
      # Sorting in place also fixes the order in which os.walk descends
      parrent_names.sort()
      for parrent_name in parrent_names:
        if parrent_name not in annotations:
          annotations[parrent_name] = len(annotations)

    return annotations

In [None]:
class FileReader:
  ''' Handles reading of files '''

  @staticmethod
  def ReadImageFile(path: str, annotation_to_vocab: dict) -> tuple:
    ''' Reads an image file and returns (image tensor, annotation tensor)

    path:                image path; the name of the folder directly containing the
                         file is used as the annotation
    annotation_to_vocab: maps annotation (folder name) to its index

    The image is resized to 64x64, converted to RGB and divided by 256. The
    tensor is float32: the layers defined in this notebook create float32
    weights, and float16 inputs do not match them in a convolution.

    Raises KeyError if the folder name is missing from annotation_to_vocab.
    '''
    # The context manager makes sure the underlying file handle is closed
    with Image.open(path) as image:
      rgb_image = image.resize((64,64)).convert('RGB')
    image_to_tensor: torch.Tensor = torch.from_numpy(np.asarray(rgb_image, dtype=np.float32) / 256)

    # Second to last path component is the folder that holds the image
    annotation: str = path.split("/")[-2]
    annotation_tensor: torch.Tensor = torch.tensor(annotation_to_vocab[annotation])

    return (image_to_tensor, annotation_tensor)

In [None]:
class Dataset:
  ''' Lazily reads dataset items from a collection of paths through a file reader function '''
  # NOTE: this class reuses the name `Dataset` that is imported from `datasets` earlier in the notebook

  def __init__(self, data_directory_paths: tuple, annotation_vocab: dict, FileReaderFunction):
    ''' Keeps the item paths, the annotation vocabulary and the reader used to load an item '''
    self.data_directory_paths: tuple = data_directory_paths
    self.FileReaderFunction = FileReaderFunction
    self.annotation_vocab: dict = annotation_vocab

  def Length(self) -> int:
    ''' Number of items (paths) in the dataset '''
    return len(self.data_directory_paths)

  def GetItem(self, item_index: int) -> tuple:
    ''' Reads the item at item_index with the file reader, handing it the annotation vocabulary '''
    return self.FileReaderFunction(self.data_directory_paths[item_index], self.annotation_vocab)

  def GetBatch(self, item_indices: list, index_shift: int = 0) -> tuple:
    ''' Returns (stacked data tensor, stacked annotation tensor) for the given indices

    index_shift is added to every index before the item is read.
    '''
    items: list = [self.GetItem(item_index + index_shift) for item_index in item_indices]

    data_collective = torch.stack([item[0] for item in items])
    annotations_collective = torch.stack([item[1] for item in items])

    return (data_collective, annotations_collective)

In [None]:
class DataLoader:
  ''' Serves a Dataset in batches of batch_size items, optionally shuffled '''
  def __init__(self, dataset: "Dataset", batch_size: int = 128, shuffle: bool = False, thread_count: int = 1):
    ''' dataset needs Length() and GetBatch(indices); thread_count is the number of worker processes '''
    self.dataset = dataset
    self.batch_size: int = batch_size
    # Number of batches, rounded up so a smaller last batch is included
    self.chunk_size: int = (self.dataset.Length() - 1) // self.batch_size + 1
    self.shuffle: bool = shuffle
    self.thread_count: int = thread_count

  def __len__(self):
    ''' Number of batches per pass over the dataset '''
    return self.chunk_size

  def __iter__(self):
    ''' Yields dataset.GetBatch(indices) for consecutive chunks of batch_size indices

    Batches are loaded one at a time while iterating. The previous version first
    loaded every batch eagerly, threw the result away and then loaded everything
    a second time. With thread_count <= 1 the batches are read in this process;
    otherwise a pool of thread_count worker processes is used.
    '''
    all_indices: list = list(range(self.dataset.Length()))

    if self.shuffle:
      random.shuffle(all_indices)

    chunks: list = [
      all_indices[range_from:range_from + self.batch_size]
      for range_from in range(0, len(all_indices), self.batch_size)
    ]

    if self.thread_count <= 1:
      # A single worker gains nothing from a process pool, it only adds pickling overhead
      for chunk in chunks:
        yield self.dataset.GetBatch(chunk)
      return

    # Imported here because the notebook never imports concurrent.futures
    import concurrent.futures
    with concurrent.futures.ProcessPoolExecutor(self.thread_count) as executor:
      yield from executor.map(self.dataset.GetBatch, chunks)

class DataLoaders:
    ''' Pairs a training and a validation DataLoader as .train and .valid '''
    def __init__(self, *data_loaders: DataLoader):
        # Exactly two loaders are expected: training first, validation second
        train_loader, valid_loader = data_loaders
        self.train = train_loader
        self.valid = valid_loader

In [None]:
def Download(url: str, target_directory: str = "/content/") -> str:
  ''' Downloads the archive at url, extracts it into target_directory and returns the path to the content

  url:              address of a ".zip" or ".tgz" archive; the text after the last "/" is the file name
  target_directory: folder to download and extract into (created when missing)

  Returns target_directory joined with the archive file name minus its final
  extension, e.g. ".../snli_1.0.zip" gives "<target_directory>/snli_1.0". The
  archive is expected to contain a top-level folder of that name.

  Raises ValueError, before anything is downloaded, for any other extension.
  '''
  import os, shutil, urllib.request

  file_name: str = url.split("/")[-1]
  # Only the last extension is cut off, so dots inside the name ("snli_1.0") survive
  file_name_no_extension, _, file_tar_type = file_name.rpartition(".")

  archive_formats: dict = {"zip": "zip", "tgz": "gztar"}
  if file_tar_type not in archive_formats:
    print("tar prefix not handleable")
    raise ValueError(f"unsupported archive type: {file_name}")

  os.makedirs(target_directory, exist_ok=True)
  downloaded_file_directory: str = os.path.join(target_directory, file_name)
  urllib.request.urlretrieve(url, downloaded_file_directory)
  try:
    # NOTE(review): archives are extracted as they are; only use trusted sources
    shutil.unpack_archive(downloaded_file_directory, target_directory, archive_formats[file_tar_type])
  finally:
    # The downloaded archive is removed whether or not the extraction worked
    os.remove(downloaded_file_directory)

  return os.path.join(target_directory, file_name_no_extension)

In [None]:
# This cell is not mine so i do not understand it fully :'(
def to_device(b, device=None, non_blocking=False):
  "Recursively put `b` on `device`."
  # An explicit use_cuda == False forces the cpu; any other setting falls back
  # to default_device() when no device was given
  if defaults.use_cuda==False:
    device = 'cpu'
  elif device is None:
    device = default_device()

  def _move(o):
    # Only tensors are moved, everything else is passed through untouched
    return o.to(device, non_blocking=non_blocking) if isinstance(o,Tensor) else o

  return apply(_move, b)

class Normalize:
  ''' Callable computing (x - stats[0]) / stats[1] '''
  def __init__(self, stats):
    self.stats = stats

  def __call__(self, x):
    # The stats are moved (once) to the device of the input when they live elsewhere
    stats_device = self.stats[0].device
    if x.device != stats_device:
      self.stats = to_device(self.stats, x.device)
    shift, scale = self.stats[0], self.stats[1]
    return (x - shift) / scale

### Using created data handler

In [None]:
# Download dataset
dataset_path = Download("https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz")

In [None]:
# Serilize dataset into a dataloader
# Collect the image paths and the folder-name to index vocabulary
paths: tuple = Path.GetPaths(dataset_path, ".jpg")
annotation_vocab: dict = Annotations.GetAnnotationsByDirectory(dataset_path)

# Wrap them in a dataset and read a two item sample batch
dataset: Dataset = Dataset(paths, annotation_vocab, FileReader.ReadImageFile)
batch: tuple = dataset.GetBatch([1, 2])

# The same shuffled loader is used for both training and validation
data_loader: DataLoader = DataLoader(dataset, 128, True)
data_loaders: DataLoaders = DataLoaders(data_loader, data_loader)

batches = iter(data_loader)

# Sample batch shape and labels, the vocabulary and the dataset size, one per line
print(batch[0].shape, batch[1], dataset.annotation_vocab, dataset.Length(), sep="\n")

In [None]:
# What was stated above still holds and i do not understand this cell :'(
# Mean and standard deviation of the sample batch, reduced over dims 0, 1 and 2
# so that one value per entry of the last dim is left
sample_images = batch[0]
stats = [sample_images.mean((0,1,2)), sample_images.std((0,1,2))]
print(stats)
norm = Normalize(stats)

def tfm_x(x):
    ''' Normalizes x with norm and moves the last dim to position 1 '''
    normalized = norm(x)
    return normalized.permute((0,3,1,2))

t = tfm_x(sample_images)
# After the permute the reduction runs over dims 0, 2 and 3 instead
print([t.mean((0,2,3)),t.std((0,2,3))])

## Neural Nets

### Creating Neural Nets

In [None]:
# These next code cells are not mine, so I do not fully understand them :'(
class LayerFunction():
  ''' Base class for layers: calling it runs forward and remembers the inputs and the output for backward '''
  def __call__(self, *args):
    # Inputs and output are stored so backward() can hand them to bwd()
    self.args = args
    result = self.forward(*args)
    self.out = result
    return result

  def forward(self):
    ''' To be overridden by subclasses '''
    raise Exception('not implemented')

  def bwd(self):
    ''' To be overridden by subclasses; receives the stored output followed by the stored inputs '''
    raise Exception('not implemented')

  def backward(self):
    stored_output, stored_inputs = self.out, self.args
    self.bwd(stored_output, *stored_inputs)

class Relu(LayerFunction):
    ''' Clamps negative inputs to zero; bwd stores the gradient on inp.g '''
    def forward(self, inp):
        return inp.clamp_min(0.)

    def bwd(self, out, inp):
        # Gradient only flows where the input was positive
        positive_mask = (inp>0).float()
        inp.g = positive_mask * out.g

class Lin(LayerFunction):
    ''' Linear layer computing inp @ w + b; bwd stores gradients as .g on inp, w and b '''
    def __init__(self, w, b):
        self.w = w
        self.b = b

    def forward(self, inp):
        return inp@self.w + self.b

    def bwd(self, out, inp):
        upstream = out.g
        inp.g = upstream @ self.w.t()
        self.w.g = inp.t() @ self.out.g
        self.b.g = upstream.sum(0)

class Mse(LayerFunction):
    ''' Mean squared error between the squeezed input and the target; bwd stores the gradient on inp.g '''
    def forward (self, inp, targ):
        difference = inp.squeeze() - targ
        return difference.pow(2).mean()

    def bwd(self, out, inp, targ):
        difference = inp.squeeze() - targ
        # unsqueeze(-1) adds back a trailing dim that forward squeezed away
        inp.g = 2*difference.unsqueeze(-1) / targ.shape[0]

class Model():
  ''' Lin -> Relu -> Lin network with an Mse loss, built from the hand-written layers '''
  def __init__(self, w1, b1, w2, b2):
    self.layers = [Lin(w1,b1), Relu(), Lin(w2,b2)]
    self.loss = Mse()

  def __call__(self, x, targ):
    ''' Runs x through every layer and returns the loss against targ '''
    activations = x
    for layer in self.layers:
      activations = layer(activations)
    return self.loss(activations, targ)

  def backward(self):
    ''' Runs backward on the loss first, then on the layers from last to first '''
    for stage in (self.loss, *reversed(self.layers)):
      stage.backward()

In [None]:
class MyRelu(torch.autograd.Function):
    ''' ReLU written as a custom autograd function '''
    @staticmethod
    def forward(ctx, i):
        # The input is kept for backward, which needs to know where it was positive
        ctx.save_for_backward(i)
        return i.clamp_min(0.)

    @staticmethod
    def backward(ctx, grad_output):
        (saved_input,) = ctx.saved_tensors
        positive_mask = (saved_input>0).float()
        return grad_output * positive_mask

class LinearLayer(torch.nn.Module):
    ''' Linear layer with an (n_out, n_in) weight and a zero-initialised bias '''
    def __init__(self, n_in, n_out):
        super().__init__()
        # Standard normal weights scaled by sqrt(2 / n_in)
        weight_scale = sqrt(2/n_in)
        self.weight = torch.nn.Parameter(torch.randn(n_out, n_in) * weight_scale)
        self.bias = torch.nn.Parameter(torch.zeros(n_out))

    def forward(self, x):
        return torch.matmul(x, self.weight.t()) + self.bias

class Model(torch.nn.Module):
    ''' Linear -> ReLU -> Linear network whose forward pass returns the loss against targ '''
    # NOTE: replaces the hand-written Model class defined in the previous cell
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        hidden_layer = torch.nn.Linear(n_in,nh)
        output_layer = torch.nn.Linear(nh,n_out)
        self.layers = torch.nn.Sequential(hidden_layer, torch.nn.ReLU(), output_layer)
        self.loss = mse

    def forward(self, x, targ):
        predictions = self.layers(x).squeeze()
        return self.loss(predictions, targ)

In [None]:
class Parameter(Tensor):
    ''' Tensor subclass used to mark tensors as parameters; gradients are always enabled '''
    def __new__(cls, x):
        return Tensor._make_subclass(Parameter, x, True)

    def __init__(self, *args, **kwargs):
        self.requires_grad_()

In [None]:
class Module:
    ''' Base class that registers Parameter and Module attributes as they are assigned '''
    def __init__(self):
        self.hook = None
        self.params = []
        self.children = []
        self._training = False

    def register_parameters(self, *ps):
        self.params += ps

    def register_modules(self, *ms):
        self.children += ms

    @property
    def training(self):
        return self._training

    @training.setter
    def training(self,v):
        # The flag is passed on to every registered child module
        self._training = v
        for child in self.children:
            child.training = v

    def parameters(self):
        ''' Own parameters followed by the parameters of all children, recursively '''
        child_parameters = []
        for child in self.children:
            child_parameters.extend(child.parameters())
        return self.params + child_parameters

    def __setattr__(self,k,v):
        super().__setattr__(k,v)
        # Assigning a Parameter or a Module as an attribute registers it automatically
        if isinstance(v,Parameter):
            self.register_parameters(v)
        if isinstance(v,Module):
            self.register_modules(v)

    def __call__(self, *args, **kwargs):
        res = self.forward(*args, **kwargs)
        # An optional hook gets the output and the positional inputs
        if self.hook is not None:
            self.hook(res, args)
        return res

    def cuda(self):
        for parameter in self.parameters():
            parameter.data = parameter.data.cuda()

In [None]:
class ConvLayer(Module):
    ''' 3x3 convolution with padding 1, an optional bias and an optional ReLU '''
    def __init__(self, ni, nf, stride=1, bias=True, act=True):
        super().__init__()
        self.w = Parameter(torch.zeros(nf,ni,3,3))
        self.b = Parameter(torch.zeros(nf)) if bias else None
        self.act,self.stride = act,stride
        # Kaiming init when a ReLU follows, Xavier init otherwise
        if act:
            nn.init.kaiming_normal_(self.w)
        else:
            nn.init.xavier_normal_(self.w)

    def forward(self, x):
        convolved = F.conv2d(x, self.w, self.b, stride=self.stride, padding=1)
        return F.relu(convolved) if self.act else convolved

class Linear(Module):
    ''' Linear layer with an (nf, ni) Xavier-initialised weight and a zero bias '''
    def __init__(self, ni, nf):
        super().__init__()
        self.w = Parameter(torch.zeros(nf,ni))
        self.b = Parameter(torch.zeros(nf))
        nn.init.xavier_normal_(self.w)

    def forward(self, x):
        return torch.matmul(x, self.w.t()) + self.b

In [None]:
class T(Module):
    ''' Small example module holding one ConvLayer (c) and one Linear layer (l) '''
    def __init__(self):
        super().__init__()
        conv_layer = ConvLayer(3,4)
        linear_layer = Linear(4,2)
        self.c = conv_layer
        self.l = linear_layer

In [None]:
class Sequential(Module):
    ''' Applies the given layers one after another '''
    def __init__(self, *layers):
        super().__init__()
        self.layers = layers
        # The layers arrive as a tuple, so they have to be registered explicitly
        self.register_modules(*layers)

    def forward(self, x):
        output = x
        for layer in self.layers:
            output = layer(output)
        return output

In [None]:
class AdaptivePool(Module):
    ''' Averages the input over dims 2 and 3 '''
    def forward(self, x):
        return x.mean(dim=(2,3))

In [None]:
def nll(input, target):
    ''' Negative mean of the entries of input picked by target, one per row '''
    row_indices = range(target.shape[0])
    picked = input[row_indices, target]
    return -picked.mean()

def logsumexp(x):
    ''' log(sum(exp(x))) over the last dim, with the row maximum subtracted first '''
    row_max = x.max(-1)[0]
    shifted = x - row_max[:,None]
    return row_max + shifted.exp().sum(-1).log()

def log_softmax(x):
    ''' x minus its log-sum-exp over the last dim '''
    normalizer = x.logsumexp(-1,keepdim=True)
    return x - normalizer

def cross_entropy(preds, yb):
    ''' nll of the log-softmax of preds against the targets yb '''
    log_probabilities = log_softmax(preds)
    return nll(log_probabilities, yb).mean()

In [None]:
class Learner:
    ''' Minimal training loop that notifies callbacks at fixed points

    model:     callable model with a `training` flag and a `parameters()` method
    dls:       object with `.train` and `.valid` iterables of (xb, yb) batches
    loss_func: called as loss_func(preds, yb)
    lr:        learning rate handed to opt_func
    cbs:       callbacks; each one gets a `learner` attribute pointing at this object
    opt_func:  called as opt_func(model.parameters(), lr) to build the optimizer

    Calling the learner with an event name ('before_fit', 'before_epoch',
    'before_batch', 'after_batch', 'after_epoch', 'after_fit') runs the method
    of that name on every callback that has one.
    '''
    def __init__(self, model, dls, loss_func, lr, cbs, opt_func=SGD):
        store_attr()
        for cb in cbs: cb.learner = self

    def one_batch(self):
        ''' Forward pass and loss for self.batch, plus an optimizer step while training '''
        self('before_batch')
        xb,yb = self.batch
        self.preds = self.model(xb)
        self.loss = self.loss_func(self.preds, yb)
        if self.model.training:
            self.loss.backward()
            self.opt.step()
            # Gradients add up across backward() calls, so they are cleared after
            # every step. Optimizers without zero_grad (presumably clearing the
            # gradients inside step themselves) are left alone.
            zero_grad = getattr(self.opt, 'zero_grad', None)
            if zero_grad is not None: zero_grad()
        self('after_batch')

    def one_epoch(self, train):
        ''' One pass over dls.train (train=True) or dls.valid (train=False) '''
        self.model.training = train
        self('before_epoch')
        dl = self.dls.train if train else self.dls.valid
        # self.num and self.batch are set on the learner so callbacks can read them
        for self.num,self.batch in enumerate(progress_bar(dl, leave=False)):
            self.one_batch()
        self('after_epoch')

    def fit(self, n_epochs):
        ''' Runs n_epochs rounds of one training epoch followed by one validation epoch '''
        self('before_fit')
        self.opt = self.opt_func(self.model.parameters(), self.lr)
        self.n_epochs = n_epochs
        try:
            for self.epoch in range(n_epochs):
                self.one_epoch(True)
                self.one_epoch(False)
        # A callback can stop the fit early by raising CancelFitException
        except CancelFitException: pass
        self('after_fit')

    def __call__(self,name):
        for cb in self.cbs: getattr(cb,name,noop)()

In [None]:
class Callback(GetAttr):
    ''' Base class for Learner callbacks '''
    # presumably GetAttr forwards unknown attribute lookups to self.learner - TODO confirm
    _default = 'learner'

In [None]:
class SetupLearnerCB(Callback):
    ''' Moves the model and every batch to the device and transforms the inputs '''
    def before_batch(self):
        inputs, targets = to_device(self.batch)
        # The batch is replaced on the learner, not on this callback
        self.learner.batch = tfm_x(inputs),targets

    def before_fit(self):
        self.model.cuda()

In [None]:
class TrackResults(Callback):
    ''' Prints the epoch, the training flag, the mean loss and the accuracy after every epoch '''
    def before_epoch(self):
        # Per-batch correct counts, summed losses and batch sizes of the running epoch
        self.accs = []
        self.losses = []
        self.ns = []

    def after_epoch(self):
        total_items = sum(self.ns)
        mean_loss = sum(self.losses).item()/total_items
        accuracy = sum(self.accs).item()/total_items
        print(self.epoch, self.model.training, mean_loss, accuracy)

    def after_batch(self):
        inputs, targets = self.batch
        batch_items = len(inputs)
        correct = (self.preds.argmax(dim=1)==targets).float().sum()
        self.accs.append(correct)
        # The loss is weighted by the batch size so the epoch mean is per item
        self.losses.append(self.loss*batch_items)
        self.ns.append(batch_items)

In [None]:
class OneCycle(Callback):
    ''' Learning rate schedule: linear warm-up to base_lr, then linear decay towards zero '''
    def __init__(self, base_lr):
        self.base_lr = base_lr

    def before_fit(self):
        # Every learning rate that gets set is recorded here
        self.lrs = []

    def before_batch(self):
        if not self.model.training:
            return
        batches_per_epoch = len(self.dls.train)
        batches_done = self.epoch*batches_per_epoch + self.num
        batches_total = self.n_epochs*batches_per_epoch
        pct = batches_done/batches_total
        pct_start,div_start = 0.25,10
        if pct<pct_start:
            # Warm-up: from base_lr/div_start up to base_lr over the first pct_start of training
            pct /= pct_start
            lr = (1-pct)*self.base_lr/div_start + pct*self.base_lr
        else:
            # Decay: from base_lr down towards zero over the rest of training
            pct = (pct-pct_start)/(1-pct_start)
            lr = (1-pct)*self.base_lr
        # NOTE(review): this sets a plain `lr` attribute; confirm the optimizer in use reads it
        self.opt.lr = lr
        self.lrs.append(lr)

### Using created Neural Nets

In [None]:
# These next code cells are not mine, so I do not fully understand them :'(
x = torch.randn(200, 100)
y = torch.randn(200)

In [None]:
w1 = torch.randn(100,50) * sqrt(2 / 100)
b1 = torch.zeros(50)
w2 = torch.randn(50,1) * sqrt(2 / 50)
b2 = torch.zeros(1)

In [None]:
Parameter(torch.zeros(50))

In [None]:
def simple_cnn():
    ''' Four stride-2 conv layers (3 -> 16 -> 32 -> 64 -> 128 channels), pooling and a 10-way linear layer '''
    channels = (3, 16, 32, 64, 128)
    # Consecutive channel pairs give the (inputs, filters) of each conv layer
    conv_layers = [ConvLayer(ni, nf, stride=2) for ni, nf in zip(channels, channels[1:])]
    return Sequential(*conv_layers, AdaptivePool(), Linear(128, 10))

In [None]:
m = simple_cnn()
len(m.parameters())

In [None]:
def print_stats(outp, inp): print (outp.mean().item(),outp.std().item())
for i in range(4): m.layers[i].hook = print_stats
xbt = tfm_x(batch[0])
r = m(xbt)
r.shape

In [None]:
# sm is computed here because the cell that defines it comes after this one,
# which made this cell fail on a fresh top-to-bottom run
sm = log_softmax(r)
loss = nll(sm, batch[1])
loss

In [None]:
sm = log_softmax(r); sm[0][0]

In [None]:
x = torch.rand(5)
a = x.max()
x.exp().sum().log() == a + (x-a).exp().sum().log()

In [None]:
logsumexp(r)[0]

In [None]:
sm = log_softmax(r); sm[0][0]

In [None]:
cbs = [SetupLearnerCB(),TrackResults()]
learn = Learner(simple_cnn(), data_loaders, cross_entropy, lr=0.1, cbs=cbs)
learn.fit(1)

In [None]:
onecyc = OneCycle(0.1)
learn = Learner(simple_cnn(), data_loaders, cross_entropy, lr=0.1, cbs=cbs+[onecyc])

In [None]:
learn.fit(8)

In [None]:
plt.plot(onecyc.lrs);

# Testing