# **How to use BOLT,**
# **fast training,**
# **benefits of sparse models,**
# **and fast inference.**

## Learning the syntax with a simple exercise: MNIST

In [None]:
# TODO(Geordie): Add download scripts and change to relative path
from thirdai import dataset

# Training data: read "datasets/mnist/mnist" with a batch size of 256.
mnist_train = dataset.load_bolt_svm_dataset(
    filename="datasets/mnist/mnist", batch_size=256
)

# Test data: read "datasets/mnist/mnist.t" with the same batch size.
mnist_test = dataset.load_bolt_svm_dataset(
    filename="datasets/mnist/mnist.t", batch_size=256
)


## We'll use a simple neural network with 784-dim input, 1000-dim hidden layer with ReLU, and 10-dim output layer with Softmax.

### **Keras**

In [None]:
from tensorflow import keras

# Two dense layers: a 1000-unit ReLU layer that takes 784 input features,
# followed by a 10-unit softmax output layer.
keras_layers = [
    keras.layers.Dense(units=1000, activation="relu", input_shape=(784,)),
    keras.layers.Dense(units=10, activation="softmax"),
]

# Stack the layers into a sequential model.
keras_model = keras.Sequential(layers=keras_layers)

### **BOLT**

In [None]:
from thirdai import bolt

# Layer configuration: a 1000-dim ReLU layer followed by a 10-dim Softmax
# layer. The input dimension is not part of a layer config; it is passed to
# the network itself.
mnist_layers = [
    bolt.LayerConfig(dim=1000, activation_function=bolt.ActivationFunctions.ReLU),
    bolt.LayerConfig(dim=10, activation_function=bolt.ActivationFunctions.Softmax),
]

# Build the network on top of a 784-dim input.
mnist_network = bolt.Network(layers=mnist_layers, input_dim=784)

## We now train the network with the categorical cross-entropy loss function. We'll measure how we do with the categorical accuracy metric.

In [None]:
# Train for a single epoch with categorical cross-entropy loss.
mnist_network.train(
    train_data=mnist_train,
    loss_fn=bolt.CategoricalCrossEntropyLoss(),
    learning_rate=0.001,
    epochs=1,
)

# Run prediction on the test data, requesting the categorical accuracy metric.
mnist_network.predict(
    test_data=mnist_test, metrics=["categorical_accuracy"], verbose=True
)

## That's neat, but what about bigger models?

In [None]:
# TODO(Geordie): Add download scripts and change to relative path
# Intent classification training data, batch size 256.
intent_class_train = dataset.load_bolt_svm_dataset(
    filename="datasets/intent_classification/train_shuf.svm", batch_size=256
)

# Intent classification test data, same batch size.
intent_class_test = dataset.load_bolt_svm_dataset(
    filename="datasets/intent_classification/test_shuf.svm", batch_size=256
)

## Use `load_factor` to set the computational budget. <br> BOLT curates the best small network for each sample to accelerate training.

In [None]:
# A 10000-dim ReLU hidden layer configured with load_factor=0.05, followed by
# a 151-dim Softmax output layer that sets no load_factor.
bigger_layers = [
    bolt.LayerConfig(
        dim=10000,
        load_factor=0.05,
        activation_function=bolt.ActivationFunctions.ReLU,
    ),
    bolt.LayerConfig(dim=151, activation_function=bolt.ActivationFunctions.Softmax),
]

# Build the network on top of a 5512-dim input.
bigger_network = bolt.Network(layers=bigger_layers, input_dim=5512)

## You can also use sparsity to accelerate inference using `enable_sparse_inference()`.

In [None]:
# First pass: two epochs of training with categorical cross-entropy loss.
bigger_network.train(
    train_data=intent_class_train,
    loss_fn=bolt.CategoricalCrossEntropyLoss(),
    learning_rate=0.001,
    epochs=2,
)

# Switch the network to sparse inference ...
bigger_network.enable_sparse_inference()

# ... then train for one more epoch with the same settings before evaluating.
bigger_network.train(
    train_data=intent_class_train,
    loss_fn=bolt.CategoricalCrossEntropyLoss(),
    learning_rate=0.001,
    epochs=1,
)

# Run prediction on the test data, requesting the categorical accuracy metric.
bigger_network.predict(
    test_data=intent_class_test, metrics=["categorical_accuracy"], verbose=True
)

## Even larger model for accurate sentiment classification.

In [None]:
# Yelp review training data, loaded with a batch size of 1024.
train_data = dataset.load_bolt_svm_dataset(
    filename="../sa_demo/text_data/yelp_review_full_2class_train.svm",
    batch_size=1024,
)

# Yelp review test data, loaded with a smaller batch size of 256.
test_data = dataset.load_bolt_svm_dataset(
    filename="../sa_demo/text_data/yelp_review_full_2class_test.svm",
    batch_size=256,
)

## Supporting 100,000-dim rich input features through sparsity.

In [None]:
# A 2000-dim ReLU hidden layer with load_factor=0.2, followed by a 2-dim
# Softmax output layer with load_factor=1.0.
yelp_sentiment_analysis_layers = [
    bolt.LayerConfig(
        dim=2000,
        load_factor=0.2,
        activation_function=bolt.ActivationFunctions.ReLU,
    ),
    bolt.LayerConfig(
        dim=2,
        load_factor=1.0,
        activation_function=bolt.ActivationFunctions.Softmax,
    ),
]

# Build the network on top of a 100000-dim input.
yelp_sentiment_analysis_network = bolt.Network(
    layers=yelp_sentiment_analysis_layers, input_dim=100000
)

## Train a model once and save it for later with `save()`.

In [None]:
# TODO(Geordie): Add download scripts and change to relative path


# Train for 20 epochs with categorical cross-entropy loss.
# NOTE(review): `rehash` and `rebuild` presumably set how often BOLT's
# internal hash structures are refreshed -- confirm against the BOLT docs.
yelp_sentiment_analysis_network.train(
    train_data=train_data,
    loss_fn=bolt.CategoricalCrossEntropyLoss(),
    learning_rate=0.0001,
    epochs=20,
    rehash=6400,
    rebuild=128000,
)

# Save the trained network under the name "yelp_sentiment_analysis_cp".
yelp_sentiment_analysis_network.save(
    filename="yelp_sentiment_analysis_cp"
)

## Use the saved model with `load()`.

In [None]:
# Rebind the name to a network loaded from "yelp_sentiment_analysis_cp".
yelp_sentiment_analysis_network = bolt.Network.load(
    filename="yelp_sentiment_analysis_cp"
)

## RoBERTa: 83% accuracy. Let's see how BOLT does!

In [None]:
# TODO(Geordie): Add download scripts and change to relative path
# Run prediction on the Yelp test data, requesting the categorical accuracy
# metric, and keep whatever predict() returns in `res`.
res = yelp_sentiment_analysis_network.predict(
    test_data=test_data, metrics=["categorical_accuracy"], verbose=True
)

## We also trained an even larger **2 billion parameter** model on a large text corpus to build an interactive sentiment analysis demo.

In [None]:
# TODO(Geordie): Add download scripts and change to relative path
# Load a previously saved network from the "interactive_demo_cp" checkpoint.
sentiment_analysis_network = bolt.Network.load(
    filename="interactive_demo_cp"
)

## Let's run this!

In [None]:
import interactive_sentiment_analysis
# Start the interactive demo on the loaded network with verbose output off.
interactive_sentiment_analysis.demo(
    sentiment_analysis_network,
    verbose=False,
)

## Let's talk speed. How much faster is BOLT compared to RoBERTa?

In [None]:
import time
from transformers import pipeline
# Build the sentiment-analysis pipeline before starting the clock, so that
# constructing it is not part of the measured interval.
sentiment_analysis = pipeline(
    "sentiment-analysis", model="siebert/sentiment-roberta-large-english"
)

# Time a single call. perf_counter() is a monotonic, high-resolution clock
# meant for measuring elapsed intervals; time.time() is wall-clock time and
# can jump if the system clock is adjusted mid-measurement.
t1 = time.perf_counter()
out = sentiment_analysis("I love chocolate.")
t2 = time.perf_counter()

# Report the pipeline output and the elapsed time in seconds.
print(out, flush=True)
print('time elapsed: ', str(t2 - t1), 's', flush=True)