# GTC Keynote Playlist Generator

## Import libraries

In [37]:
from sentence_transformers import SentenceTransformer, util
import gtc_spring23 as gtc
import pandas as pd
import smart_search

import ipywidgets as widgets

## Notebook variables and Inputs

In [43]:
# Earlier hard-coded choices, kept for reference only. The model and the query
# are now picked interactively through the `model_name` and `query_string`
# widgets defined in the cells below.
#model_name = 'msmarco-distilbert-base-v4'
#model_name = 'msmarco-MiniLM-L12-cos-v5'
#model_name = smart_search.sentence_models[2]
#query_string = 'USPS'

# Number of keynote sections in the playlist: passed as `top_k` to the
# semantic search and used as the count of results printed at the end.
playlist_count = 5

In [68]:
# Free-text search query consumed by the semantic-search cell below.
# The box is pre-filled with a sample query so that "Restart Kernel -> Run All"
# embeds a real query rather than the empty string (which would yield a
# meaningless ranking). Edit the text and re-run the search cell to try
# another term.
query_string = widgets.Text(
    value='USPS',
    placeholder='Enter Query String',
    description='Query String:',
    disabled=False,
)
# Bare last expression renders the widget as the cell output.
query_string

Text(value='', description='Query String:', placeholder='Enter Query String')

In [54]:
# Embedding-model picker: the choices are taken from
# smart_search.sentence_models, and the current selection is read later via
# `model_name.value`.
model_name = widgets.Dropdown(
    description='Model:',
    options=smart_search.sentence_models,
)
# Bare last expression renders the dropdown as the cell output.
model_name

Dropdown(description='Model:', options=('all-distilroberta-v1', 'allenai-specter', 'all-mpnet-base-v1', 'all-m…

## Embed keynote sections, query term, and perform semantic search

In [69]:
%%time

print("Loading model: '{}'".format(model_name.value))
model = SentenceTransformer(model_name.value,cache_folder='../models/')

print("Embedding GTC Sections.")
source_embeddings = model.encode(gtc.section_text,convert_to_tensor=True,show_progress_bar=True)

print("Embedding query.")
query_embedding = model.encode(query_string.value,convert_to_tensor=True)

# Using the util function to run semantic search, default to cosine
topk_results = pd.DataFrame(util.semantic_search(query_embedding, source_embeddings, top_k=playlist_count)[0])

Loading model: 'stsb-mpnet-base-v2'
Embedding GTC Sections.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Embedding query.
CPU times: user 2.14 s, sys: 89.9 ms, total: 2.23 s
Wall time: 667 ms


## Display the results

In [70]:
# Print the ranked playlist: one entry per hit returned by the semantic search.
#
# The search can return fewer than `playlist_count` hits (for example when the
# corpus has fewer sections than requested), so the loop walks the rows that
# are actually present instead of assuming `playlist_count` of them, and the
# header reports the real number of results.
result_count = len(topk_results)
print(f"The following top {result_count} sections are ranked in order of relevance to the query term: '{query_string.value}' using {model_name.value} as an embedding model\n")

if result_count:
    # Iterating the two columns in lockstep keeps the ranking order and avoids
    # label-based lookups that fail when a row index is missing.
    for corpus_id, score in zip(topk_results['corpus_id'], topk_results['score']):
        idx = int(corpus_id)  # position of this section in the gtc.* sequences
        print(f"Section: {gtc.section_name[idx]}")
        print(f"Chapter: {gtc.chapter_name[idx]}")
        print(f"Section URL: {gtc.section_url[idx]}")
        print(f"Section Summary (bart-large-cnn): {gtc.section_summary[idx]}")
        print(f"Score: {score}\n")

The following top 5 sections are ranked in order of relevance to the query term: 'USPS' using stsb-mpnet-base-v2 as an embedding model

Section: Route Optimization
Chapter: 2) Acceleration Libraries
Section URL: https://youtu.be/DiGB5uAYKAg?t=938
Section Summary (bart-large-cnn): AT&T is tapping into NVIDIA accelerated computing and AI for sustainability, cost savings, and new services. cuOpt can also optimize logistic services. 400 billion parcels are delivered to 377 billion stops each year. Deloitte, Capgemini, Softserve, Accenture, and Quantiphi are using NVIDIA cuOpt to help customers optimize operations.
Score: 0.17746584117412567

Section: Amazon Robotics
Chapter: 7) Omniverse
Section URL: https://youtu.be/DiGB5uAYKAg?t=3526
Section Summary (bart-large-cnn): Amazon Robotics has manufactured and deployed the largest fleet of mobile industrial robots in the world. The newest member of this robotic fleet is Proteus, Amazon's first fully autonomous warehouse robot. Let's see how NVI