First, we need to download book data from AWS:

In [None]:
# Install the AWS CLI and download the data and models used in this course. This has already been done and does not need to be run again.
#   !pip install awscli
#   !aws s3 cp s3://applied-nlp-book/data/ data --recursive --no-sign-request
#   !aws s3 cp s3://applied-nlp-book/models/ag_dataset/ models/ag_dataset --recursive --no-sign-request

## Mount Google Drive and Switch to the Apps Directory

In [1]:
# Connect to Google Drive. The mount point is /content/drive.
# NOTE(review): force_remount=True presumably re-mounts even when the drive is already
# mounted — confirm against the google.colab documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
 
# Switch to the apps directory: the %%writefile cells in this notebook use bare file names,
# so the Streamlit scripts are written to (and later run from) the current directory.
# The %mkdir line only needs to be run once, before the directory exists.
# %mkdir '/content/drive/My Drive/NLP Book Notes/apps'
%cd '/content/drive/My Drive/NLP Book Notes/apps'

Mounted at /content/drive
/content/drive/My Drive/NLP Book Notes/apps


### Build and Deploy Streamlit App (Dataset NER)

In [None]:
# Here, we write a python file into our "apps" directory using jupyter cell magic. 

%%writefile ner_app_agnews.py
# Load libraries
import spacy
import streamlit as st
import spacy_streamlit
import numpy as np
import pandas as pd
import random
 
# Set the page title displayed at the top of the app
st.title(':star: AG News Dataset')
 
# Loader for CSV files stored under the course folder on Google Drive.
# NOTE(review): st.cache is flagged as deprecated in newer Streamlit releases — confirm
# against the installed version before relying on it.
@st.cache
def read_data(file):
    """Read a CSV from the course folder and return it as a DataFrame.

    ``file`` is appended verbatim to the course root, so it must start with '/'.
    """
    course_root = '/content/drive/MyDrive/Applied-NLP-in-the-Enterprise'
    return pd.read_csv(course_root + file)
 
# Load the training split used by every widget and view below
data = read_data('/data/ag_dataset/train_updated.csv')

# Category filter. The st.cache decorator stores the result, so calling the function again
# with the same arguments returns the cached output instead of recomputing it.
@st.cache
def select_category(data, category_option):
    """Return the rows of ``data`` whose ``class_name`` equals ``category_option``."""
    in_category = data.class_name == category_option
    return data.loc[in_category]
 
# Sidebar controls: which views to render, which category, and which article.
st.sidebar.header("Parameters")

display_selections = st.sidebar.multiselect(
    "Which data would you like to display?",
    ["Full Data","Single Article"],
    None,
)

category_option = st.sidebar.radio(
    'Which category would you like to explore?',
    data.class_name.unique(),
)

# Index labels of the rows in the chosen category. Their min and max bound the article
# picker, and the picker starts at the smallest one.
category_index = data.loc[data.class_name == category_option].index
article_option = st.sidebar.number_input(
    'Which article would you like to explore?',
    category_index.min(),
    category_index.max(),
    category_index.min(),
)

# Show the title/description columns of every article in the selected category.
if "Full Data" in display_selections:
    st.header("Full Data")
    # Filter first, then select columns: .loc must be applied to the DataFrame returned by
    # select_category(), not to the category string.
    category_rows = select_category(data, category_option)
    # st.dataframe accepts the width/height sizing arguments directly.
    st.dataframe(category_rows.loc[:, ["title", "description"]],
                 width=1980, height=200)

# Show the selected article's title and description when requested.
if "Single Article" in display_selections:
    st.header("Single Article")
    for heading, column in (("Title", "title"), ("Description", "description")):
        st.subheader(heading)
        st.write(data.loc[article_option, column])

# Text passed to both NER models: the description of the selected article
default_text = data.loc[article_option, "description"]
 
# Show NER Results
st.header("NER Results")
# Two columns: the stock spaCy pipeline goes in `base`, the custom model in `custom`.
base, custom = st.columns(2)
 
# Base SpaCy Model
with base:
    base_model = spacy_streamlit.load_model("en_core_web_lg")
    doc_base = base_model(default_text)
    ner_labels = ["ORG","PERSON","GPE"]
    show_table = True
    title = "Base SpaCy Model"
    # Each visualize_ner call gets its own `key`.
    # NOTE(review): presumably this keeps the widgets it creates from colliding with the
    # other visualizer on the page — confirm in the spacy-streamlit documentation.
    spacy_streamlit.visualize_ner(doc_base,
                                  labels=ner_labels,
                                  show_table=show_table,
                                  title=title,
                                  key = 0)

# Run custom SpaCy model using Streamlit to display output.
with custom:
    # Uses the prebuilt NER model. To use your own model instead, load it from
    # /content/drive/MyDrive/NLP Book Notes/ner-last-model/models/ag_dataset/ner/ner-gpu-blank/model-last
    custom_ner_model = spacy_streamlit.load_model(
    '/content/drive/MyDrive/Applied-NLP-in-the-Enterprise/models/ag_dataset/ner/ner-gpu-blank/model-best')
    doc_custom = custom_ner_model(default_text)
    # Same labels as the base column plus TICKER
    ner_labels = ["ORG","PERSON","GPE","TICKER"]
    show_table = True
    title = "Custom SpaCy Model"
    # A key distinct from the base column's visualizer (which uses 0)
    spacy_streamlit.visualize_ner(doc_custom,
                                  labels=ner_labels,
                                  show_table=show_table,
                                  title=title,
                                  key = 1)

Overwriting ner_app_agnews.py


In [None]:
# Import libraries
'''Main Libraries'''
import numpy as np
import pandas as pd
 
# Install spacy (using cuda 112 since colab now uses cuda 11.2)
!pip install -U spacy[cuda112, transformers]
!pip install -U spacy-lookups-data
!pip install cupy-cuda112
!pip install -U spacy[transformers]

# Download pretrained language model (core model)
!python -m spacy download en_core_web_lg
 
# Install spacy-streamlit
!pip install spacy-streamlit

# Install Streamlit
!pip install streamlit
 
!pip install protobuf==3.20.3

In [None]:
# Start the Streamlit app in the background (`&`) and expose port 8501 through localtunnel;
# the public address is printed on the "your url is:" line of the output.
!streamlit run ner_app_agnews.py  & npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 2.124s

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.2:8501[0m
[34m  External URL: [0m[1mhttp://34.90.175.125:8501[0m
[0m
your url is: https://large-ties-help-34-90-175-125.loca.lt
2023-01-12 07:06:15.035 Uncaught app exception
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/streamlit/runtime/scriptrunner/script_runner.py", line 565, in _run_script
    exec(code, module.__dict__)
  File "/content/drive/MyDrive/NLP Book Notes/apps/ner_app_agnews.py", line 90, in <module>
    spacy_streamlit.visualize_ner(doc_custom,
  File "/usr/local/lib/python3.8/dist-packages/spacy_streamlit/visualizer.py", line 250, in visualize_ner
    label_select = exp.multiselect(
  File "/usr/local/lib/python3.8/dist-packages/streamlit/runtime/metrics_util.py", line 311, in wrappe

### Build and Deploy Streamlit App for Custom NER

In [2]:
%%writefile ner_app_custom.py
# Load libraries
import spacy
import streamlit as st
import spacy_streamlit
import numpy as np
import pandas as pd
import random
 
# Set title and ask the user for the text to analyze
st.title(':star: Custom NER')
st.header("Custom Text")
# The text area's content is what both models below are run on
default_text = st.text_area("Enter text to analyze.",
                            height=250)
 
# Show NER Results
st.header("NER Results")
# Two columns: one for the base model, one for the custom model
base, custom = st.columns(2)
# Legend for the entity labels shared by both models
st.write("GPE - GeoPolitical Entity, ORG - ORGanization, PERSON - Person")
 
# Base SpaCy Model: run the stock en_core_web_lg pipeline on the entered text
with base:
    base_model = spacy_streamlit.load_model("en_core_web_lg")
    doc_base = base_model(default_text)
    ner_labels = ["ORG","PERSON","GPE"]
    show_table = True
    title = "Base SpaCy Model"
    # Each visualize_ner call gets its own `key`.
    # NOTE(review): presumably this keeps the widgets it creates from colliding with the
    # other visualizer on the page — confirm in the spacy-streamlit documentation.
    spacy_streamlit.visualize_ner(doc_base,
                                  labels=ner_labels,
                                  show_table=show_table,
                                  title=title,
                                  key = 0)

# Custom SpaCy Model: run the trained NER pipeline stored on Drive on the entered text
with custom:
    custom_ner_model = spacy_streamlit.load_model(
    '/content/drive/MyDrive/Applied-NLP-in-the-Enterprise/models/ag_dataset/ner/ner-gpu-blank/model-best')
    doc_custom = custom_ner_model(default_text)
    # Same labels as the base column plus TICKER
    ner_labels = ["ORG","PERSON","GPE","TICKER"]
    show_table = True
    title = "Custom SpaCy Model"
    # A key distinct from the base column's visualizer (which uses 0)
    spacy_streamlit.visualize_ner(doc_custom,
                                  labels=ner_labels,
                                  show_table=show_table,
                                  title=title,
                                  key = 1)

Overwriting ner_app_custom.py


In [None]:
# Import libraries
'''Main Libraries'''
import numpy as np
import pandas as pd
 
# Install spacy (using cuda 112 since colab now uses cuda 11.2)
!pip install -U spacy[cuda112]
!pip install -U spacy-lookups-data
!pip install cupy-cuda112
!pip install -U spacy[transformers]
 
# Download pretrained language model (core model)
!python -m spacy download en_core_web_lg
 
# Install spacy-streamlit
!pip install spacy-streamlit

# Install Streamlit
!pip install streamlit
 
!pip install protobuf==3.20.3

In [None]:
# Make sure we are in the apps directory, where ner_app_custom.py was written
%cd '/content/drive/My Drive/NLP Book Notes/apps'
# Start the Streamlit app in the background (`&`) and expose port 8501 through localtunnel
!streamlit run ner_app_custom.py  & npx localtunnel --port 8501

### Build and Deploy Streamlit App for Text Classification on AGNews Dataset

In [None]:
# NOT UPDATED FOR USE

%%writefile textcat_app_agnews.py
# Load libraries
import spacy
import streamlit as st
import spacy_streamlit
import numpy as np
import pandas as pd
import random
 
# Set the page title displayed at the top of the app
st.title(':star: AG News Dataset')
 
# Loader for CSV files stored under the notes folder on Google Drive.
# NOTE(review): st.cache is flagged as deprecated in newer Streamlit releases — confirm
# against the installed version before relying on it.
@st.cache
def read_data(file):
    """Read a CSV from the notes folder and return it as a DataFrame.

    ``file`` is appended verbatim to the root folder, so it must start with '/'.
    """
    notes_root = '/content/drive/My Drive/Python/NLP Book Notes'
    return pd.read_csv(notes_root + file)
 
# Load the prepared training split used by every widget and view below
data = read_data('/data/ag_dataset/prepared/train_prepared.csv')
 
# Category filter, cached with st.cache
@st.cache
def select_category(data, category_option):
    """Return the rows of ``data`` whose ``class_name`` equals ``category_option``."""
    in_category = data.class_name == category_option
    return data.loc[in_category]
 
# Sidebar controls: which views to render, which category, and which article.
st.sidebar.header("Parameters")

display_selections = st.sidebar.multiselect(
    "Which data would you like to display?",
    ["Full Data","Single Article"],
    None,
)

category_option = st.sidebar.radio(
    'Which category would you like to explore?',
    data.class_name.unique(),
)

# Index labels of the rows in the chosen category. Their min and max bound the article
# picker, and the picker starts at the smallest one.
category_index = data.loc[data.class_name == category_option].index
article_option = st.sidebar.number_input(
    'Which article would you like to explore?',
    category_index.min(),
    category_index.max(),
    category_index.min(),
)
 
# Show the title/description columns of every article in the selected category.
if "Full Data" in display_selections:
    st.header("Full Data")
    category_rows = select_category(data, category_option)
    # st.dataframe accepts the width/height sizing arguments directly, whereas extra
    # keyword arguments passed to st.write are not sizing options.
    st.dataframe(category_rows.loc[:, ["title", "description"]],
                 width=1980, height=200)
 

In [None]:
# NOTE(review): this cell reads display_selections, data and article_option, which are only
# assigned inside the textcat_app_agnews.py text written by the %%writefile cell, so it looks
# like the tail of that script that ended up in a separate notebook cell. As a separate cell
# it is not written to the file — confirm and merge it back into the %%writefile cell.

# Show the selected article's title and description when requested
if "Single Article" in display_selections:
    st.header("Single Article")
    st.subheader("Title")
    st.write(data.loc[article_option,"title"])
    st.subheader("Description")
    st.write(data.loc[article_option,"description"])

# Set text: the description of the selected article is what gets classified
default_text = data.loc[article_option,"description"]
 
# Custom SpaCy Model
# The backslash at the end of the path's first line continues the string literal, so the
# two pieces form a single path (the second piece starts at column 0 on purpose).
custom_model = spacy_streamlit.load_model(\
'/content/drive/My Drive/Python/NLP Book Notes/\
models/ag_dataset/textcat-prodigy-V3-base-full/model-best')
doc= custom_model(default_text)
title = "Text Classification"
spacy_streamlit.visualize_textcat(doc, title=title)
# Pick the key of doc.cats with the largest value and report that value rounded to 2 decimals
prediction = max(doc.cats, key=lambda key: doc.cats[key])
confidence = str(np.round(doc.cats[prediction],2))
st.header("Prediction: " + prediction)
st.subheader("Confidence: " + confidence)

In [None]:
# Deploy Text Classification App for AGNews
# NOTE(review): `deploy` is not defined or imported anywhere in the visible notebook; the
# NER apps are launched with `!streamlit run <app>.py & npx localtunnel --port 8501`.
# NOTE(review): the backslash continuation sits inside the string literal, so the leading
# space on the second line becomes part of the path ("...Notes/ apps/...") — confirm intent.
deploy('/content/drive/"My Drive"/NLP Book Notes/\
 apps/textcat_app_agnews.py')

### Build and Deploy Streamlit App for Text Classification on Custom Text

In [None]:
# NOT UPDATED FOR USE

%%writefile textcat_app_custom.py
# Load libraries
import spacy
import streamlit as st
import spacy_streamlit
import numpy as np
import pandas as pd
import random
 
# Page heading and free-text input to classify
st.header(":star: Text Classification")
default_text = st.text_area("Enter text to analyze.")
 
# Load the trained text-classification pipeline from Drive and run it on the input
model_dir = ('/content/drive/My Drive/Python/NLP Book Notes/'
             'models/ag_dataset/textcat-prodigy-V3-base-full/model-best')
custom_model = spacy_streamlit.load_model(model_dir)
doc = custom_model(default_text)

# Render the per-category scores
title = "Text"
spacy_streamlit.visualize_textcat(doc, title=title)

# Report the highest-scoring category and its score rounded to two decimals
prediction = max(doc.cats, key=doc.cats.get)
confidence = str(np.round(doc.cats[prediction], 2))
st.header("Prediction: " + prediction)
st.subheader("Confidence: " + confidence)