# Set up the ABSA Demo

### Step 1 - Install additional pip packages on your Compute instance

In [None]:
!pip install git+https://github.com/hnky/nlp-architect.git@absa

In [None]:
!pip install spacy==2.1.8

### Step 2 - Download Notebooks, Training Data, Training / Inference scripts

In [None]:
import azureml
from azureml.core import Workspace, Datastore, Experiment, Environment, Model
import urllib.request
from pathlib import Path

In [None]:
# This will open a device login prompt. Log in with credentials that have access to the workspace.

# Connect to the workspace described by the local config.
# `ws` is reused by later cells (for example, to look up or create the compute cluster).
ws = Workspace.from_config()
print("Using workspace:",ws.name,"in region", ws.location)

# Connect to the workspace's default datastore.
# `ds` is reused by a later cell to upload the downloaded dataset.
ds = ws.get_default_datastore()
print("Datastore:",ds.name)

In [None]:
# Make sure the local working folders exist.
# Re-running is safe: exist_ok=True leaves folders that are already there untouched.
for folder_name in ("dataset", "notebooks", "scripts", "temp"):
    Path(folder_name).mkdir(parents=True, exist_ok=True)

The cell below will take some time to run because it downloads a large dataset plus code files. Please allow around 10-15 minutes.

In [None]:
# Download all files needed
base_link = "https://raw.githubusercontent.com/microsoft/ignite-learning-paths-training-aiml/main/aiml40/absa/"

# Download Data
# The GloVe archive is large, so it is fetched only when it is not already
# present. It is written to a scratch file first and moved to its final path
# only after urlretrieve has returned successfully. Otherwise an interrupted
# download would leave a truncated zip behind, and the is_file() check would
# wrongly treat it as complete on the next run.
glove_zip = Path("dataset/glove.840B.300d.zip")
if not glove_zip.is_file():
    glove_partial = Path("temp/glove.840B.300d.zip.part")
    glove_partial.parent.mkdir(parents=True, exist_ok=True)
    urllib.request.urlretrieve('http://nlp.stanford.edu/data/glove.840B.300d.zip', str(glove_partial))
    glove_partial.replace(glove_zip)

# The remaining files are small and are re-fetched (overwritten) on every run.
# Each entry is (path relative to base_link, local destination), in download order.
repo_files = (
    # Training / validation data
    ('../dataset/clothing_absa_train.csv', 'dataset/clothing_absa_train.csv'),
    ('../dataset/clothing-absa-validation.json', 'dataset/clothing-absa-validation.json'),
    ('../dataset/clothing_absa_train_small.csv', 'dataset/clothing_absa_train_small.csv'),
    # Notebooks
    ('notebooks/absa-hyperdrive.ipynb', 'notebooks/absa-hyperdrive.ipynb'),
    ('notebooks/absa.ipynb', 'notebooks/absa.ipynb'),
    # Training / inference scripts
    ('scripts/score.py', 'scripts/score.py'),
    ('scripts/train.py', 'scripts/train.py'),
)
for remote_path, local_path in repo_files:
    urllib.request.urlretrieve(base_link + remote_path, local_path)


In [None]:
# Upload data to the data store
# Sends the contents of the local 'dataset' folder (including the GloVe archive
# downloaded above) to the 'clothing_data' path on the datastore `ds`.
# NOTE(review): overwrite=False presumably leaves files that already exist in
# the datastore untouched, so refreshed local copies would not replace them - confirm.
ds.upload('dataset', target_path='clothing_data', overwrite=False, show_progress=True)

In [None]:
### Step 3 - Set up AMLS
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "absa-cluster"

# Look the cluster up by name in the workspace. If the lookup raises
# ComputeTargetException, provision a new low-priority cluster (0 to 8 nodes)
# and block until provisioning has completed.
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D3_V2',
        vm_priority='lowpriority',
        min_nodes=0,
        max_nodes=8,
    )
    cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    cluster.wait_for_completion(show_output=True)
else:
    # Only reached when the lookup succeeded, i.e. an existing cluster is reused.
    print('Using compute cluster:', cluster_name)
