diff --git a/.github/kubernetes-compute/convert.py b/.github/kubernetes-compute/convert.py deleted file mode 100644 index e9cd6dd24c..0000000000 --- a/.github/kubernetes-compute/convert.py +++ /dev/null @@ -1,107 +0,0 @@ -import argparse -import yaml -import os - - -def convert(input_file, compute_target, instance_type, common_runtime, output_file): - def _convert(input_file, data, job_schema): - # check job type - is_pipeline_job = False - is_sweep_job = False - if "pipelineJob" in job_schema or "jobs" in data: - is_pipeline_job = True - if "sweepJob" in job_schema or data.get("type") == "sweep": - is_sweep_job = True - - print("Job type: pipelineJob", is_pipeline_job, "sweepJob:", is_sweep_job) - - # change compute target - if compute_target: - data["compute"] = "azureml:%s" % compute_target - if is_pipeline_job: - settings = data.get("settings", {}) - settings["default_compute"] = "azureml:%s" % compute_target - data["settings"] = settings - - # set instance type - if not is_pipeline_job and instance_type: - resources = data.get("resources", {}) - resources["instance_type"] = instance_type - data["resources"] = resources - - for field in ["trial", "component"]: - if field not in data: - continue - - file_field = data[field] - if not isinstance(file_field, str): - continue - - if file_field.startswith("file:"): - file_field = file_field.split(":", 1)[1] - - print("Found sub job spec:", file_field) - dirname = os.path.dirname(input_file) - convert( - os.path.join(dirname, file_field), - compute_target, - instance_type, - common_runtime, - "", - ) - - if is_pipeline_job: - jobs = data.get("jobs", {}) - for step in jobs: - print("Found step:", step) - _convert(input_file, jobs[step], "") - return - - print("Processing file:", input_file) - if not os.path.exists(input_file): - print("Warning: File doesn't exist: ", input_file) - return - with open(input_file, "r") as f: - data = yaml.load(f, Loader=yaml.FullLoader) - job_schema = data.get("$schema", "") - 
_convert(input_file, data, job_schema) - - # write to output file if output file is specified, otherwise change inplace. - if output_file: - with open(output_file, "w") as f: - yaml.dump(data, f) - else: - with open(input_file, "w") as f: - yaml.dump(data, f) - - -if __name__ == "__main__": - # Parse command line arguments - parser = argparse.ArgumentParser( - description="Convert test case to AMLARC-compatible files." - ) - parser.add_argument("-i", "--input", required=True, help="Input test case file") - parser.add_argument( - "-o", - "--output", - required=False, - help="Output AMLARC-compatible file, if not provides, " "replace file inplace", - ) - parser.add_argument("-c", "--compute-target", required=False, help="Compute target") - parser.add_argument("-it", "--instance-type", required=False, help="Instance type") - parser.add_argument( - "-cr", - "--common-runtime", - required=False, - default=False, - action="store_true", - help='Enable common runtime explicitly, default is "false"', - ) - args = parser.parse_args() - convert( - args.input, - args.compute_target, - args.instance_type, - args.common_runtime, - args.output, - ) diff --git a/.github/kubernetes-compute/papermill_count_failed_case.py b/.github/kubernetes-compute/papermill_count_failed_case.py deleted file mode 100644 index 2e1f4f263b..0000000000 --- a/.github/kubernetes-compute/papermill_count_failed_case.py +++ /dev/null @@ -1,48 +0,0 @@ -import argparse -import json -import os - - -def check_test_case(input_file): - error_count = 0 - error_list = [] - not_run_list = [] - - with open(input_file) as f: - files = f.readlines() - for file in files: - file = file.replace("\n", "") - if ".ipynb" in file: - file = file.replace(".ipynb", ".output.ipynb") - if not os.path.isfile(file): - not_run_list.append(file) - continue - with open(file) as output_file: - output_file_obj = json.load(output_file) - if ( - "An Exception was encountered at" - in output_file_obj["cells"][0]["source"][0] - ): - 
error_count += 1 - error_list.append(file) - - if error_count != 0: - for err in error_list: - print(err) - - if len(not_run_list) > 0: - print("\nThese test case are skipped") - for not_run in not_run_list: - print(not_run) - - raise Exception("Error occurs in these test cases") - - -if __name__ == "__main__": - # Parse command line arguments - parser = argparse.ArgumentParser(description="Check all papermill output files.") - parser.add_argument("-i", "--input", required=True, help="job ipynb file list") - - args = parser.parse_args() - - check_test_case(args.input) diff --git a/.github/kubernetes-compute/sdk_testcase_collector.py b/.github/kubernetes-compute/sdk_testcase_collector.py deleted file mode 100644 index c077c9055a..0000000000 --- a/.github/kubernetes-compute/sdk_testcase_collector.py +++ /dev/null @@ -1,42 +0,0 @@ -import argparse -import pathlib -import yaml -import re - - -def collect_test_cases(output_file, regex): - root_dir = ".github/workflows" - root = pathlib.Path(root_dir) - - testcases = [] - for item in root.iterdir(): - testcase_filename = str(item).split("/")[-1] - # print(testcase_filename) - if re.match(regex, testcase_filename) is not None: - print(testcase_filename) - # testcases.append(testcase_filename) - yaml_stream = open(item) - yaml_obj = yaml.load(yaml_stream, Loader=yaml.Loader) - for step in yaml_obj["jobs"]["build"]["steps"]: - if ".ipynb" in step["name"]: - work_dir = step["working-directory"] - notebook_name = step["name"].split("/")[-1] - testcases.append(f"{work_dir}/{notebook_name}\n") - - with open(output_file, "w") as f: - f.writelines(testcases) - - -if __name__ == "__main__": - # Parse command line arguments - parser = argparse.ArgumentParser( - description="Collect all sdk test case with a regex." 
- ) - parser.add_argument("-r", "--regex", required=True, help="test case name selector") - parser.add_argument( - "-o", "--output", required=False, help="the file selected test case send to" - ) - - args = parser.parse_args() - - collect_test_cases(args.output, args.regex) diff --git a/.github/kubernetes-compute/tool.sh b/.github/kubernetes-compute/tool.sh deleted file mode 100644 index 86b9ec885b..0000000000 --- a/.github/kubernetes-compute/tool.sh +++ /dev/null @@ -1,793 +0,0 @@ -## This script provides functions to facilitate cluster setup and job testing on Arc Enabled ML compute -set -x - -# Global variables -export SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) -export LOCK_FILE=${SCRIPT_DIR}/"$(basename ${BASH_SOURCE[0]})".lock -export RESULT_FILE=${SCRIPT_DIR}/kubernetes-compute-test-result.txt -export MAX_RETRIES=60 -export SLEEP_SECONDS=20 - -# Resource group -export SUBSCRIPTION="${SUBSCRIPTION:-subscription}" -export RESOURCE_GROUP="${RESOURCE_GROUP:-amlarc-examples-rg}" -export LOCATION="${LOCATION:-eastus}" - -# AKS -export AKS_CLUSTER_PREFIX="${AKS_CLUSTER_PREFIX:-amlarc-aks}" -export VM_SKU="${VM_SKU:-Standard_D4s_v3}" -export MIN_COUNT="${MIN_COUNT:-3}" -export MAX_COUNT="${MAX_COUNT:-8}" -export AKS_CLUSTER_NAME=${AKS_CLUSTER_NAME:-$(echo ${AKS_CLUSTER_PREFIX}-${VM_SKU} | tr -d '_')} -export AKS_LOCATION="${AKS_LOCATION:-$LOCATION}" -export AKS_RESOURCE_ID="/subscriptions/$SUBSCRIPTION/resourceGroups/$RESOURCE_GROUP/providers/Microsoft.ContainerService/managedClusters/$AKS_CLUSTER_NAME" - -# ARC -export ARC_CLUSTER_PREFIX="${ARC_CLUSTER_PREFIX:-amlarc-arc}" -export ARC_CLUSTER_NAME=${ARC_CLUSTER_NAME:-$(echo ${ARC_CLUSTER_PREFIX}-${VM_SKU} | tr -d '_')} -export ARC_LOCATION="${ARC_LOCATION:-$LOCATION}" -export ARC_RESOURCE_ID="/subscriptions/$SUBSCRIPTION/resourceGroups/$RESOURCE_GROUP/providers/Microsoft.Kubernetes/ConnectedClusters/$ARC_CLUSTER_NAME" - -# Extension -export RELEASE_TRAIN="${RELEASE_TRAIN:-staging}" -export 
RELEASE_NAMESPACE="${RELEASE_NAMESPACE:-azureml}" -export EXTENSION_NAME="${EXTENSION_NAME:-amlarc-extension}" -export EXTENSION_TYPE="${EXTENSION_TYPE:-Microsoft.AzureML.Kubernetes}" -export EXTENSION_SETTINGS="${EXTENSION_SETTINGS:-enableTraining=True enableInference=True allowInsecureConnections=True inferenceRouterServiceType=loadBalancer}" -export CLUSTER_TYPE="${CLUSTER_TYPE:-connectedClusters}" # or managedClusters -if [ "${CLUSTER_TYPE}" == "connectedClusters" ]; then - export CLUSTER_NAME=${CLUSTER_NAME:-$ARC_CLUSTER_NAME} - export RESOURCE_ID=${RESOURCE_ID:-$ARC_RESOURCE_ID} -else - # managedClusters - export CLUSTER_NAME=${CLUSTER_NAME:-$AKS_CLUSTER_NAME} - export RESOURCE_ID=${RESOURCE_ID:-$AKS_RESOURCE_ID} -fi - -# Workspace and Compute -export WORKSPACE="${WORKSPACE:-amlarc-githubtest-ws}" # $((1 + $RANDOM % 100)) -export COMPUTE="${COMPUTE:-githubtest}" -export INSTANCE_TYPE_NAME="${INSTANCE_TYPE_NAME:-defaultinstancetype}" -export CPU="${CPU:-1}" -export MEMORY="${MEMORY:-4Gi}" -export GPU="${GPU:-null}" - -refresh_lock_file(){ - rm -f $LOCK_FILE - echo $(date) > $LOCK_FILE -} - -remove_lock_file(){ - rm -f $LOCK_FILE -} - -check_lock_file(){ - if [ -f $LOCK_FILE ]; then - echo true - return 0 - else - echo false - return 1 - fi -} - -set_default_env(){ - echo "SUBSCRIPTION=6560575d-fa06-4e7d-95fb-f962e74efd7a" | tee -a $GITHUB_ENV - echo "RESOURCE_GROUP=azureml-examples" | tee -a $GITHUB_ENV - echo "WORKSPACE=amlarc-githubtest-ws" | tee -a $GITHUB_ENV - echo "LOCATION=eastus" | tee -a $GITHUB_ENV - echo "FILE_TICKET=true" | tee -a $GITHUB_ENV - echo "KEY_VAULT_NAME=amlarcgithubworkflowkv" | tee -a $GITHUB_ENV - echo "REPOSITORY=https://github.com/Azure/azureml-examples" | tee -a $GITHUB_ENV -} - -install_tools(){ - - az upgrade --all --yes - az extension add -n connectedk8s --yes - az extension add -n k8s-extension --yes - az extension add -n ml --yes - - curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s 
https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl \ - && chmod +x ./kubectl \ - && sudo mv ./kubectl /usr/local/bin/kubectl - - pip install azureml-core - pip install shyaml - - pip list || true - az version || true -} - -register_provider(){ - - # For aks - az provider register --namespace Microsoft.ContainerService - - # For arc - az provider register -n 'Microsoft.Kubernetes' - - # For amlarc extension - az provider register --namespace Microsoft.Relay - az provider register --namespace Microsoft.KubernetesConfiguration - az provider register --namespace Microsoft.ContainerService - az feature register --namespace Microsoft.ContainerService -n AKS-ExtensionManager - - # For workspace - az provider register --namespace Microsoft.Storage - -} - -# setup RG -setup_resource_group(){ - # create resource group - az group show \ - --subscription $SUBSCRIPTION \ - -n "$RESOURCE_GROUP" || \ - az group create \ - --subscription $SUBSCRIPTION \ - -l "$LOCATION" \ - -n "$RESOURCE_GROUP" -} - -# setup AKS -setup_aks(){ - # create aks cluster - az aks show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $AKS_CLUSTER_NAME || \ - az aks create \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --location $AKS_LOCATION \ - --name $AKS_CLUSTER_NAME \ - --enable-cluster-autoscaler \ - --node-count $MIN_COUNT \ - --min-count $MIN_COUNT \ - --max-count $MAX_COUNT \ - --node-vm-size ${VM_SKU} \ - --no-ssh-key \ - $@ - - check_aks_status - -} - -check_aks_status(){ - for i in $(seq 1 $MAX_RETRIES); do - provisioningState=$(az aks show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $AKS_CLUSTER_NAME \ - --query provisioningState -o tsv) - echo "provisioningState: $provisioningState" - if [[ $provisioningState != "Succeeded" ]]; then - sleep ${SLEEP_SECONDS} - else - break - fi - done - - [[ $provisioningState == "Succeeded" ]] -} - -get_kubeconfig(){ - 
az aks get-credentials \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $AKS_CLUSTER_NAME \ - --overwrite-existing -} - -# connect cluster to ARC -connect_arc(){ - # get aks kubeconfig - get_kubeconfig - - # attach cluster to Arc - az connectedk8s show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $ARC_CLUSTER_NAME || \ - az connectedk8s connect \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --location $ARC_LOCATION \ - --name $ARC_CLUSTER_NAME --no-wait \ - $@ - - check_arc_status -} - -check_arc_status(){ - for i in $(seq 1 $MAX_RETRIES); do - connectivityStatus=$(az connectedk8s show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $ARC_CLUSTER_NAME \ - --query connectivityStatus -o tsv) - echo "connectivityStatus: $connectivityStatus" - if [[ $connectivityStatus != "Connected" ]]; then - sleep ${SLEEP_SECONDS} - else - break - fi - done - - [[ $connectivityStatus == "Connected" ]] -} - -# install extension -install_extension(){ - REINSTALL_EXTENSION="${REINSTALL_EXTENSION:-true}" - - if [[ $REINSTALL_EXTENSION == "true" ]]; then - # remove extension if exists to avoid missing the major version upgrade. 
- az k8s-extension delete \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME \ - --yes || true - - # install extension - az k8s-extension create \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME \ - --extension-type $EXTENSION_TYPE \ - --scope cluster \ - --release-train $RELEASE_TRAIN \ - --configuration-settings $EXTENSION_SETTINGS \ - --no-wait \ - $@ - else - az k8s-extension show \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME || \ - az k8s-extension create \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME \ - --extension-type $EXTENSION_TYPE \ - --scope cluster \ - --release-train $RELEASE_TRAIN \ - --configuration-settings $EXTENSION_SETTINGS \ - --no-wait \ - $@ - fi - - check_extension_status -} - -check_extension_status(){ - for i in $(seq 1 $MAX_RETRIES); do - provisioningState=$(az k8s-extension show \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME \ - --query provisioningState -o tsv) - echo "provisioningState: $provisioningState" - if [[ $provisioningState != "Succeeded" ]]; then - sleep ${SLEEP_SECONDS} - else - break - fi - done - - [[ $provisioningState == "Succeeded" ]] -} - -# setup workspace -setup_workspace(){ - - az ml workspace show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $WORKSPACE || \ - az ml workspace create \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $WORKSPACE \ - $@ - - az ml 
workspace update \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $WORKSPACE \ - --public-network-access Enabled - -} - -# setup compute -setup_compute(){ - - COMPUTE_NS=${COMPUTE_NS:-default} - - az ml compute attach \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --workspace-name $WORKSPACE \ - --type Kubernetes \ - --resource-id "$RESOURCE_ID" \ - --namespace "$COMPUTE_NS" \ - --name $COMPUTE \ - $@ - -} - -setup_instance_type(){ - INSTANCE_TYPE_NAME="${1:-$INSTANCE_TYPE_NAME}" - CPU="${2:-$CPU}" - MEMORY="${3:-$MEMORY}" - GPU="${4:-$GPU}" - - cat < .azureml/config.json -{ - "subscription_id": "$SUBSCRIPTION", - "resource_group": "$RESOURCE_GROUP", - "workspace_name": "$WORKSPACE" -} -EOF -} - -install_jupyter_dependency(){ - pip install jupyter - pip install notebook - ipython kernel install --name "amlarc" --user - pip install matplotlib numpy scikit-learn==0.22.1 numpy joblib glob2 - pip install azureml.core - pip install azure.cli.core - pip install azureml.opendatasets - pip install azureml.widgets - pip list || true -} - -# run jupyter test -run_jupyter_test(){ - JOB_SPEC="${1:-examples/training/simple-train-sdk/img-classification-training.ipynb}" - JOB_DIR=$(dirname $JOB_SPEC) - JOB_FILE=$(basename $JOB_SPEC) - - echo "[JobSubmission] $JOB_SPEC" | tee -a $RESULT_FILE - - cd $JOB_DIR - jupyter nbconvert --debug --execute $JOB_FILE --to python - status=$? - cd - - - echo $status - if [[ "$status" == "0" ]] - then - echo "[JobStatus] $JOB_SPEC ${JOB_STATUS_COMPLETED}" | tee -a $RESULT_FILE - else - echo "[JobStatus] $JOB_SPEC ${JOB_STATUS_FAILED}" | tee -a $RESULT_FILE - return 1 - fi -} - -# run python test -run_py_test(){ - JOB_SPEC="${1:-python-sdk/workflows/train/fastai/mnist/job.py}" - JOB_DIR=$(dirname $JOB_SPEC) - JOB_FILE=$(basename $JOB_SPEC) - - echo "[JobSubmission] $JOB_SPEC" | tee -a $RESULT_FILE - - cd $JOB_DIR - python $JOB_FILE - status=$? 
- cd - - - echo $status - if [[ "$status" == "0" ]] - then - echo "[JobStatus] $JOB_SPEC ${JOB_STATUS_COMPLETED}" | tee -a $RESULT_FILE - else - echo "[JobStatus] $JOB_SPEC ${JOB_STATUS_FAILED}" | tee -a $RESULT_FILE - return 1 - fi -} - -# count result -count_result(){ - - MIN_SUCCESS_NUM=${MIN_SUCCESS_NUM:--1} - - [ ! -f $RESULT_FILE ] && touch $RESULT_FILE - - echo "RESULT:" - cat $RESULT_FILE - - total=$(grep -c "\[JobSubmission\]" $RESULT_FILE) - success=$(grep "\[JobStatus\]" $RESULT_FILE | grep -ic ${JOB_STATUS_COMPLETED}) - unhealthy=$(( $total - $success )) - - echo "Total: ${total}, Success: ${success}, Unhealthy: ${unhealthy}, MinSuccessNum: ${MIN_SUCCESS_NUM}." - - if (( 10#${unhealthy} > 0 )) ; then - echo "There are $unhealthy unhealthy jobs." - echo "Unhealthy jobs:" - grep "\[JobStatus\]" $RESULT_FILE | grep -iv ${JOB_STATUS_COMPLETED} - return 1 - fi - - if (( 10#${MIN_SUCCESS_NUM} > 10#${success} )) ; then - echo "There should be at least ${MIN_SUCCESS_NUM} success jobs. Found ${success} success jobs." - return 1 - fi - - echo "All tests passed." 
-} - - -######################################## -## -## Upload metrics funcs -## -######################################## -export CERT_PATH=$(pwd)/certs -export CONTAINER_NAME=amltestmdmcontinaer -export STATSD_PORT=38125 -export REPOSITORY="${REPOSITORY:-Repository}" -export WORKFLOW="${WORKFLOW:-Workflow}" -export REPEAT="${REPEAT:-5}" - -report_metrics(){ - # download metrics dependency - install_mdm_dependency - - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - - # start mdm - bash .github/kubernetes-compute/tool.sh start_mdm_container - - # upload metrics - $@ - - # stop mdm - stop_mdm_container - - rm -f metric_endpoint.txt - rm -f mdm_account.txt - rm -f mdm_namespace.txt - rm -f $CERT_PATH/key.pem - rm -f $CERT_PATH/cert.pem -} - -install_mdm_dependency(){ - sudo apt install socat -} - -download_metrics_info(){ - KEY_VAULT_NAME=${KEY_VAULT_NAME:-kvname} - METRIC_ENDPOINT_NAME=${METRIC_ENDPOINT_NAME:-METRIC-ENDPOINT} - MDM_ACCOUNT_NAME=${MDM_ACCOUNT_NAME:-MDM-ACCOUNT} - MDM_NAMESPACE_NAME=${MDM_NAMESPACE_NAME:-MDM-NAMESPACE} - KEY_PEM_NAME=${KEY_PEM_NAME:-KEY-PEM} - CERT_PEM_NAME=${CERT_PEM_NAME:-CERT-PEM} - - mkdir -p $CERT_PATH - - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $METRIC_ENDPOINT_NAME -f metric_endpoint.txt - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $MDM_ACCOUNT_NAME -f mdm_account.txt - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $MDM_NAMESPACE_NAME -f mdm_namespace.txt - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $KEY_PEM_NAME -f $CERT_PATH/key.pem - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $CERT_PEM_NAME -f $CERT_PATH/cert.pem -} - -start_mdm_container(){ - - 
METRIC_ENDPOINT="${METRIC_ENDPOINT:-$(cat metric_endpoint.txt)}" - MDM_ACCOUNT="${MDM_ACCOUNT:-$(cat mdm_account.txt )}" - MDM_NAMESPACE="${MDM_NAMESPACE:-$(cat mdm_namespace.txt)}" - - METRIC_ENDPOINT_ARG="-e METRIC_ENDPOINT=${METRIC_ENDPOINT}" - if [ "$METRIC_ENDPOINT" = "METRIC-ENDPOINT-PROD" ]; then - METRIC_ENDPOINT_ARG="" - fi - - docker run -d \ - --name=$CONTAINER_NAME \ - -v ${CERT_PATH}:/certs \ - --net=host --uts=host \ - -e MDM_ACCOUNT=${MDM_ACCOUNT} \ - -e MDM_NAMESPACE=${MDM_NAMESPACE} \ - -e MDM_INPUT=statsd_udp \ - -e STATSD_PORT=${STATSD_PORT} \ - -e MDM_LOG_LEVEL=Debug \ - -e CERT_FILE=/certs/cert.pem \ - -e KEY_FILE=/certs/key.pem \ - linuxgeneva-microsoft.azurecr.io/genevamdm \ - $METRIC_ENDPOINT_ARG - - show_mdm_container -} - -show_mdm_container(){ - docker ps -a \ - --format "table {{.ID}}\t{{.Names}}\t{{.Networks}}\t{{.State}}\t{{.CreatedAt}}\t{{.Image}}" \ - -f name=$CONTAINER_NAME -} - -stop_mdm_container(){ - show_mdm_container - docker stop $CONTAINER_NAME - docker rm -f $CONTAINER_NAME - show_mdm_container -} - -upload_cluster_setup_metrics(){ - MDM_ACCOUNT="${MDM_ACCOUNT:-$(cat mdm_account.txt )}" - MDM_NAMESPACE="${MDM_NAMESPACE:-$(cat mdm_namespace.txt)}" - METRIC_NAME="${METRIC_NAME:-GithubWorkflowClusterSetup}" - VALUE="${VALUE:-1}" - - for i in $(seq 1 $REPEAT); do - echo '{"Account":"'${MDM_ACCOUNT}'","Namespace":"'${MDM_NAMESPACE}'","Metric":"'${METRIC_NAME}'", "Dims": { "Repository":"'${REPOSITORY}'", "Workflow":"'${WORKFLOW}'"}}:'${VALUE}'|g' | socat -t 1 - UDP-SENDTO:127.0.0.1:${STATSD_PORT} - sleep 60 - done - -} - -upload_test_result_metrics(){ - MDM_ACCOUNT="${MDM_ACCOUNT:-$(cat mdm_account.txt )}" - MDM_NAMESPACE="${MDM_NAMESPACE:-$(cat mdm_namespace.txt)}" - METRIC_HEARTBEAT_NAME="${METRIC_HEARTBEAT_NAME:-GithubWorkflowHeartBeat}" - METRIC_NAME="${METRIC_NAME:-GithubWorkflowTestResult}" - - jobs=$(grep "\[JobSubmission\]" $RESULT_FILE) - echo "Found $(echo "$jobs"| wc -l) jobs" - - for i in $(seq 1 $REPEAT); do - # 
Report heartbeat - VALUE=100 - echo '{"Account":"'${MDM_ACCOUNT}'","Namespace":"'${MDM_NAMESPACE}'","Metric":"'${METRIC_HEARTBEAT_NAME}'", "Dims": { "Repository":"'${REPOSITORY}'", "Workflow":"'${WORKFLOW}'"}}:'${VALUE}'|g' | socat -t 1 - UDP-SENDTO:127.0.0.1:${STATSD_PORT} - - while IFS= read -r job; do - job=$(echo $job| awk '{print $2}') - jobstatus=$(grep "\[JobStatus\]" $RESULT_FILE | grep $job | awk '{print $3}') - echo "Report metrics for job: $job status: $jobstatus" - - VALUE=0 - if [ "${jobstatus}" == "${JOB_STATUS_COMPLETED}" ]; then - VALUE=100 - fi - - # Report test result - echo '{"Account":"'${MDM_ACCOUNT}'","Namespace":"'${MDM_NAMESPACE}'","Metric":"'${METRIC_NAME}'", "Dims": {"Job":"'${job}'", "REPOSITORY":"'${REPOSITORY}'", "Workflow":"'${WORKFLOW}'"}}:'${VALUE}'|g' | socat -t 1 - UDP-SENDTO:127.0.0.1:${STATSD_PORT} - sleep 2 - done <<< $(echo "$jobs") - - sleep 60 - done - -} - - -if [ "$0" = "$BASH_SOURCE" ]; then - $@ -fi diff --git a/.github/workflows/kubernetes-compute-cpu-resources-cleanup.yml b/.github/workflows/kubernetes-compute-cpu-resources-cleanup.yml deleted file mode 100644 index 0e3521c37a..0000000000 --- a/.github/workflows/kubernetes-compute-cpu-resources-cleanup.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: kubernetes-compute-cpu-resources-cleanup -on: - schedule: - - cron: "0 16 * * *" - workflow_dispatch: - inputs: - DELETE_ENDPOINTS: - description: 'Whether to delete endpoints: true or false' - required: true - default: 'false' - CLEANUP_WORKSPACE: - description: 'Whether to delete workspace: true or false' - required: true - default: 'false' - UNINSTALL_EXTENSION: - description: 'Whether to uninstall extension: true or false' - required: true - default: 'true' - CLEANUP_CLUSTER: - description: 'Whether to delete cluster: true or false' - required: true - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - VM_SKU: - description: 'The VM SKU of the 
cluster' - required: false - default: 'Standard_D4s_v3' - AKS_CLUSTER_PREFIX: - description: 'The prefix of the cluster' - required: false - default: 'amlarc-aks' - -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - VM_SKU: Standard_D4s_v3 - MIN_COUNT: 5 - COMPUTE: "cpu-cluster" - CLUSTER_TYPE: managedClusters - - DELETE_ENDPOINTS: true - CLEANUP_WORKSPACE: false - UNINSTALL_EXTENSION: true - CLEANUP_CLUSTER: false - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "DELETE_ENDPOINTS=${{ github.event.inputs.DELETE_ENDPOINTS }}" | tee -a $GITHUB_ENV - echo "CLEANUP_WORKSPACE=${{ github.event.inputs.CLEANUP_WORKSPACE }}" | tee -a $GITHUB_ENV - echo "UNINSTALL_EXTENSION=${{ github.event.inputs.UNINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV - echo "CLEANUP_CLUSTER=${{ github.event.inputs.CLEANUP_CLUSTER }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - echo "VM_SKU=${{ github.event.inputs.VM_SKU }}" | tee -a $GITHUB_ENV - echo "AKS_CLUSTER_PREFIX=${{ github.event.inputs.AKS_CLUSTER_PREFIX }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # cleanup resources - - name: delete_endpoints - if: ${{ always() }} - run: | - if [ "$DELETE_ENDPOINTS" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_endpoints - fi - timeout-minutes: 60 - - name: delete_compute - if: ${{ always() }} - run: | - if [ "$CLEANUP_WORKSPACE" == 'true' ] || [ "$UNINSTALL_EXTENSION" == 'true' ] ; then - bash 
.github/kubernetes-compute/tool.sh delete_compute || true - fi - timeout-minutes: 60 - - name: delete_workspace - if: ${{ always() }} - run: | - if [ "$CLEANUP_WORKSPACE" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_workspace - fi - timeout-minutes: 60 - - name: delete_extension - if: ${{ always() }} - run: | - if [ "$UNINSTALL_EXTENSION" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_extension - fi - timeout-minutes: 60 - - name: delete_cluster - if: ${{ always() }} - run: | - if [ "$CLEANUP_CLUSTER" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_aks - fi - timeout-minutes: 60 - - - name: cleanup terminating pods - if: ${{ always() }} - run: | - set +e - bash .github/kubernetes-compute/tool.sh get_kubeconfig - for i in $(kubectl get pod | grep Terminating | awk '{print $1}') ; do echo $i ; kubectl get pod $i; kubectl delete pod $i --force; done - timeout-minutes: 60 - - diff --git a/.github/workflows/kubernetes-compute-cpu-resources-setup.yml b/.github/workflows/kubernetes-compute-cpu-resources-setup.yml deleted file mode 100644 index 7f1ec29c3e..0000000000 --- a/.github/workflows/kubernetes-compute-cpu-resources-setup.yml +++ /dev/null @@ -1,127 +0,0 @@ -name: kubernetes-compute-cpu-resources-setup -on: - schedule: - - cron: "0 17 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - RELEASE_TRAIN: - description: 'Release version: experimental, staging or stable' - required: false - default: 'stable' - REINSTALL_EXTENSION: - description: 'Whether to reinstall extension: true or false' - required: false - default: 'false' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - EXTENSION_VERSION: - description: 'The version of k8s-extension' - required: false - default: '' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 
'amlarc-githubtest-ws' - VM_SKU: - description: 'The VM SKU of the cluster' - required: false - default: 'Standard_D4s_v3' - AKS_CLUSTER_PREFIX: - description: 'The prefix of the cluster' - required: false - default: 'amlarc-aks' - CPU_INSTANCE_TYPE: - description: 'cpu instance type' - required: false - default: '2 4Gi' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - LOCATION: eastus - WORKSPACE: amlarc-githubtest-ws - VM_SKU: Standard_D4s_v3 - MIN_COUNT: 5 - AKS_CLUSTER_PREFIX: amlarc-aks - CLUSTER_TYPE: managedClusters - RELEASE_TRAIN: stable - COMPUTE: "cpu-cluster" - EXTENSION_SETTINGS: "enableTraining=True enableInference=True inferenceRouterServiceType=loadBalancer allowInsecureConnections=True" - REINSTALL_EXTENSION: false - EXTENSION_VERSION: "" - CPU_INSTANCE_TYPE: "2 4Gi" - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-cpu-resources-setup.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "RELEASE_TRAIN=${{ github.event.inputs.RELEASE_TRAIN }}" | tee -a $GITHUB_ENV - echo "REINSTALL_EXTENSION=${{ github.event.inputs.REINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "EXTENSION_VERSION=${{ github.event.inputs.EXTENSION_VERSION }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - echo "VM_SKU=${{ github.event.inputs.VM_SKU }}" | tee -a $GITHUB_ENV - echo "AKS_CLUSTER_PREFIX=${{ github.event.inputs.AKS_CLUSTER_PREFIX }}" | tee -a $GITHUB_ENV - echo "CPU_INSTANCE_TYPE=${{ github.event.inputs.CPU_INSTANCE_TYPE }}" | tee -a 
$GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # provision resources - - name: setup_aks - run: bash .github/kubernetes-compute/tool.sh setup_aks - timeout-minutes: 30 - - name: install_extension - run: | - if [ "$EXTENSION_VERSION" == "" ]; then - bash .github/kubernetes-compute/tool.sh install_extension - else - bash .github/kubernetes-compute/tool.sh install_extension --version $EXTENSION_VERSION - fi - timeout-minutes: 30 - - name: setup_workspace - run: bash .github/kubernetes-compute/tool.sh setup_workspace - timeout-minutes: 30 - - name: setup_compute - run: | - bash .github/kubernetes-compute/tool.sh setup_compute - timeout-minutes: 30 - - name: setup_instance_type - run: | - bash .github/kubernetes-compute/tool.sh get_kubeconfig - bash .github/kubernetes-compute/tool.sh setup_instance_type defaultinstancetype $CPU_INSTANCE_TYPE - bash .github/kubernetes-compute/tool.sh setup_instance_type cpu $CPU_INSTANCE_TYPE - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_cluster_setup_metrics - fi - timeout-minutes: 120 \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-gpu-resources-cleanup.yml b/.github/workflows/kubernetes-compute-gpu-resources-cleanup.yml deleted file mode 100644 index c3f72e7d95..0000000000 --- a/.github/workflows/kubernetes-compute-gpu-resources-cleanup.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: kubernetes-compute-gpu-resources-cleanup -on: - schedule: - - cron: "0 16 * * *" - workflow_dispatch: - inputs: - 
DELETE_ENDPOINTS: - description: 'Whether to delete endpoints: true or false' - required: true - default: 'false' - CLEANUP_WORKSPACE: - description: 'Whether to delete workspace: true or false' - required: true - default: 'false' - UNINSTALL_EXTENSION: - description: 'Whether to uninstall extension: true or false' - required: true - default: 'true' - CLEANUP_CLUSTER: - description: 'Whether to delete cluster: true or false' - required: true - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - VM_SKU: - description: 'The VM SKU of the cluster' - required: false - default: 'STANDARD_NC12' - AKS_CLUSTER_PREFIX: - description: 'The prefix of the cluster' - required: false - default: 'amlarc-aks' - -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - VM_SKU: STANDARD_NC12 - COMPUTE: "gpu-cluster" - CLUSTER_TYPE: managedClusters - - DELETE_ENDPOINTS: true - CLEANUP_WORKSPACE: false - UNINSTALL_EXTENSION: true - CLEANUP_CLUSTER: false - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "DELETE_ENDPOINTS=${{ github.event.inputs.DELETE_ENDPOINTS }}" | tee -a $GITHUB_ENV - echo "CLEANUP_WORKSPACE=${{ github.event.inputs.CLEANUP_WORKSPACE }}" | tee -a $GITHUB_ENV - echo "UNINSTALL_EXTENSION=${{ github.event.inputs.UNINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV - echo "CLEANUP_CLUSTER=${{ github.event.inputs.CLEANUP_CLUSTER }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - echo "VM_SKU=${{ github.event.inputs.VM_SKU }}" | tee -a $GITHUB_ENV - echo "AKS_CLUSTER_PREFIX=${{ github.event.inputs.AKS_CLUSTER_PREFIX }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - 
python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # cleanup resources - - name: delete_endpoints - if: ${{ always() }} - run: | - if [ "$DELETE_ENDPOINTS" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_endpoints - fi - timeout-minutes: 60 - - name: delete_compute - if: ${{ always() }} - run: | - if [ "$CLEANUP_WORKSPACE" == 'true' ] || [ "$UNINSTALL_EXTENSION" == 'true' ] ; then - bash .github/kubernetes-compute/tool.sh delete_compute || true - COMPUTE="cpu-cluster-lg" bash .github/kubernetes-compute/tool.sh delete_compute || true - fi - timeout-minutes: 60 - - name: delete_workspace - if: ${{ always() }} - run: | - if [ "$CLEANUP_WORKSPACE" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_workspace - fi - timeout-minutes: 60 - - name: delete_extension - if: ${{ always() }} - run: | - if [ "$UNINSTALL_EXTENSION" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_extension - fi - timeout-minutes: 60 - - name: delete_cluster - if: ${{ always() }} - run: | - if [ "$CLEANUP_CLUSTER" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_aks - fi - timeout-minutes: 60 - - - name: cleanup terminating pods - if: ${{ always() }} - run: | - set +e - bash .github/kubernetes-compute/tool.sh get_kubeconfig - for i in $(kubectl get pod | grep Terminating | awk '{print $1}') ; do echo $i ; kubectl get pod $i; kubectl delete pod $i --force; done - timeout-minutes: 60 - - diff --git a/.github/workflows/kubernetes-compute-gpu-resources-setup.yml b/.github/workflows/kubernetes-compute-gpu-resources-setup.yml deleted file mode 100644 index 06635a2d24..0000000000 --- a/.github/workflows/kubernetes-compute-gpu-resources-setup.yml +++ /dev/null @@ -1,140 +0,0 @@ -name: kubernetes-compute-gpu-resources-setup -on: - schedule: - 
- cron: "0 17 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - RELEASE_TRAIN: - description: 'Release version: experimental, staging or stable' - required: false - default: 'stable' - REINSTALL_EXTENSION: - description: 'Whether to reinstall extension: true or false' - required: false - default: 'false' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - EXTENSION_VERSION: - description: 'The version of k8s-extension' - required: false - default: '' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - VM_SKU: - description: 'The VM SKU of the cluster' - required: false - default: 'STANDARD_NC12' - AKS_CLUSTER_PREFIX: - description: 'The prefix of the cluster' - required: false - default: 'amlarc-aks' - CPU_INSTANCE_TYPE: - description: 'cpu instance type' - required: false - default: '4 40Gi' - GPU_INSTANCE_TYPE: - description: 'gpu instance type' - required: false - default: '4 40Gi 2' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - LOCATION: eastus - WORKSPACE: amlarc-githubtest-ws - VM_SKU: STANDARD_NC12 - MIN_COUNT: 4 - CLUSTER_TYPE: managedClusters - RELEASE_TRAIN: stable - COMPUTE: "gpu-cluster" - EXTENSION_SETTINGS: "enableTraining=True enableInference=True inferenceRouterServiceType=loadBalancer allowInsecureConnections=True installNvidiaDevicePlugin=True installDcgmExporter=True" - REINSTALL_EXTENSION: false - EXTENSION_VERSION: "" - AKS_CLUSTER_PREFIX: "amlarc-aks" - CPU_INSTANCE_TYPE: "4 40Gi" - GPU_INSTANCE_TYPE: "4 40Gi 2" - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-gpu-resources-setup.yml - - steps: - - 
name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "RELEASE_TRAIN=${{ github.event.inputs.RELEASE_TRAIN }}" | tee -a $GITHUB_ENV - echo "REINSTALL_EXTENSION=${{ github.event.inputs.REINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "EXTENSION_VERSION=${{ github.event.inputs.EXTENSION_VERSION }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - echo "VM_SKU=${{ github.event.inputs.VM_SKU }}" | tee -a $GITHUB_ENV - echo "AKS_CLUSTER_PREFIX=${{ github.event.inputs.AKS_CLUSTER_PREFIX }}" | tee -a $GITHUB_ENV - echo "CPU_INSTANCE_TYPE=${{ github.event.inputs.CPU_INSTANCE_TYPE }}" | tee -a $GITHUB_ENV - echo "GPU_INSTANCE_TYPE=${{ github.event.inputs.GPU_INSTANCE_TYPE }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # provision resources - - name: setup_aks - run: bash .github/kubernetes-compute/tool.sh setup_aks - timeout-minutes: 30 - - name: install_extension - run: | - if [ "$EXTENSION_VERSION" == "" ]; then - bash .github/kubernetes-compute/tool.sh install_extension - else - bash .github/kubernetes-compute/tool.sh install_extension --version $EXTENSION_VERSION - fi - timeout-minutes: 30 - - name: setup_workspace - run: bash .github/kubernetes-compute/tool.sh setup_workspace - timeout-minutes: 30 - - name: setup_compute - run: | - bash .github/kubernetes-compute/tool.sh setup_compute - timeout-minutes: 30 - - name: setup_compute - run: | - COMPUTE="cpu-cluster-lg" 
bash .github/kubernetes-compute/tool.sh setup_compute - COMPUTE="spark31" bash .github/kubernetes-compute/tool.sh setup_compute - timeout-minutes: 30 - - name: setup_instance_type - run: | - bash .github/kubernetes-compute/tool.sh get_kubeconfig - bash .github/kubernetes-compute/tool.sh setup_instance_type defaultinstancetype $GPU_INSTANCE_TYPE - bash .github/kubernetes-compute/tool.sh setup_instance_type cpu $CPU_INSTANCE_TYPE - bash .github/kubernetes-compute/tool.sh setup_instance_type gpu $GPU_INSTANCE_TYPE - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_cluster_setup_metrics - fi - timeout-minutes: 120 - diff --git a/.github/workflows/kubernetes-compute-sdk-endpoints.yml b/.github/workflows/kubernetes-compute-sdk-endpoints.yml deleted file mode 100644 index 5946736ca8..0000000000 --- a/.github/workflows/kubernetes-compute-sdk-endpoints.yml +++ /dev/null @@ -1,191 +0,0 @@ -name: kubernetes-compute-sdk-endpoints -on: - schedule: - - cron: "0 21 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^sdk-endpoints.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - pull_request: - branches: - - main - paths: - - '.github/workflows/kubernetes-compute-sdk-endpoints.yml' -jobs: - test: - runs-on: ubuntu-latest - env: - 
SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - LOCATION: eastus - - JOB_SELECTOR: '^sdk-endpoints.*yml$' - JOB_FILTER: '' - JOB_LIST_FILE: sdk-endpoints.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-sdk-endpoints.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - - name: check out repo - uses: actions/checkout@v2 - - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: pip install notebook reqs - run: pip install -r sdk/dev-requirements.txt - - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: setup SDK - run: bash setup.sh - working-directory: sdk - continue-on-error: true - - - name: setup CLI - run: bash setup.sh - working-directory: cli - continue-on-error: true - - - name: collect jobs - run: | - python .github/kubernetes-compute/sdk_testcase_collector.py -o "$JOB_LIST_FILE" -r "$JOB_SELECTOR" - timeout-minutes: 30 - - - name: run jobs - run: | - for job in $(cat $JOB_LIST_FILE); do - if [[ "$job" = *"ipynb" ]]; then - echo "Run job: $job" - - FOLDER=$(dirname "$job") - FILE=$(basename "$job") - FOLDERBACK=$(echo $FOLDER | sed 
-e "s/[a-zA-Z0-9\-_-]*\//\.\.\//g") - FOLDERBACK=$(echo $FOLDERBACK | sed -r "s/\/[a-zA-Z0-9\-_-]+/\/\.\./g") - - echo "$FOLDER|$FILE|$FOLDERBACK" - - cd $FOLDER - sed -i -e "s//$SUBSCRIPTION/g" $FILE - sed -i -e "s//$RESOURCE_GROUP/g" $FILE - sed -i -e "s//$WORKSPACE/g" $FILE - sed -i -e "s//amlarc-inference/g" $FILE - sed -i -e "s//inferencecompute/g" $FILE - sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" $FILE - OUTPUTJOB=$(echo $FILE | sed -e "s/.ipynb/.output.ipynb/g") - papermill -k python $FILE $OUTPUTJOB & - - sleep 60 - cd $FOLDERBACK - else - echo "Found invalid job: $job" - fi - done - - wait - timeout-minutes: 300 - - - name: check_jobs - if: ${{ always() }} - run: | - python .github/kubernetes-compute/papermill_count_failed_case.py -i "$JOB_LIST_FILE" - timeout-minutes: 30 - - # report metrics - - name: download_metrics_dependency - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh install_mdm_dependency - fi - timeout-minutes: 30 - - name: start_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - bash .github/kubernetes-compute/tool.sh start_mdm_container - fi - timeout-minutes: 30 - - name: report_failure_metrics - if: ${{ failure() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Failed - export job="kubernetes-compute-sdk-endpoints" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: report_succes_metrics - if: ${{ success() && github.event_name != 'pull_request' 
}} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Completed - export job="kubernetes-compute-sdk-endpoints" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: stop_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh stop_mdm_container - fi - timeout-minutes: 30 - - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: component - path: sdk/endpoints \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml b/.github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml deleted file mode 100644 index dfe150813a..0000000000 --- a/.github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml +++ /dev/null @@ -1,190 +0,0 @@ -name: kubernetes-compute-sdk-jobs-automl-standalone-jobs -on: - schedule: - - cron: "0 22 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^sdk-jobs-automl-standalone-jobs.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - pull_request: - branches: - - main - paths: - - '.github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - 
RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - LOCATION: eastus - - JOB_SELECTOR: '^sdk-jobs-automl-standalone-jobs.*yml$' - JOB_FILTER: '' - JOB_LIST_FILE: sdk-jobs-automl-standalone-jobs.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - - name: check out repo - uses: actions/checkout@v2 - - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: pip install notebook reqs - run: pip install -r sdk/dev-requirements.txt - - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: setup SDK - run: bash setup.sh - working-directory: sdk - continue-on-error: true - - - name: setup CLI - run: bash setup.sh - working-directory: cli - continue-on-error: true - - - name: collect jobs - run: | - python .github/kubernetes-compute/sdk_testcase_collector.py -o "$JOB_LIST_FILE" -r "$JOB_SELECTOR" - timeout-minutes: 30 - - - name: run jobs - run: | - for job in $(cat $JOB_LIST_FILE); do - if [[ "$job" = *"ipynb" ]]; then - echo "Run job: $job" - - FOLDER=$(dirname "$job") - FILE=$(basename "$job") - FOLDERBACK=$(echo $FOLDER | sed 
-e "s/[a-zA-Z0-9\-_-]*\//\.\.\//g") - FOLDERBACK=$(echo $FOLDERBACK | sed -r "s/\/[a-zA-Z0-9\-_-]+/\/\.\./g") - - echo "$FOLDER|$FILE|$FOLDERBACK" - - cd $FOLDER - sed -i -e "s//$SUBSCRIPTION/g" $FILE - sed -i -e "s//$RESOURCE_GROUP/g" $FILE - sed -i -e "s//$WORKSPACE/g" $FILE - sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" $FILE - OUTPUTJOB=$(echo $FILE | sed -e "s/.ipynb/.output.ipynb/g") - papermill -k python $FILE $OUTPUTJOB & - - sleep 900 - - cd $FOLDERBACK - else - echo "Found invalid job: $job" - fi - done - - wait - timeout-minutes: 300 - - - name: check_jobs - if: ${{ always() }} - run: | - python .github/kubernetes-compute/papermill_count_failed_case.py -i "$JOB_LIST_FILE" - timeout-minutes: 30 - - # report metrics - - name: download_metrics_dependency - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh install_mdm_dependency - fi - timeout-minutes: 30 - - name: start_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - bash .github/kubernetes-compute/tool.sh start_mdm_container - fi - timeout-minutes: 30 - - name: report_failure_metrics - if: ${{ failure() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Failed - export job="kubernetes-compute-sdk-jobs-automl-standalone-jobs" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: report_succes_metrics - if: ${{ success() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export 
jobstatus=Completed - export job="kubernetes-compute-sdk-jobs-automl-standalone-jobs" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: stop_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh stop_mdm_container - fi - timeout-minutes: 30 - - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: component - path: sdk/jobs/automl-standalone-jobs \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml b/.github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml deleted file mode 100644 index bc96f4be74..0000000000 --- a/.github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml +++ /dev/null @@ -1,192 +0,0 @@ -name: kubernetes-compute-sdk-jobs-pipeline -on: - schedule: - - cron: "0 20 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^sdk-jobs-pipeline.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - pull_request: - branches: - - main - paths: - - '.github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - LOCATION: eastus - - JOB_SELECTOR: 
'^sdk-jobs-pipeline.*yml$' - JOB_FILTER: '' - JOB_LIST_FILE: sdk-jobs-pipeline.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-sdk-jobs-pipeline.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - - name: check out repo - uses: actions/checkout@v2 - - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: pip install notebook reqs - run: pip install -r sdk/dev-requirements.txt - - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: setup SDK - run: bash setup.sh - working-directory: sdk - continue-on-error: true - - - name: setup CLI - run: bash setup.sh - working-directory: cli - continue-on-error: true - - - name: collect jobs - run: | - python .github/kubernetes-compute/sdk_testcase_collector.py -o "$JOB_LIST_FILE" -r "$JOB_SELECTOR" - timeout-minutes: 30 - - - name: run jobs - run: | - mkdir .azureml - echo '{"subscription_id": "6560575d-fa06-4e7d-95fb-f962e74efd7a", "resource_group": "azureml-examples", "workspace_name": "amlarc-githubtest-ws"}' > .azureml/config.json - for job in $(cat $JOB_LIST_FILE); do - if [[ "$job" = *"ipynb" ]]; then - echo "Run job: $job" - - FOLDER=$(dirname "$job") - FILE=$(basename 
"$job") - FOLDERBACK=$(echo $FOLDER | sed -e "s/[a-zA-Z0-9\-_-]*\//\.\.\//g") - FOLDERBACK=$(echo $FOLDERBACK | sed -r "s/\/[a-zA-Z0-9\-_-]+/\/\.\./g") - - echo "$FOLDER|$FILE|$FOLDERBACK" - - cd $FOLDER - sed -i -e "s//$SUBSCRIPTION/g" $FILE - sed -i -e "s//$RESOURCE_GROUP/g" $FILE - sed -i -e "s//$WORKSPACE/g" $FILE - sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" $FILE - sed -i "s/@pipeline(/&force_rerun=True,/" $FILE - OUTPUTJOB=$(echo $FILE | sed -e "s/.ipynb/.output.ipynb/g") - papermill -k python $FILE $OUTPUTJOB & - - sleep 60 - cd $FOLDERBACK - else - echo "Found invalid job: $job" - fi - done - - wait - timeout-minutes: 300 - - - name: check_jobs - if: ${{ always() }} - run: | - python .github/kubernetes-compute/papermill_count_failed_case.py -i "$JOB_LIST_FILE" - timeout-minutes: 30 - - # report metrics - - name: download_metrics_dependency - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh install_mdm_dependency - fi - timeout-minutes: 30 - - name: start_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - bash .github/kubernetes-compute/tool.sh start_mdm_container - fi - timeout-minutes: 30 - - name: report_failure_metrics - if: ${{ failure() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Failed - export job="kubernetes-compute-sdk-jobs-pipeline" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: report_succes_metrics - if: ${{ success() && github.event_name != 
'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Completed - export job="kubernetes-compute-sdk-jobs-pipeline" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: stop_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh stop_mdm_container - fi - timeout-minutes: 30 - - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: component - path: sdk/jobs/pipelines \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-sdk-jobs-single-step.yml b/.github/workflows/kubernetes-compute-sdk-jobs-single-step.yml deleted file mode 100644 index 44add30123..0000000000 --- a/.github/workflows/kubernetes-compute-sdk-jobs-single-step.yml +++ /dev/null @@ -1,189 +0,0 @@ -name: kubernetes-compute-sdk-jobs-single-step -on: - schedule: - - cron: "0 19 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^sdk-jobs-single-step.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - pull_request: - branches: - - main - paths: - - '.github/workflows/kubernetes-compute-sdk-jobs-single-step.yml' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - 
WORKSPACE: amlarc-githubtest-ws - LOCATION: eastus - - JOB_SELECTOR: '^sdk-jobs-single-step.*yml$' - JOB_FILTER: '' - JOB_LIST_FILE: sdk-jobs-single-step.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - GITHUB_REPO: https://github.com/Azure/azureml-examples - WORKFLOW_URL: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-sdk-jobs-single-step.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - - name: check out repo - uses: actions/checkout@v2 - - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: pip install notebook reqs - run: pip install -r sdk/dev-requirements.txt - - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: setup SDK - run: bash setup.sh - working-directory: sdk - continue-on-error: true - - - name: setup CLI - run: bash setup.sh - working-directory: cli - continue-on-error: true - - - name: collect jobs - run: | - python .github/kubernetes-compute/sdk_testcase_collector.py -o "$JOB_LIST_FILE" -r "$JOB_SELECTOR" - timeout-minutes: 30 - - - name: run jobs - run: | - for job in $(cat $JOB_LIST_FILE); do - if [[ "$job" = *"ipynb" ]]; then - echo "Run job: $job" - - FOLDER=$(dirname "$job") - FILE=$(basename "$job") - FOLDERBACK=$(echo $FOLDER | sed -e "s/[a-zA-Z0-9\-_-]*\//\.\.\//g") - FOLDERBACK=$(echo 
$FOLDERBACK | sed -r "s/\/[a-zA-Z0-9\-_-]+/\/\.\./g") - - echo "$FOLDER|$FILE|$FOLDERBACK" - - cd $FOLDER - sed -i -e "s//$SUBSCRIPTION/g" $FILE - sed -i -e "s//$RESOURCE_GROUP/g" $FILE - sed -i -e "s//$WORKSPACE/g" $FILE - sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" $FILE - OUTPUTJOB=$(echo $FILE | sed -e "s/.ipynb/.output.ipynb/g") - papermill -k python $FILE $OUTPUTJOB & - - sleep 60 - cd $FOLDERBACK - else - echo "Found invalid job: $job" - fi - done - - wait - timeout-minutes: 300 - - - name: check_jobs - if: ${{ always() }} - run: | - python .github/kubernetes-compute/papermill_count_failed_case.py -i "$JOB_LIST_FILE" - timeout-minutes: 30 - - # report metrics - - name: download_metrics_dependency - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh install_mdm_dependency - fi - timeout-minutes: 30 - - name: start_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - bash .github/kubernetes-compute/tool.sh start_mdm_container - fi - timeout-minutes: 30 - - name: report_failure_metrics - if: ${{ failure() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Failed - export job="kubernetes-compute-sdk-jobs-single-step" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: report_succes_metrics - if: ${{ success() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Completed - export 
job="kubernetes-compute-sdk-jobs-single-step" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: stop_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh stop_mdm_container - fi - timeout-minutes: 30 - - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: component - path: sdk/jobs/single-step diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml deleted file mode 100644 index 5a9f1c9963..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml +++ /dev/null @@ -1,119 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-automl-dynamic-data -on: - schedule: - - cron: "0 0 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '300m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-automl-dynamic-data.txt - TIMEOUT: '300m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == 
"workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - pip install azure-ai-textanalytics - pip install azure-identity - pip install azure-ai-ml - pip install azure-cli - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - echo "cli/jobs/pipelines/automl/image-instance-segmentation-task-fridge-items-pipeline/pipeline.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/pipeline.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/pipeline.yml" | tee -a $JOB_LIST_FILE - - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/cli-automl-image-classification-multiclass-task-fridge-items.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/cli-automl-image-classification-multiclass-task-fridge-items-automode.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/cli-automl-image-instance-segmentation-task-fridge-items.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/cli-automl-image-instance-segmentation-task-fridge-items-automode.yml" | tee -a $JOB_LIST_FILE - echo 
"cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/cli-automl-image-classification-multilabel-task-fridge-items.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/cli-automl-image-classification-multilabel-task-fridge-items-automode.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/cli-automl-image-object-detection-task-fridge-items.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/cli-automl-image-object-detection-task-fridge-items-automode.yml" | tee -a $JOB_LIST_FILE - timeout-minutes: 30 - - name: prepare data - run: | - ROOT_DIR=$(pwd) - for job in $(cat $JOB_LIST_FILE); do - cd $ROOT_DIR - if [[ "$job" = *"yml" ]]; then - echo "Prepare data for job: $job" - JOB_SPEC_FILE=$(basename $job) - JOB_DIR=$(dirname $job) - - cd $JOB_DIR - sed -i -e "s/from azure.identity import InteractiveBrowserCredential/from azureml.core.authentication import AzureCliAuthentication/g" prepare_data.py - sed -i -e "s/credential = InteractiveBrowserCredential()/credential = AzureCliAuthentication()/g" prepare_data.py - - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - - sleep 30 - else - echo "Found invalid job: $job" - fi - done - timeout-minutes: 300 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - # report metrics - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 
diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-automl.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-automl.yml deleted file mode 100644 index 7da1733b0e..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-automl.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-automl -on: - schedule: - - cron: "0 22 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-automl.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: 'automl-forecasting-task-github-dau|cli-automl-forecasting-task-bike-share|multiclass-task-fridge-items|segmentation-task-fridge-items|multilabel-task-fridge-items|detection-task-fridge-items|spark|java' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-automl.*yml$|cli-jobs-basics-hello-automl-hello-automl-job-basic.yml' - JOB_FILTER: 'automl-forecasting-task-github-dau|cli-automl-forecasting-task-bike-share|multiclass-task-fridge-items|segmentation-task-fridge-items|multilabel-task-fridge-items|detection-task-fridge-items|spark|java' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-automl.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-automl.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 
- with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - bash .github/kubernetes-compute/tool.sh collect_jobs_from_workflows "$JOB_LIST_FILE" "$JOB_SELECTOR" "$JOB_FILTER" - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - # report metrics - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml deleted file mode 100644 index f06d35f58e..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml +++ /dev/null @@ -1,85 +0,0 @@ -name: 
kubernetes-compute-training-cli-jobs-bad-cases -on: - schedule: - - cron: "0 4 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-bad-cases.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 0 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - echo "cli/jobs/single-step/pytorch/cifar-distributed/job.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/pipeline.yml" | tee -a $JOB_LIST_FILE - echo 
"cli/jobs/automl-standalone-jobs/cli-automl-forecasting-bike-share/cli-automl-forecasting-task-bike-share.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/pipelines-with-components/image_classification_with_densenet/pipeline.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-forecasting-task-github-dau/cli-automl-forecasting-task-github-dau.yml" | tee -a $JOB_LIST_FILE - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-basics.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-basics.yml deleted file mode 100644 index 087a09cc8a..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-basics.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-basics -on: - schedule: - - cron: "0 20 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-basics.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: 'java' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - 
default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-basics.*yml$' - JOB_FILTER: 'java' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-basics.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-basics.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - bash .github/kubernetes-compute/tool.sh collect_jobs_from_workflows "$JOB_LIST_FILE" "$JOB_SELECTOR" "$JOB_FILTER" - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash 
.github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 - diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml deleted file mode 100644 index 433130eef4..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml +++ /dev/null @@ -1,93 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-pipelines -on: - schedule: - - cron: "0 21 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-pipelines.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '4b_datastore_datapath_uri|image_classification_with_densenet|spark|image-instance-segmentation-task-fridge-items-pipeline|image-multiclass-classification-fridge-items-pipeline|image-multilabel-classification-fridge-items-pipeline|image-object-detection-task-fridge-items-pipeline' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-pipelines.*yml$' - JOB_FILTER: '4b_datastore_datapath_uri|image_classification_with_densenet|spark|image-instance-segmentation-task-fridge-items-pipeline|image-multiclass-classification-fridge-items-pipeline|image-multilabel-classification-fridge-items-pipeline|image-object-detection-task-fridge-items-pipeline' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-pipelines.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - 
WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - bash .github/kubernetes-compute/tool.sh collect_jobs_from_workflows "$JOB_LIST_FILE" "$JOB_SELECTOR" "$JOB_FILTER" - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml 
b/.github/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml deleted file mode 100644 index 58a269d535..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-single-step-spark -on: - schedule: - - cron: "0 2 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-single-step-spark.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: 'java' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '300m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-single-step-spark.*yml$' - JOB_FILTER: 'java' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-single-step-spark.txt - TIMEOUT: '300m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ 
github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - echo "cli/jobs/single-step/spark/nyctaxi/job.yml" | tee -a $JOB_LIST_FILE - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 360 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - # report metrics - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-single-step.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-single-step.yml deleted file mode 100644 index c2d66cbea8..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-single-step.yml +++ /dev/null @@ -1,93 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-single-step -on: - schedule: - - cron: "0 19 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-single-step.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: 'spark|java|sweep|pytorch-cifar-distributed-job' - TIMEOUT: - description: 
'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-single-step.*yml$' - JOB_FILTER: 'spark|java|sweep|pytorch-cifar-distributed-job' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-single-step.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-single-step.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - bash .github/kubernetes-compute/tool.sh collect_jobs_from_workflows "$JOB_LIST_FILE" "$JOB_SELECTOR" "$JOB_FILTER" - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - 
timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-workspace-setup.yml b/.github/workflows/kubernetes-compute-workspace-setup.yml deleted file mode 100644 index fb7b5db85b..0000000000 --- a/.github/workflows/kubernetes-compute-workspace-setup.yml +++ /dev/null @@ -1,262 +0,0 @@ -name: kubernetes-compute-workspace-setup -on: - schedule: - - cron: "0 0 * * 3" - workflow_dispatch: - inputs: - LOCATION: - description: 'Resource Region' - required: false - default: 'eastus' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - LOCATION: eastus - WORKSPACE: amlarc-githubtest-ws - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.LOCATION }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # provision resources - - name: setup_workspace - if: ${{ always() }} - run: bash .github/kubernetes-compute/tool.sh setup_workspace - timeout-minutes: 30 - - - name: set configure - 
if: ${{ always() }} - run: | - az account set --subscription $SUBSCRIPTION - az configure --defaults group=$RESOURCE_GROUP workspace=$WORKSPACE location=$LOCATION - pip install azure-identity - pip install azure-ai-ml - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilablel-task-fridge-items/cli-automl-image-classification-multilabel-task-fridge-items.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilablel-task-fridge-items - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilablel-task-fridge-items/cli-automl-image-classification-multilabel-task-fridge-items-automode.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilablel-task-fridge-items - timeout-minutes: 60 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/cli-automl-image-classification-multiclass-task-fridge-items.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/cli-automl-image-classification-multiclass-task-fridge-items-automode.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - 
working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items - timeout-minutes: 60 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/cli-automl-image-instance-segmentation-task-fridge-items.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/cli-automl-image-instance-segmentation-task-fridge-items-automode.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items - timeout-minutes: 60 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/cli-automl-image-object-detection-task-fridge-items.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/cli-automl-image-object-detection-task-fridge-items-automode.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items - timeout-minutes: 60 - continue-on-error: true - - - name: set sampledata dataset - if: 
${{ always() }} - run: | - set -x - pip install azure.cli.core - pip install azureml-dataset-runtime - python -c ' - - import sys - from azureml.core.workspace import Workspace - from azureml.core import Dataset - from azureml.core.authentication import AzureCliAuthentication - - cli_auth = AzureCliAuthentication() - ws = Workspace.get(subscription_id=sys.argv[1], - resource_group=sys.argv[2], - name=sys.argv[3], - auth=cli_auth) - datastore = ws.datastores["workspaceblobstore"] - dataset = Dataset.File.from_files(path=[(datastore, "example-data")]) - dataset.register(ws, "sampledata") - - ' "$SUBSCRIPTION" "$RESOURCE_GROUP" "$WORKSPACE" - timeout-minutes: 300 - continue-on-error: true - - - name: create asset for cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components - if: ${{ always() }} - run: | - set -x - az ml component create --file train.yml - az ml component create --file score.yml - az ml component create --file eval.yml - working-directory: cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components - timeout-minutes: 30 - continue-on-error: true - - - - name: setup env for cli/jobs/pipelines-with-components/rai_pipeline_adult_analyse/ - if: ${{ always() }} - run: | - set -x - az ml environment create --file environment/responsibleai-environment.yaml - az ml data create --file data/data_adult_train.yaml - az ml data create --file data/data_adult_test.yaml - working-directory: cli/jobs/pipelines-with-components/rai_pipeline_adult_analyse/ - timeout-minutes: 30 - continue-on-error: true - - - name: Please manually setup uri_file for cli/jobs/pipelines-with-components/basics/4b_datastore_datapath_uri/pipeline.yml - if: ${{ always() }} - run: | - echo Please manually setup uri_file for cli/jobs/pipelines-with-components/basics/4b_datastore_datapath_uri/pipeline.yml - working-directory: cli/jobs/pipelines-with-components/basics/4b_datastore_datapath_uri - timeout-minutes: 30 - continue-on-error: true - - - name: setup_asset 
/cli/assets/data/local-folder.yml - if: ${{ always() }} - run: az ml data create -f local-folder.yml - working-directory: cli/assets/data - timeout-minutes: 300 - continue-on-error: true - - - name: setup_dataset - if: ${{ always() }} - run: bash create-datasets.sh - working-directory: setup-repo - timeout-minutes: 300 - continue-on-error: true - - - name: download azcopy - if: ${{ always() }} - run: | - set -x - wget https://azcopyvnext.azureedge.net/release20220511/azcopy_linux_amd64_10.15.0.tar.gz - tar zxf azcopy_linux_amd64_10.15.0.tar.gz - cp azcopy_linux_amd64_10.15.0/azcopy . - working-directory: setup-repo - timeout-minutes: 30 - continue-on-error: true - - - name: Please manually run copy-data.sh - if: ${{ always() }} - run: | - # bash copy-data.sh - echo 'Please manually run "bash copy-data.sh" in setup-repo directory' - working-directory: setup-repo - timeout-minutes: 300 - continue-on-error: true - - - name: Setup uri_folder - if: ${{ always() }} - run: | - echo 'Please manually setup local-folder-example uri_folder for cli/jobs/basics/hello-data-uri-folder.yml' - echo 'Please manually setup local-folder-example uri_folder for cli/jobs/pipelines-with-components/basics/4d_data_input/pipeline.yml' - cat << EOF > local_uri_floder.yml - \$schema: https://azuremlschemas.azureedge.net/latest/data.schema.json - name: local-folder-example - description: Dataset created from local folder. 
- type: uri_folder - path: cli/jobs/pipelines-with-components/basics/4d_data_input/data - EOF - - az ml data create --subscription $SUBSCRIPTION --resource-group $RESOURCE_GROUP --workspace $WORKSPACE -f local_uri_floder.yml - timeout-minutes: 300 - continue-on-error: true - - - name: Setup mltable - if: ${{ always() }} - run: | - echo 'Please manually setup local-folder-example mltable for cli/jobs/basics/hello-dataset.yml' - cat << EOF > local_mltable.yml - \$schema: https://azuremlschemas.azureedge.net/latest/data.schema.json - name: sampledata - description: Dataset created from local folder. - type: mltable - path: cli/jobs/basics/hello-automl - EOF - - # az ml data create --subscription $SUBSCRIPTION --resource-group $RESOURCE_GROUP --workspace $WORKSPACE -f local_mltable.yml - timeout-minutes: 300 - continue-on-error: true - - - name: Setup hello-world.txt - if: ${{ always() }} - run: | - echo 'Please manually setup hello-world.txt for cli/jobs/pipelines-with-components/basics/4b_datastore_datapath_uri/pipeline.yml' - # echo "hello world" > hello-world.txt - # azcopy cp hello-world.txt https://amlarcgistorage7a0860601.blob.core.windows.net/azureml-blobstore-68875c58-4a7d-46e4-bcb9-e17da409f580/azureml/ - - timeout-minutes: 300 - continue-on-error: true - - - - name: over - run: echo over! 
diff --git a/.github/workflows/python-sdk-tutorial-auto-ml-forecasting-demand-forecasting-many-models.yml b/.github/workflows/python-sdk-tutorial-auto-ml-forecasting-demand-forecasting-many-models.yml new file mode 100644 index 0000000000..a3cc46868e --- /dev/null +++ b/.github/workflows/python-sdk-tutorial-auto-ml-forecasting-demand-forecasting-many-models.yml @@ -0,0 +1,64 @@ +name: auto-ml-forecasting-demand-forecasting-many-models +# This file is generated by v1/python-sdk/tutorials/automl-with-azureml/generate_workflows.py +on: + workflow_dispatch: + schedule: + - cron: "0 11 * * 2" + pull_request: + branches: + - main + paths: + - v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/** + - v1/python-sdk/tutorials/automl-with-azureml/automl_env_linux.yml + - .github/workflows/python-sdk-tutorial-auto-ml-forecasting-demand-forecasting-many-models.yml +jobs: + build: + runs-on: ${{vars.V1_UBUNTU_RUNNER}} + defaults: + run: + shell: bash -l {0} + strategy: + fail-fast: false + steps: + - name: check out repo + uses: actions/checkout@v2 + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: "3.8" + - name: Run Install packages + run: | + chmod +x ./v1/scripts/install-packages.sh + ./v1/scripts/install-packages.sh + shell: bash + - name: create automl conda environment + uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: azure_automl + environment-file: v1/python-sdk/tutorials/automl-with-azureml/automl_env_linux.yml + auto-activate-base: false + - name: install papermill and set up the IPython kernel + run: | + pip install papermill==2.4.0 + python -m ipykernel install --user --name azure_automl --display-name "Python (azure_automl)" + pip list + - name: azure login + uses: azure/login@v1 + with: + creds: ${{secrets.AZUREML_CREDENTIALS}} + - name: Run update-azure-extensions + run: | + chmod +x ./v1/scripts/update-azure-extensions.sh + ./v1/scripts/update-azure-extensions.sh + shell: 
bash + - name: attach to workspace + run: az ml folder attach -w main -g azureml-examples + - name: run auto-ml-forecasting-demand-forecasting-many-models.ipynb + run: papermill -k python auto-ml-forecasting-demand-forecasting-many-models.ipynb auto-ml-forecasting-demand-forecasting-many-models.output.ipynb + working-directory: v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models + - name: upload notebook's working folder as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: auto-ml-forecasting-demand-forecasting-many-models + path: v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml b/.github/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml deleted file mode 100644 index fefc37004f..0000000000 --- a/.github/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-evaluation-fill-mask-fill-mask -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "34 11/12 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/evaluation/fill-mask/** - - .github/workflows/sdk-foundation-models-system-evaluation-fill-mask-fill-mask.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/evaluation/fill-mask/fill-mask.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "fill-mask.ipynb"; - [ -f 
"../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python fill-mask.ipynb fill-mask.output.ipynb - working-directory: sdk/python/foundation-models/system/evaluation/fill-mask - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: fill-mask - path: sdk/python/foundation-models/system/evaluation/fill-mask diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml b/.github/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml deleted file mode 100644 index da046dae81..0000000000 --- a/.github/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-evaluation-question-answering-question-answering -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "12 3/12 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/evaluation/question-answering/** - - .github/workflows/sdk-foundation-models-system-evaluation-question-answering-question-answering.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/evaluation/question-answering/question-answering.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values 
"question-answering.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python question-answering.ipynb question-answering.output.ipynb - working-directory: sdk/python/foundation-models/system/evaluation/question-answering - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: question-answering - path: sdk/python/foundation-models/system/evaluation/question-answering diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml b/.github/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml deleted file mode 100644 index 9cfb152b4b..0000000000 --- a/.github/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "28 9/12 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/evaluation/summarization/** - - .github/workflows/sdk-foundation-models-system-evaluation-summarization-abstractive-and-extractive-summarization.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace 
}}/infra/sdk_helpers.sh" replace_template_values "abstractive-and-extractive-summarization.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python abstractive-and-extractive-summarization.ipynb abstractive-and-extractive-summarization.output.ipynb - working-directory: sdk/python/foundation-models/system/evaluation/summarization - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: abstractive-and-extractive-summarization - path: sdk/python/foundation-models/system/evaluation/summarization diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml b/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml deleted file mode 100644 index b84c8324f2..0000000000 --- a/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "56 5/12 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/evaluation/text-classification/** - - .github/workflows/sdk-foundation-models-system-evaluation-text-classification-entailment-contradiction.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" 
replace_template_values "entailment-contradiction.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python entailment-contradiction.ipynb entailment-contradiction.output.ipynb - working-directory: sdk/python/foundation-models/system/evaluation/text-classification - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: entailment-contradiction - path: sdk/python/foundation-models/system/evaluation/text-classification diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml b/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml deleted file mode 100644 index e01c8ff96f..0000000000 --- a/.github/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "20 7/12 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/evaluation/text-classification/** - - .github/workflows/sdk-foundation-models-system-evaluation-text-classification-sentiment-analysis.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" 
replace_template_values "sentiment-analysis.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python sentiment-analysis.ipynb sentiment-analysis.output.ipynb - working-directory: sdk/python/foundation-models/system/evaluation/text-classification - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: sentiment-analysis - path: sdk/python/foundation-models/system/evaluation/text-classification diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml b/.github/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml deleted file mode 100644 index 5b964e14a4..0000000000 --- a/.github/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-evaluation-text-generation-text-generation -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "57 9/12 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/evaluation/text-generation/** - - .github/workflows/sdk-foundation-models-system-evaluation-text-generation-text-generation.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/evaluation/text-generation/text-generation.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values 
"text-generation.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python text-generation.ipynb text-generation.output.ipynb - working-directory: sdk/python/foundation-models/system/evaluation/text-generation - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: text-generation - path: sdk/python/foundation-models/system/evaluation/text-generation diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml b/.github/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml deleted file mode 100644 index 23479dfa24..0000000000 --- a/.github/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "8 2/12 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/evaluation/token-classification/** - - .github/workflows/sdk-foundation-models-system-evaluation-token-classification-news-articles-entity-recognition.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace 
}}/infra/sdk_helpers.sh" replace_template_values "news-articles-entity-recognition.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python news-articles-entity-recognition.ipynb news-articles-entity-recognition.output.ipynb - working-directory: sdk/python/foundation-models/system/evaluation/token-classification - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: news-articles-entity-recognition - path: sdk/python/foundation-models/system/evaluation/token-classification diff --git a/.github/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml b/.github/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml deleted file mode 100644 index 4cf2f733af..0000000000 --- a/.github/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "28 3/12 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/evaluation/translation/** - - .github/workflows/sdk-foundation-models-system-evaluation-translation-translation-romanian-to-english.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" 
replace_template_values "translation-romanian-to-english.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python translation-romanian-to-english.ipynb translation-romanian-to-english.output.ipynb - working-directory: sdk/python/foundation-models/system/evaluation/translation - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: translation-romanian-to-english - path: sdk/python/foundation-models/system/evaluation/translation diff --git a/.github/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml b/.github/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml deleted file mode 100644 index 3735776e07..0000000000 --- a/.github/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-finetune-question-answering-extractive-qa -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "0 0 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/finetune/question-answering/** - - .github/workflows/sdk-foundation-models-system-finetune-question-answering-extractive-qa.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/finetune/question-answering/extractive-qa.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values 
"extractive-qa.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python extractive-qa.ipynb extractive-qa.output.ipynb - working-directory: sdk/python/foundation-models/system/finetune/question-answering - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: extractive-qa - path: sdk/python/foundation-models/system/finetune/question-answering diff --git a/.github/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml b/.github/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml deleted file mode 100644 index de55d05410..0000000000 --- a/.github/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-finetune-summarization-news-summary -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "0 0 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/finetune/summarization/** - - .github/workflows/sdk-foundation-models-system-finetune-summarization-news-summary.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/finetune/summarization/news-summary.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "news-summary.ipynb"; - [ -f 
"../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python news-summary.ipynb news-summary.output.ipynb - working-directory: sdk/python/foundation-models/system/finetune/summarization - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: news-summary - path: sdk/python/foundation-models/system/finetune/summarization diff --git a/.github/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml b/.github/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml deleted file mode 100644 index 93afea236f..0000000000 --- a/.github/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-finetune-text-classification-emotion-detection -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "0 0 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/finetune/text-classification/** - - .github/workflows/sdk-foundation-models-system-finetune-text-classification-emotion-detection.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/finetune/text-classification/emotion-detection.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values 
"emotion-detection.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python emotion-detection.ipynb emotion-detection.output.ipynb - working-directory: sdk/python/foundation-models/system/finetune/text-classification - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: emotion-detection - path: sdk/python/foundation-models/system/finetune/text-classification diff --git a/.github/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml b/.github/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml deleted file mode 100644 index 5330b7a783..0000000000 --- a/.github/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-finetune-token-classification-token-classification -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "0 0 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/finetune/token-classification/** - - .github/workflows/sdk-foundation-models-system-finetune-token-classification-token-classification.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/finetune/token-classification/token-classification.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values 
"token-classification.ipynb"; - [ -f "../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python token-classification.ipynb token-classification.output.ipynb - working-directory: sdk/python/foundation-models/system/finetune/token-classification - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: token-classification - path: sdk/python/foundation-models/system/finetune/token-classification diff --git a/.github/workflows/sdk-foundation-models-system-finetune-translation-translation.yml b/.github/workflows/sdk-foundation-models-system-finetune-translation-translation.yml deleted file mode 100644 index 5347d7be3a..0000000000 --- a/.github/workflows/sdk-foundation-models-system-finetune-translation-translation.yml +++ /dev/null @@ -1,77 +0,0 @@ -# This code is autogenerated. -# Code is generated by running custom script: python3 readme.py -# Any manual changes to this file may cause incorrect behavior. -# Any manual changes will be overwritten if the code is regenerated. - -name: sdk-foundation-models-system-finetune-translation-translation -# This file is created by sdk/python/readme.py. -# Please do not edit directly. 
-on: - workflow_dispatch: - schedule: - - cron: "0 0 * * *" - pull_request: - branches: - - main - paths: - - sdk/python/foundation-models/system/finetune/translation/** - - .github/workflows/sdk-foundation-models-system-finetune-translation-translation.yml - - sdk/python/dev-requirements.txt - - infra/** - - sdk/python/setup.sh -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: pip install notebook reqs - run: pip install -r sdk/python/dev-requirements.txt - - name: pip install mlflow reqs - run: pip install -r sdk/python/mlflow-requirements.txt - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - - name: bootstrap resources - run: | - echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}'; - bash bootstrap.sh - working-directory: infra - continue-on-error: false - - name: setup SDK - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: sdk/python - continue-on-error: true - - name: setup-cli - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash setup.sh - working-directory: cli - continue-on-error: true - - name: run foundation-models/system/finetune/translation/translation.ipynb - run: | - source "${{ github.workspace }}/infra/sdk_helpers.sh"; - source "${{ github.workspace }}/infra/init_environment.sh"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json"; - bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "translation.ipynb"; - [ -f 
"../../.azureml/config" ] && cat "../../.azureml/config"; - papermill -k python translation.ipynb translation.output.ipynb - working-directory: sdk/python/foundation-models/system/finetune/translation - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: translation - path: sdk/python/foundation-models/system/finetune/translation diff --git a/best-practices/largescale-deep-learning/Training/Bert-Pretrain/AML-DeepSpeed-submit.yml b/best-practices/largescale-deep-learning/Training/Bert-Pretrain/AML-DeepSpeed-submit.yml index 22581a0379..511a350e5a 100644 --- a/best-practices/largescale-deep-learning/Training/Bert-Pretrain/AML-DeepSpeed-submit.yml +++ b/best-practices/largescale-deep-learning/Training/Bert-Pretrain/AML-DeepSpeed-submit.yml @@ -24,15 +24,15 @@ resources: shm_size: 3100m services: my_jupyterlab: - job_service_type: jupyter_lab + type: jupyter_lab nodes: all my_tensorboard: - job_service_type: tensor_board + type: tensor_board properties: logDir: "outputs/runs/" nodes: all my_vscode: - job_service_type: vs_code + type: vs_code nodes: all \ No newline at end of file diff --git a/best-practices/largescale-deep-learning/Training/Bert-Pretrain/AML-submit.yml b/best-practices/largescale-deep-learning/Training/Bert-Pretrain/AML-submit.yml index 2cc92c7a86..41581df17a 100644 --- a/best-practices/largescale-deep-learning/Training/Bert-Pretrain/AML-submit.yml +++ b/best-practices/largescale-deep-learning/Training/Bert-Pretrain/AML-submit.yml @@ -20,13 +20,13 @@ resources: instance_count: 2 services: my_jupyterlab: - job_service_type: jupyter_lab + type: jupyter_lab nodes: all my_tensorboard: - job_service_type: tensor_board + type: tensor_board properties: logDir: "outputs/runs/" nodes: all my_vscode: - job_service_type: vs_code + type: vs_code nodes: all \ No newline at end of file diff --git a/best-practices/largescale-deep-learning/Training/README.md 
b/best-practices/largescale-deep-learning/Training/README.md index 227d9b730f..5629d26b8e 100644 --- a/best-practices/largescale-deep-learning/Training/README.md +++ b/best-practices/largescale-deep-learning/Training/README.md @@ -214,14 +214,14 @@ To achive the best possible performance and resource utilization of jobs on Azur ``` services: my_jupyterlab: - job_service_type: jupyter_lab + type: jupyter_lab nodes: all my_tensorboard: - job_service_type: tensor_board + type: tensor_board log_dir: "outputs/runs/" #default is working directory on job container nodes: all my_vscode: - job_service_type: vs_code + type: vs_code nodes: all ``` diff --git a/cli/foundation-models/system/evaluation/fill-mask/fill-mask-pipeline.yml b/cli/foundation-models/system/evaluation/fill-mask/fill-mask-pipeline.yml deleted file mode 100644 index b9c73a605c..0000000000 --- a/cli/foundation-models/system/evaluation/fill-mask/fill-mask-pipeline.yml +++ /dev/null @@ -1,20 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: fill-mask-model-evaluation-subgraph - -compute: gpu-cluster-big - -jobs: - pipeline_component_job: - type: pipeline - component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest - inputs: - task: fill-mask - test_data: - path: "../../../../../sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask/small-test.jsonl" - type: uri_file - mlflow_model: - path: azureml://registries/azureml-preview/models/bert-based-uncased/versions/3 - label_column_name: title - device: gpu diff --git a/cli/foundation-models/system/evaluation/question-answering/extractive-qa-pipeline.yml b/cli/foundation-models/system/evaluation/question-answering/extractive-qa-pipeline.yml deleted file mode 100644 index ee7d8b4931..0000000000 --- a/cli/foundation-models/system/evaluation/question-answering/extractive-qa-pipeline.yml +++ /dev/null @@ -1,20 +0,0 @@ -$schema: 
https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: question-answering-model-evaluation-subgraph - -compute: gpu-cluster-big - -jobs: - pipeline_component_job: - type: pipeline - component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest - inputs: - task: question-answering - test_data: - path: "../../../../../sdk/python/foundation-models/system/evaluation/question-answering/squad-v2/small-test.jsonl" - type: uri_file - mlflow_model: - path: azureml://registries/azureml-preview/models/distilbert-base-uncased-distilled-squad/versions/3 - label_column_name: answer_text - device: gpu diff --git a/cli/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization-pipeline.yml b/cli/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization-pipeline.yml deleted file mode 100644 index e17aba156d..0000000000 --- a/cli/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization-pipeline.yml +++ /dev/null @@ -1,20 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: text-summarization-model-evaluation-subgraph - -compute: gpu-cluster-big - -jobs: - pipeline_component_job: - type: pipeline - component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest - inputs: - task: text-summarization - test_data: - path: "../../../../../sdk/python/foundation-models/system/evaluation/summarization/cnn_dailymail/small-test.jsonl" - type: uri_file - mlflow_model: - path: azureml://registries/azureml-preview/models/sshleifer-distilbart-cnn-12-6/versions/3 - label_column_name: summary - device: gpu diff --git a/cli/foundation-models/system/evaluation/text-classification/entailment-contradiction-pipeline.yml b/cli/foundation-models/system/evaluation/text-classification/entailment-contradiction-pipeline.yml 
deleted file mode 100644 index 087c090e65..0000000000 --- a/cli/foundation-models/system/evaluation/text-classification/entailment-contradiction-pipeline.yml +++ /dev/null @@ -1,20 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: text-classification-mnli-model-evaluation-subgraph - -compute: gpu-cluster-big - -jobs: - pipeline_component_job: - type: pipeline - component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest - inputs: - task: text-classification - test_data: - path: "../../../../../sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/small_train.jsonl" - type: uri_file - mlflow_model: - path: azureml://registries/azureml-preview/models/bert-based-uncased/versions/3 - label_column_name: label_string - device: gpu diff --git a/cli/foundation-models/system/evaluation/text-generation/text-generation-pipeline.yml b/cli/foundation-models/system/evaluation/text-generation/text-generation-pipeline.yml deleted file mode 100644 index 76264bc297..0000000000 --- a/cli/foundation-models/system/evaluation/text-generation/text-generation-pipeline.yml +++ /dev/null @@ -1,20 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: text-generation-model-evaluation-subgraph - -compute: gpu-cluster-big - -jobs: - pipeline_component_job: - type: pipeline - component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest - inputs: - task: text-generation - test_data: - path: "../../../../../sdk/python/foundation-models/system/evaluation/text-generation/text-generation/small-test.jsonl" - type: uri_file - mlflow_model: - path: azureml://registries/azureml-preview/models/gpt2/versions/3 - label_column_name: ground_truth - device: gpu diff --git 
a/cli/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition-pipeline.yml b/cli/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition-pipeline.yml deleted file mode 100644 index fa7207f2c2..0000000000 --- a/cli/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition-pipeline.yml +++ /dev/null @@ -1,20 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: named-entity-recognition-model-evaluation-subgraph - -compute: gpu-cluster-big - -jobs: - pipeline_component_job: - type: pipeline - component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest - inputs: - task: text-named-entity-recognition - test_data: - path: "../../../../../sdk/python/foundation-models/system/evaluation/token-classification/conll2003/small-test.jsonl" - type: uri_file - mlflow_model: - path: azureml://registries/azureml-preview/models/jean-baptiste-camembert-ner/versions/3 - label_column_name: ner_tags_str - device: gpu diff --git a/cli/foundation-models/system/evaluation/translation/translation-pipeline.yml b/cli/foundation-models/system/evaluation/translation/translation-pipeline.yml deleted file mode 100644 index 74bf800333..0000000000 --- a/cli/foundation-models/system/evaluation/translation/translation-pipeline.yml +++ /dev/null @@ -1,20 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: text-translation-model-evaluation-subgraph - -compute: gpu-cluster-big - -jobs: - pipeline_component_job: - type: pipeline - component: azureml://registries/azureml-preview/components/model_evaluation_pipeline/labels/latest - inputs: - task: text-translation - test_data: - path: "../../../../../sdk/python/foundation-models/system/evaluation/translation/wmt16_ro-en/small-test.jsonl" - type: uri_file - mlflow_model: - path: 
azureml://registries/azureml-preview/models/t5-base/versions/4 - label_column_name: ro - device: gpu diff --git a/cli/foundation-models/system/finetune/question-answering/deploy.yml b/cli/foundation-models/system/finetune/question-answering/deploy.yml deleted file mode 100644 index 40b0d93f09..0000000000 --- a/cli/foundation-models/system/finetune/question-answering/deploy.yml +++ /dev/null @@ -1,4 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/question-answering/extractive-qa-pipeline.yml b/cli/foundation-models/system/finetune/question-answering/extractive-qa-pipeline.yml deleted file mode 100644 index 9db2405668..0000000000 --- a/cli/foundation-models/system/finetune/question-answering/extractive-qa-pipeline.yml +++ /dev/null @@ -1,90 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: question-answering-extractive-qna - -inputs: - compute_model_import: gpu-cluster-big - compute_preprocess: gpu-cluster-big - compute_finetune: gpu-cluster-big - compute_model_evaluation: gpu-cluster-big - - # specify the foundation model available in the azureml system registry - mlflow_model_path: - path: azureml://registries/azureml-preview/models/bert-based-uncased/versions/3 - # huggingface_id: 'bert-base-uncased' # if you want to use a huggingface model, uncomment this line and comment the above line - - # map the dataset files to parameters - train_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_train.jsonl" - validation_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_validation.jsonl" - test_file_path: - type: uri_file - path: 
"../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_test.jsonl" - evaluation_config_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json" - - # The following parameters map to the dataset fields - # the question whose answer needs to be extracted from the provided context  - # question_key parameter maps to the "question" field in the SQuAD dataset - question_key: "question" - # the context that contains the answer to the question - # context_key parameter maps to the "context" field in the SQuAD dataset - context_key: "context" - # The value of this field is text in json format with two nested keys, answer_start_key and answer_text_key with their corresponding values - # answers_key parameter maps to the "answers" field in the SQuAD dataset - answers_key: "answers" - # Refers to the position where the answer beings in context. Needs a value that maps to a nested key in the values of the answers_key parameter. - # in the SQuAD dataset, the answer_start_key maps "answer_start" under "answer" - answer_start_key: "answer_start" - # Contains the answer to the question. 
Needs a value that maps to a nested key in the values of the answers_key parameter - # in the SQuAD dataset, the answer_text_key maps to "text" under "answer" - answer_text_key: "text" - - # training settings - number_of_gpu_to_use_finetuning: 2 - num_train_epochs: 3 - learning_rate: 2e-5 - -outputs: - # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model - # registering the model is required to deploy the model to an online or batch endpoint - trained_model: - type: mlflow_model - -settings: - force_rerun: true - -jobs: - extractive_qna_finetune_job: - type: pipeline - # component: azureml://registries/azureml-preview/components/question_answering_pipeline/versions/0.0.3 - component: azureml://registries/azureml-preview/components/question_answering_pipeline/labels/latest - inputs: - mlflow_model_path: ${{parent.inputs.mlflow_model_path}} - - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_preprocess: ${{parent.inputs.compute_preprocess}} - compute_finetune: ${{parent.inputs.compute_finetune}} - compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}} - - train_file_path: ${{parent.inputs.train_file_path}} - validation_file_path: ${{parent.inputs.validation_file_path}} - test_file_path: ${{parent.inputs.test_file_path}} - evaluation_config: ${{parent.inputs.evaluation_config_path}} - - question_key: ${{parent.inputs.question_key}} - context_key: ${{parent.inputs.context_key}} - answers_key: ${{parent.inputs.answers_key}} - answer_start_key: ${{parent.inputs.answer_start_key}} - answer_text_key: ${{parent.inputs.answer_text_key}} - - number_of_gpu_to_use_finetuning: ${{parent.inputs.number_of_gpu_to_use_finetuning}} - num_train_epochs: ${{parent.inputs.num_train_epochs}} - learning_rate: ${{parent.inputs.learning_rate}} - outputs: - mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git 
a/cli/foundation-models/system/finetune/question-answering/extractive-qa.sh b/cli/foundation-models/system/finetune/question-answering/extractive-qa.sh deleted file mode 100644 index 20dd673bf3..0000000000 --- a/cli/foundation-models/system/finetune/question-answering/extractive-qa.sh +++ /dev/null @@ -1,186 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection -# the data files are available in the same folder as the above notebook - -# script inputs -subscription_id="" -resource_group_name="" -workspace_name="" -registry_name="azureml" - -compute_cluster="gpu-cluster-big" -# if above compute cluster does not exist, create it with the following vm size -compute_sku="Standard_ND40rs_v2" -# This is the number of GPUs in a single node of the selected 'vm_size' compute. -# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train. -# Setting this to more than the number of GPUs will result in an error. 
-gpus_per_node=2 -# This is the foundation model for finetuning -model_name="bert-base-uncased" -# using the latest version of the model - not working yet -model_version=1 - -version=$(date +%s) -finetuned_model_name=$model_name"-extractive-qna" -endpoint_name="ext-qna-$version" -deployment_sku="Standard_DS3_v2" - - -# training data -train_data="../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_train.jsonl" -# validation data -validation_data="../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_validation.jsonl" -# test data -test_data="../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/small_test.jsonl" -# evaluation config -evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json" -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/sample_score.json" - -# finetuning job parameters -finetuning_pipeline_component="question_answering_pipeline" -# The following parameters map to the dataset fields -# the question whose answer needs to be extracted from the provided context  -# question_key parameter maps to the "question" field in the SQuAD dataset -question_key="question" -# the context that contains the answer to the question -# context_key parameter maps to the "context" field in the SQuAD dataset -context_key="context" -# The value of this field is text in json format with two nested keys, answer_start_key and answer_text_key with their corresponding values -# answers_key parameter maps to the "answers" field in the SQuAD dataset -answers_key="answers" -# Refers to the position where the answer beings in context. Needs a value that maps to a nested key in the values of the answers_key parameter. 
-# in the SQuAD dataset, the answer_start_key maps "answer_start" under "answer" -answer_start_key="answer_start" -# Contains the answer to the question. Needs a value that maps to a nested key in the values of the answers_key parameter -# in the SQuAD dataset, the answer_text_key maps to "text" under "answer" -answer_text_key="text" -# Training settings -number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute -num_train_epochs=3 -learning_rate=2e-5 - -# 1. Setup pre-requisites - -if [ "$subscription_id" = "" ] || \ - [ "$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# check if $compute_cluster exists, else create it -if az ml compute show --name $compute_cluster $workspace_info -then - echo "Compute cluster $compute_cluster already exists" -else - echo "Creating compute cluster $compute_cluster" - az ml compute create --name $compute_cluster --type amlcompute --min-instances 0 --max-instances 2 --size $compute_sku $workspace_info || { - echo "Failed to create compute cluster $compute_cluster" - exit 1 - } -fi - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Check if training data, validation data and test data exist -if [ ! -f $train_data ]; then - echo "Training data $train_data does not exist" - exit 1 -fi -if [ ! -f $validation_data ]; then - echo "Validation data $validation_data does not exist" - exit 1 -fi -if [ ! 
-f $test_data ]; then - echo "Test data $test_data does not exist" - exit 1 -fi - -# 4. Submit finetuning job using pipeline.yml - -# check if the finetuning pipeline component exists -if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name -then - echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" - exit 1 -fi - -# need to switch to using latest version for model, currently blocked with a bug. -# submit finetuning job -parent_job_name=$( az ml job create --file ./extractive-qa-pipeline.yml $workspace_info --query name -o tsv --set \ - jobs.extractive_qna_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \ - inputs.compute_model_import=$compute_cluster \ - inputs.compute_preprocess=$compute_cluster \ - inputs.compute_finetune=$compute_cluster \ - inputs.compute_model_evaluation=$compute_cluster \ - inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \ - inputs.train_file_path.path=$train_data \ - inputs.validation_file_path.path=$validation_data \ - inputs.test_file_path.path=$test_data \ - inputs.evaluation_config.path=$evaluation_config \ - inputs.question_key=$question_key \ - inputs.context_key=$context_key \ - inputs.answers_key=$answers_key \ - inputs.answer_start_key=$answer_start_key \ - inputs.answer_text_key=$answer_text_key \ - inputs.number_of_gpu_to_use_finetuning=$number_of_gpu_to_use_finetuning \ - inputs.num_train_epochs=$num_train_epochs \ - inputs.learning_rate=$learning_rate ) || { - echo "Failed to submit finetuning job" - exit 1 - } - -az ml job stream --name $parent_job_name $workspace_info || { - echo "job stream failed"; exit 1; -} - -# 5. 
Create model in workspace from train job output -az ml model create --name $finetuned_model_name --version $version --type mlflow_model \ - --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info || { - echo "model create in workspace failed"; exit 1; -} - -# 6. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml:$finetuned_model_name:$version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 7. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 8. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} diff --git a/cli/foundation-models/system/finetune/summarization/deploy.yml b/cli/foundation-models/system/finetune/summarization/deploy.yml deleted file mode 100644 index 40b0d93f09..0000000000 --- a/cli/foundation-models/system/finetune/summarization/deploy.yml +++ /dev/null @@ -1,4 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/summarization/download-dataset.py b/cli/foundation-models/system/finetune/summarization/download-dataset.py new file mode 100644 index 0000000000..e2b98eac10 --- /dev/null +++ b/cli/foundation-models/system/finetune/summarization/download-dataset.py @@ -0,0 +1,87 @@ +# import library to parse command line arguments +import argparse, os +import pandas as pd +import os + +parser = argparse.ArgumentParser() +# add an argument to specify a dataset name to download +parser.add_argument("--dataset", type=str, default="cnn_dailymail", help="dataset name") +# add an argument to specify the config name of the dataset +parser.add_argument( + "--config_name", type=str, default="3.0.0", help="config name of the dataset" +) +# argument to save a fraction of the dataset +parser.add_argument( + "--fraction", type=float, default=0.05, help="fraction of the dataset to save" +) +# add an argument to specify the directory to download the dataset to +parser.add_argument( + "--download_dir", + type=str, + default="./news-summary-dataset", + help="directory to download the dataset to", +) +args = parser.parse_args() + +# create the download directory if it does not exist +if not os.path.exists(args.download_dir): + os.makedirs(args.download_dir) + +# import hugging face datasets library +from datasets import load_dataset, 
get_dataset_split_names + +for split in get_dataset_split_names(args.dataset, config_name=args.config_name): + print(f"Loading {split} split of {args.dataset} dataset...") + # load the split of the dataset + dataset = load_dataset(args.dataset, args.config_name, split=split) + # save the split of the dataset to the download directory as json lines file + dataset.select(range(int(dataset.num_rows * args.fraction))).to_json( + os.path.join(args.download_dir, f"{split}.jsonl") + ) + +train_df = pd.read_json(os.path.join(args.download_dir, "train.jsonl"), lines=True) +validation_df = pd.read_json( + os.path.join(args.download_dir, "validation.jsonl"), lines=True +) +# this dataset doesn't have test data, so split the validation_df into test_df and validation_df +test_df = validation_df.sample(frac=0.5, random_state=42) +validation_df.drop(test_df.index, inplace=True) +# drop the id column as it is not needed for fine tuning +train_df.drop(columns=["id"], inplace=True) +validation_df.drop(columns=["id"], inplace=True) +test_df.drop(columns=["id"], inplace=True) + + +# save 20% of the rows from the dataframes into files with small_ prefix in the ./news-summary-dataset folder +train_df.sample(frac=0.2).to_json( + os.path.join(args.download_dir, "small_train.jsonl"), orient="records", lines=True +) +validation_df.sample(frac=0.2).to_json( + os.path.join(args.download_dir, "small_validation.jsonl"), + orient="records", + lines=True, +) +test_df.sample(frac=0.2).to_json( + os.path.join(args.download_dir, "small_test.jsonl"), orient="records", lines=True +) + + +# generate sample scoring data +# read ./news-summary-dataset/small_test.jsonl into a pandas dataframe +import pandas as pd +import json + +test_df = pd.read_json( + os.path.join(args.download_dir, "small_test.jsonl"), orient="records", lines=True +) +# take 1 random sample +test_df = test_df.sample(n=1) +# rebuild index +test_df.reset_index(drop=True, inplace=True) +# rename the highlights column to 
ground_truth_summary +test_df.rename(columns={"highlights": "ground_truth_summary"}, inplace=True) +# create a json object with the key as "inputs" and value as a list of values from the article column of the test dataframe +test_json = {"inputs": {"input_string": test_df["article"].tolist()}} +# save the json object to a file named sample_score.json in the ./emotion-dataset folder +with open(os.path.join(args.download_dir, "sample_score.json"), "w") as f: + json.dump(test_json, f) diff --git a/cli/foundation-models/system/finetune/summarization/news-summary-pipeline.yml b/cli/foundation-models/system/finetune/summarization/news-summary-pipeline.yml deleted file mode 100644 index 77490c046e..0000000000 --- a/cli/foundation-models/system/finetune/summarization/news-summary-pipeline.yml +++ /dev/null @@ -1,77 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: summarization-news-summary - -inputs: - compute_model_import: gpu-cluster-big - compute_preprocess: gpu-cluster-big - compute_finetune: gpu-cluster-big - compute_model_evaluation: gpu-cluster-big - - # specify the foundation model available in the azureml system registry - mlflow_model_path: - path: azureml://registries/azureml-preview/models/t5-small/versions/4 - # huggingface_id: 't5-small' # if you want to use a huggingface model, uncomment this line and comment the above line - - # map the dataset files to parameters - train_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_train.jsonl" - validation_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_validation.jsonl" - test_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_test.jsonl" - evaluation_config_path: - type: uri_file - 
path: "../../../../../sdk/python/foundation-models/system/finetune/summarization/summarization-config.json" - - - # The following parameters map to the dataset fields - # document_key parameter maps to the "article" field in the news summary dataset - document_key: "article" - # summary_key parameter maps to the "highlights" field in the news summary dataset - summary_key: "highlights" - - # training settings - number_of_gpu_to_use_finetuning: 2 - num_train_epochs: 3 - learning_rate: 2e-5 - -outputs: - # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model - # registering the model is required to deploy the model to an online or batch endpoint - trained_model: - type: mlflow_model - -settings: - force_rerun: true - -jobs: - news_summary_finetune_job: - type: pipeline - # component: azureml://registries/azureml-preview/components/summarization_pipeline/versions/0.0.3 - component: azureml://registries/azureml-preview/components/summarization_pipeline/labels/latest - inputs: - mlflow_model_path: ${{parent.inputs.mlflow_model_path}} - - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_preprocess: ${{parent.inputs.compute_preprocess}} - compute_finetune: ${{parent.inputs.compute_finetune}} - compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}} - - train_file_path: ${{parent.inputs.train_file_path}} - validation_file_path: ${{parent.inputs.validation_file_path}} - test_file_path: ${{parent.inputs.test_file_path}} - evaluation_config: ${{parent.inputs.evaluation_config_path}} - - document_key: ${{parent.inputs.document_key}} - summary_key: ${{parent.inputs.summary_key}} - - number_of_gpu_to_use_finetuning: ${{parent.inputs.number_of_gpu_to_use_finetuning}} - num_train_epochs: ${{parent.inputs.num_train_epochs}} - learning_rate: ${{parent.inputs.learning_rate}} - outputs: - mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git 
a/cli/foundation-models/system/finetune/summarization/news-summary.sh b/cli/foundation-models/system/finetune/summarization/news-summary.sh deleted file mode 100644 index 3078fd2abf..0000000000 --- a/cli/foundation-models/system/finetune/summarization/news-summary.sh +++ /dev/null @@ -1,172 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection -# the data files are available in the same folder as the above notebook - -# script inputs -subscription_id="" -resource_group_name="" -workspace_name="" -registry_name="azureml" - -compute_cluster="gpu-cluster-big" -# if above compute cluster does not exist, create it with the following vm size -compute_sku="Standard_ND40rs_v2" -# This is the number of GPUs in a single node of the selected 'vm_size' compute. -# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train. -# Setting this to more than the number of GPUs will result in an error. 
-gpus_per_node=2 -# This is the foundation model for finetuning -model_name="t5-small" -# using the latest version of the model - not working yet -model_version=1 - -version=$(date +%s) -finetuned_model_name=$model_name"-news-summary" -endpoint_name="news-summary-$version" -deployment_sku="Standard_DS3_v2" - - -# training data -train_data="../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_train.jsonl" -# validation data -validation_data="../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_validation.jsonl" -# test data -test_data="../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/small_test.jsonl" -# evaluation config -evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/summarization/summarization-config.json" -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/sample_score.json" - -# finetuning job parameters -finetuning_pipeline_component="summarization_pipeline" -# The following parameters map to the dataset fields -# document_key parameter maps to the "article" field in the news summary dataset -document_key="article" -# summary_key parameter maps to the "highlights" field in the news summary dataset -summary_key="highlights" -# Training settings -number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute -num_train_epochs=3 -learning_rate=2e-5 - -# 1. 
Setup pre-requisites - -if [ "$subscription_id" = "" ] || \ - [ "$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# check if $compute_cluster exists, else create it -if az ml compute show --name $compute_cluster $workspace_info -then - echo "Compute cluster $compute_cluster already exists" -else - echo "Creating compute cluster $compute_cluster" - az ml compute create --name $compute_cluster --type amlcompute --min-instances 0 --max-instances 2 --size $compute_sku $workspace_info || { - echo "Failed to create compute cluster $compute_cluster" - exit 1 - } -fi - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Check if training data, validation data and test data exist -if [ ! -f $train_data ]; then - echo "Training data $train_data does not exist" - exit 1 -fi -if [ ! -f $validation_data ]; then - echo "Validation data $validation_data does not exist" - exit 1 -fi -if [ ! -f $test_data ]; then - echo "Test data $test_data does not exist" - exit 1 -fi - -# 4. Submit finetuning job using pipeline.yml - -# check if the finetuning pipeline component exists -if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name -then - echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" - exit 1 -fi - -# need to switch to using latest version for model, currently blocked with a bug. 
-# submit finetuning job -parent_job_name=$( az ml job create --file ./news-summary-pipeline.yml $workspace_info --query name -o tsv --set \ - jobs.news_summary_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \ - inputs.compute_model_import=$compute_cluster \ - inputs.compute_preprocess=$compute_cluster \ - inputs.compute_finetune=$compute_cluster \ - inputs.compute_model_evaluation=$compute_cluster \ - inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \ - inputs.train_file_path.path=$train_data \ - inputs.validation_file_path.path=$validation_data \ - inputs.test_file_path.path=$test_data \ - inputs.evaluation_config.path=$evaluation_config \ - inputs.document_key=$document_key \ - inputs.summary_key=$summary_key \ - inputs.number_of_gpu_to_use_finetuning=$number_of_gpu_to_use_finetuning \ - inputs.num_train_epochs=$num_train_epochs \ - inputs.learning_rate=$learning_rate ) || { - echo "Failed to submit finetuning job" - exit 1 - } - -az ml job stream --name $parent_job_name $workspace_info || { - echo "job stream failed"; exit 1; -} - -# 5. Create model in workspace from train job output -az ml model create --name $finetuned_model_name --version $version --type mlflow_model \ - --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info || { - echo "model create in workspace failed"; exit 1; -} - -# 6. 
Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml:$finetuned_model_name:$version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 7. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 8. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} diff --git a/sdk/python/foundation-models/system/finetune/summarization/summarization-config.json b/cli/foundation-models/system/finetune/summarization/summarization-config.json similarity index 100% rename from sdk/python/foundation-models/system/finetune/summarization/summarization-config.json rename to cli/foundation-models/system/finetune/summarization/summarization-config.json diff --git a/cli/foundation-models/system/finetune/text-classification/deploy.yml b/cli/foundation-models/system/finetune/text-classification/deploy.yml deleted file mode 100644 index 40b0d93f09..0000000000 --- a/cli/foundation-models/system/finetune/text-classification/deploy.yml +++ /dev/null @@ -1,4 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/text-classification/emotion-detection-pipeline.yml b/cli/foundation-models/system/finetune/text-classification/emotion-detection-pipeline.yml deleted file mode 100644 index c7f88973d4..0000000000 --- a/cli/foundation-models/system/finetune/text-classification/emotion-detection-pipeline.yml +++ /dev/null @@ -1,75 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: text-classification-emotion-detection - -inputs: - compute_model_import: gpu-cluster-big - compute_preprocess: gpu-cluster-big - compute_finetune: gpu-cluster-big - compute_model_evaluation: gpu-cluster-big - - # specify the foundation model available in the azureml system registry - mlflow_model_path: - path: azureml://registries/azureml-preview/models/bert-based-uncased/versions/3 - # huggingface_id: 'bert-base-uncased' # if you want to use a huggingface model, uncomment this line and 
comment the above line - - # map the dataset files to parameters - train_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_train.jsonl" - validation_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_validation.jsonl" - test_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_test.jsonl" - evaluation_config_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json" - - - # The following parameters map to the dataset fields - sentence1_key: "text" - label_key: "label_string" - - # training settings - number_of_gpu_to_use_finetuning: 2 - num_train_epochs: 3 - learning_rate: 2e-5 - -outputs: - # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model - # registering the model is required to deploy the model to an online or batch endpoint - trained_model: - type: mlflow_model - -settings: - force_rerun: true - -jobs: - emotion_detection_finetune_job: - type: pipeline - # component: azureml://registries/azureml-preview/components/text_classification_pipeline/versions/0.0.3 - component: azureml://registries/azureml-preview/components/text_classification_pipeline/labels/latest - inputs: - mlflow_model_path: ${{parent.inputs.mlflow_model_path}} - - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_preprocess: ${{parent.inputs.compute_preprocess}} - compute_finetune: ${{parent.inputs.compute_finetune}} - compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}} - - train_file_path: ${{parent.inputs.train_file_path}} - validation_file_path: ${{parent.inputs.validation_file_path}} - test_file_path: ${{parent.inputs.test_file_path}} - 
evaluation_config: ${{parent.inputs.evaluation_config_path}} - - sentence1_key: ${{parent.inputs.sentence1_key}} - label_key: ${{parent.inputs.label_key}} - - number_of_gpu_to_use_finetuning: ${{parent.inputs.number_of_gpu_to_use_finetuning}} - num_train_epochs: ${{parent.inputs.num_train_epochs}} - learning_rate: ${{parent.inputs.learning_rate}} - outputs: - mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git a/cli/foundation-models/system/finetune/text-classification/emotion-detection.sh b/cli/foundation-models/system/finetune/text-classification/emotion-detection.sh deleted file mode 100644 index f09d212324..0000000000 --- a/cli/foundation-models/system/finetune/text-classification/emotion-detection.sh +++ /dev/null @@ -1,170 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection -# the data files are available in the same folder as the above notebook - -# script inputs -subscription_id="" -resource_group_name="" -workspace_name="" -registry_name="azureml" - -compute_cluster="gpu-cluster-big" -# if above compute cluster does not exist, create it with the following vm size -compute_sku="Standard_ND40rs_v2" -# This is the number of GPUs in a single node of the selected 'vm_size' compute. -# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train. -# Setting this to more than the number of GPUs will result in an error. 
-gpus_per_node=2 -# This is the foundation model for finetuning -model_name="bert-base-uncased" -# using the latest version of the model - not working yet -model_version=1 - -version=$(date +%s) -finetuned_model_name=$model_name"-emotion-detection" -endpoint_name="emotion-$version" -deployment_sku="Standard_DS3_v2" - - -# training data -train_data="../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_train.jsonl" -# validation data -validation_data="../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_validation.jsonl" -# test data -test_data="../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/small_test.jsonl" -# evaluation config -evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json" -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/sample_score.json" - -# finetuning job parameters -finetuning_pipeline_component="text_classification_pipeline" -# The following parameters map to the dataset fields -sentence1_key="text" -label_key="label_string" -# Training settings -number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute -num_train_epochs=3 -learning_rate=2e-5 - -# 1. 
Setup pre-requisites - -if [ "$subscription_id" = "" ] || \ - [ "$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# check if $compute_cluster exists, else create it -if az ml compute show --name $compute_cluster $workspace_info -then - echo "Compute cluster $compute_cluster already exists" -else - echo "Creating compute cluster $compute_cluster" - az ml compute create --name $compute_cluster --type amlcompute --min-instances 0 --max-instances 2 --size $compute_sku $workspace_info || { - echo "Failed to create compute cluster $compute_cluster" - exit 1 - } -fi - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Check if training data, validation data and test data exist -if [ ! -f $train_data ]; then - echo "Training data $train_data does not exist" - exit 1 -fi -if [ ! -f $validation_data ]; then - echo "Validation data $validation_data does not exist" - exit 1 -fi -if [ ! -f $test_data ]; then - echo "Test data $test_data does not exist" - exit 1 -fi - -# 4. Submit finetuning job using pipeline.yml - -# check if the finetuning pipeline component exists -if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name -then - echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" - exit 1 -fi - -# need to switch to using latest version for model, currently blocked with a bug. 
-# submit finetuning job -parent_job_name=$( az ml job create --file ./emotion-detection-pipeline.yml $workspace_info --query name -o tsv --set \ - jobs.emotion_detection_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \ - inputs.compute_model_import=$compute_cluster \ - inputs.compute_preprocess=$compute_cluster \ - inputs.compute_finetune=$compute_cluster \ - inputs.compute_model_evaluation=$compute_cluster \ - inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \ - inputs.train_file_path.path=$train_data \ - inputs.validation_file_path.path=$validation_data \ - inputs.test_file_path.path=$test_data \ - inputs.evaluation_config.path=$evaluation_config \ - inputs.sentence1_key=$sentence1_key \ - inputs.label_key=$label_key \ - inputs.number_of_gpu_to_use_finetuning=$number_of_gpu_to_use_finetuning \ - inputs.num_train_epochs=$num_train_epochs \ - inputs.learning_rate=$learning_rate ) || { - echo "Failed to submit finetuning job" - exit 1 - } - -az ml job stream --name $parent_job_name $workspace_info || { - echo "job stream failed"; exit 1; -} - -# 5. Create model in workspace from train job output -az ml model create --name $finetuned_model_name --version $version --type mlflow_model \ - --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info || { - echo "model create in workspace failed"; exit 1; -} - -# 6. 
Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml:$finetuned_model_name:$version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 7. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 8. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} diff --git a/cli/foundation-models/system/finetune/token-classification/deploy.yml b/cli/foundation-models/system/finetune/token-classification/deploy.yml deleted file mode 100644 index 40b0d93f09..0000000000 --- a/cli/foundation-models/system/finetune/token-classification/deploy.yml +++ /dev/null @@ -1,4 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/token-classification/token-classification-pipeline.yml b/cli/foundation-models/system/finetune/token-classification/token-classification-pipeline.yml deleted file mode 100644 index eeba47f899..0000000000 --- a/cli/foundation-models/system/finetune/token-classification/token-classification-pipeline.yml +++ /dev/null @@ -1,75 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: token-classification-ner - -inputs: - compute_model_import: gpu-cluster-big - compute_preprocess: gpu-cluster-big - compute_finetune: gpu-cluster-big - compute_model_evaluation: gpu-cluster-big - - # specify the foundation model available in the azureml system registry - mlflow_model_path: - path: azureml://registries/azureml-preview/models/bert-based-uncased/versions/3 - # huggingface_id: 'bert-base-uncased' # if you want to use a huggingface model, uncomment this line and comment the above line - - # map the dataset files to parameters - train_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_train.jsonl" - validation_file_path: - type: uri_file - path: 
"../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_validation.jsonl" - test_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_test.jsonl" - evaluation_config_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json" - - - # The following parameters map to the dataset fields - token_key: "tokens" - tag_key: "ner_tags_str" - - # training settings - number_of_gpu_to_use_finetuning: 2 - num_train_epochs: 3 - learning_rate: 2e-5 - -outputs: - # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model - # registering the model is required to deploy the model to an online or batch endpoint - trained_model: - type: mlflow_model - -settings: - force_rerun: true - -jobs: - ner_finetune_job: - type: pipeline - # component: azureml://registries/azureml-preview/components/token_classification_pipeline/versions/0.0.3 - component: azureml://registries/azureml-preview/components/token_classification_pipeline/labels/latest - inputs: - mlflow_model_path: ${{parent.inputs.mlflow_model_path}} - - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_preprocess: ${{parent.inputs.compute_preprocess}} - compute_finetune: ${{parent.inputs.compute_finetune}} - compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}} - - train_file_path: ${{parent.inputs.train_file_path}} - validation_file_path: ${{parent.inputs.validation_file_path}} - test_file_path: ${{parent.inputs.test_file_path}} - evaluation_config: ${{parent.inputs.evaluation_config_path}} - - token_key: ${{parent.inputs.token_key}} - tag_key: ${{parent.inputs.tag_key}} - - number_of_gpu_to_use_finetuning: ${{parent.inputs.number_of_gpu_to_use_finetuning}} - num_train_epochs: 
${{parent.inputs.num_train_epochs}} - learning_rate: ${{parent.inputs.learning_rate}} - outputs: - mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git a/cli/foundation-models/system/finetune/token-classification/token-classification.sh b/cli/foundation-models/system/finetune/token-classification/token-classification.sh deleted file mode 100644 index 40151b3c5b..0000000000 --- a/cli/foundation-models/system/finetune/token-classification/token-classification.sh +++ /dev/null @@ -1,170 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection -# the data files are available in the same folder as the above notebook - -# script inputs -subscription_id="" -resource_group_name="" -workspace_name="" -registry_name="azureml" - -compute_cluster="gpu-cluster-big" -# if above compute cluster does not exist, create it with the following vm size -compute_sku="Standard_ND40rs_v2" -# This is the number of GPUs in a single node of the selected 'vm_size' compute. -# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train. -# Setting this to more than the number of GPUs will result in an error. 
-gpus_per_node=2 -# This is the foundation model for finetuning -model_name="bert-base-uncased" -# using the latest version of the model - not working yet -model_version=1 - -version=$(date +%s) -finetuned_model_name=$model_name"-ner" -endpoint_name="ner-$version" -deployment_sku="Standard_DS3_v2" - - -# training data -train_data="../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_train.jsonl" -# validation data -validation_data="../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_validation.jsonl" -# test data -test_data="../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/small_test.jsonl" -# evaluation config -evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json" -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/sample_score.json" - -# finetuning job parameters -finetuning_pipeline_component="token_classification_pipeline" -# The following parameters map to the dataset fields -token_key="tokens" -tag_key="ner_tags_str" -# Training settings -number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute -num_train_epochs=3 -learning_rate=2e-5 - -# 1. 
Setup pre-requisites - -if [ "$subscription_id" = "" ] || \ - [ "$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# check if $compute_cluster exists, else create it -if az ml compute show --name $compute_cluster $workspace_info -then - echo "Compute cluster $compute_cluster already exists" -else - echo "Creating compute cluster $compute_cluster" - az ml compute create --name $compute_cluster --type amlcompute --min-instances 0 --max-instances 2 --size $compute_sku $workspace_info || { - echo "Failed to create compute cluster $compute_cluster" - exit 1 - } -fi - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Check if training data, validation data and test data exist -if [ ! -f $train_data ]; then - echo "Training data $train_data does not exist" - exit 1 -fi -if [ ! -f $validation_data ]; then - echo "Validation data $validation_data does not exist" - exit 1 -fi -if [ ! -f $test_data ]; then - echo "Test data $test_data does not exist" - exit 1 -fi - -# 4. Submit finetuning job using pipeline.yml - -# check if the finetuning pipeline component exists -if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name -then - echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" - exit 1 -fi - -# need to switch to using latest version for model, currently blocked with a bug. 
-# submit finetuning job -parent_job_name=$( az ml job create --file ./token-classification-pipeline.yml $workspace_info --query name -o tsv --set \ - jobs.ner_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \ - inputs.compute_model_import=$compute_cluster \ - inputs.compute_preprocess=$compute_cluster \ - inputs.compute_finetune=$compute_cluster \ - inputs.compute_model_evaluation=$compute_cluster \ - inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \ - inputs.train_file_path.path=$train_data \ - inputs.validation_file_path.path=$validation_data \ - inputs.test_file_path.path=$test_data \ - inputs.evaluation_config.path=$evaluation_config \ - inputs.token_key=$token_key \ - inputs.tag_key=$tag_key \ - inputs.number_of_gpu_to_use_finetuning=$number_of_gpu_to_use_finetuning \ - inputs.num_train_epochs=$num_train_epochs \ - inputs.learning_rate=$learning_rate ) || { - echo "Failed to submit finetuning job" - exit 1 - } - -az ml job stream --name $parent_job_name $workspace_info || { - echo "job stream failed"; exit 1; -} - -# 5. Create model in workspace from train job output -az ml model create --name $finetuned_model_name --version $version --type mlflow_model \ - --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info || { - echo "model create in workspace failed"; exit 1; -} - -# 6. 
Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml:$finetuned_model_name:$version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 7. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 8. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} diff --git a/cli/foundation-models/system/finetune/translation/deploy.yml b/cli/foundation-models/system/finetune/translation/deploy.yml deleted file mode 100644 index 40b0d93f09..0000000000 --- a/cli/foundation-models/system/finetune/translation/deploy.yml +++ /dev/null @@ -1,4 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/translation/translation-pipeline.yml b/cli/foundation-models/system/finetune/translation/translation-pipeline.yml deleted file mode 100644 index 6392a655a8..0000000000 --- a/cli/foundation-models/system/finetune/translation/translation-pipeline.yml +++ /dev/null @@ -1,77 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json -type: pipeline - -experiment_name: translation-wmt16-en-ro - -inputs: - compute_model_import: gpu-cluster-big - compute_preprocess: gpu-cluster-big - compute_finetune: gpu-cluster-big - compute_model_evaluation: gpu-cluster-big - - # specify the foundation model available in the azureml system registry - mlflow_model_path: - path: azureml://registries/azureml-preview/models/t5-small/versions/4 - # huggingface_id: 't5-small' # if you want to use a huggingface model, uncomment this line and comment the above line - - # map the dataset files to parameters - train_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_train.jsonl" - validation_file_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_validation.jsonl" - test_file_path: - type: uri_file - path: 
"../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_test.jsonl" - evaluation_config_path: - type: uri_file - path: "../../../../../sdk/python/foundation-models/system/finetune/translation/translation-config.json" - - - # The following parameters map to the dataset fields - # source_lang parameter maps to the "en" field in the wmt16 dataset - source_lang: "en" - # target_lang parameter maps to the "ro" field in the wmt16 dataset - target_lang: "ro" - - # training settings - number_of_gpu_to_use_finetuning: 2 - num_train_epochs: 3 - learning_rate: 2e-5 - -outputs: - # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model - # registering the model is required to deploy the model to an online or batch endpoint - trained_model: - type: mlflow_model - -settings: - force_rerun: true - -jobs: - translation_en_ro_finetune_job: - type: pipeline - # component: azureml://registries/azureml-preview/components/translation_pipeline/versions/0.0.3 - component: azureml://registries/azureml-preview/components/translation_pipeline/labels/latest - inputs: - mlflow_model_path: ${{parent.inputs.mlflow_model_path}} - - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_preprocess: ${{parent.inputs.compute_preprocess}} - compute_finetune: ${{parent.inputs.compute_finetune}} - compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}} - - train_file_path: ${{parent.inputs.train_file_path}} - validation_file_path: ${{parent.inputs.validation_file_path}} - test_file_path: ${{parent.inputs.test_file_path}} - evaluation_config: ${{parent.inputs.evaluation_config_path}} - - source_lang: ${{parent.inputs.source_lang}} - target_lang: ${{parent.inputs.target_lang}} - - number_of_gpu_to_use_finetuning: ${{parent.inputs.number_of_gpu_to_use_finetuning}} - num_train_epochs: ${{parent.inputs.num_train_epochs}} - learning_rate: ${{parent.inputs.learning_rate}} 
- outputs: - mlflow_model_folder: ${{parent.outputs.trained_model}} diff --git a/cli/foundation-models/system/finetune/translation/translation.sh b/cli/foundation-models/system/finetune/translation/translation.sh deleted file mode 100644 index 494addff0d..0000000000 --- a/cli/foundation-models/system/finetune/translation/translation.sh +++ /dev/null @@ -1,171 +0,0 @@ -#! /bin/bash -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-ft-sdk-emotion-detection -# the data files are available in the same folder as the above notebook - -# script inputs -subscription_id="" -resource_group_name="" -workspace_name="" -registry_name="azureml" - -compute_cluster="gpu-cluster-big" -# if above compute cluster does not exist, create it with the following vm size -compute_sku="Standard_ND40rs_v2" -# This is the number of GPUs in a single node of the selected 'vm_size' compute. -# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train. -# Setting this to more than the number of GPUs will result in an error. 
-gpus_per_node=2 -# This is the foundation model for finetuning -model_name="t5-small" -# using the latest version of the model - not working yet -model_version=1 - -version=$(date +%s) -finetuned_model_name=$model_name"-wmt16-en-ro" -endpoint_name="translation-en-ro-$version" -deployment_sku="Standard_DS3_v2" - - -# training data -train_data="../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_train.jsonl" -# validation data -validation_data="../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_validation.jsonl" -# test data -test_data="../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/small_test.jsonl" -# evaluation config -evaluation_config="../../../../../sdk/python/foundation-models/system/finetune/translation/translation-config.json" -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/sample_score.json" - -# finetuning job parameters -finetuning_pipeline_component="translation_pipeline" -# The following parameters map to the dataset fields -# source_lang parameter maps to the "en" field in the wmt16 dataset -source_lang="en" -# target_lang parameter maps to the "ro" field in the wmt16 dataset -target_lang="ro" -# Training settings -number_of_gpu_to_use_finetuning=$gpus_per_node # set to the number of GPUs available in the compute -num_train_epochs=3 -learning_rate=2e-5 - -# 1. 
Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - [ "$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# check if $compute_cluster exists, else create it -if az ml compute show --name $compute_cluster $workspace_info -then - echo "Compute cluster $compute_cluster already exists" -else - echo "Creating compute cluster $compute_cluster" - az ml compute create --name $compute_cluster --type amlcompute --min-instances 0 --max-instances 2 --size $compute_sku $workspace_info || { - echo "Failed to create compute cluster $compute_cluster" - exit 1 - } -fi - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Check if training data, validation data and test data exist -if [ ! -f $train_data ]; then - echo "Training data $train_data does not exist" - exit 1 -fi -if [ ! -f $validation_data ]; then - echo "Validation data $validation_data does not exist" - exit 1 -fi -if [ ! -f $test_data ]; then - echo "Test data $test_data does not exist" - exit 1 -fi - -# 4. Submit finetuning job using pipeline.yml - -# check if the finetuning pipeline component exists -if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name -then - echo "Finetuning pipeline component $finetuning_pipeline_component does not exist" - exit 1 -fi - -# need to switch to using latest version for model, currently blocked with a bug. 
-# submit finetuning job -parent_job_name=$( az ml job create --file ./translation-pipeline.yml $workspace_info --query name -o tsv --set \ - jobs.translation_en_ro_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \ - inputs.compute_model_import=$compute_cluster \ - inputs.compute_preprocess=$compute_cluster \ - inputs.compute_finetune=$compute_cluster \ - inputs.compute_model_evaluation=$compute_cluster \ - inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$model_name/versions/$model_version" \ - inputs.train_file_path.path=$train_data \ - inputs.validation_file_path.path=$validation_data \ - inputs.test_file_path.path=$test_data \ - inputs.evaluation_config.path=$evaluation_config \ - inputs.source_lang=$source_lang \ - inputs.target_lang=$target_lang \ - inputs.number_of_gpu_to_use_finetuning=$number_of_gpu_to_use_finetuning \ - inputs.num_train_epochs=$num_train_epochs \ - inputs.learning_rate=$learning_rate ) || { - echo "Failed to submit finetuning job" - exit 1 - } - -az ml job stream --name $parent_job_name $workspace_info || { - echo "job stream failed"; exit 1; -} - -# 5. Create model in workspace from train job output -az ml model create --name $finetuned_model_name --version $version --type mlflow_model \ - --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info || { - echo "model create in workspace failed"; exit 1; -} - -# 6. 
Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -# You can find here the list of SKU's supported for deployment - https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml:$finetuned_model_name:$version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 7. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 8. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} diff --git a/cli/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.sh b/cli/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.sh deleted file mode 100644 index bf79df063d..0000000000 --- a/cli/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.sh +++ /dev/null @@ -1,79 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-asr -# the sample scoring file available in the same folder as the above notebook - -# script inputs -registry_name="azureml-preview" -subscription_id="" -resource_group_name="" -workspace_name="" - -# This is the model from system registry that needs to be deployed -model_name="openai-whisper-large" -# using the latest version of the model - not working yet -model_version=1 - -version=$(date +%s) -endpoint_name="asr-$version" - -# todo: fetch deployment_sku from the min_inference_sku tag of the model -deployment_sku="Standard_DS4_v2" - -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/inference/automatic-speech-recognition/sample-request/sample_score.json" - -# 1. Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - ["$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! 
az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 4. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 6. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} - - - diff --git a/cli/foundation-models/system/inference/automatic-speech-recognition/deploy.yml b/cli/foundation-models/system/inference/automatic-speech-recognition/deploy.yml deleted file mode 100644 index 48bce7ade6..0000000000 --- a/cli/foundation-models/system/inference/automatic-speech-recognition/deploy.yml +++ /dev/null @@ -1,6 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS4_v2 -instance_count: 1 -request_settings: - request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/fill-mask/deploy.yml b/cli/foundation-models/system/inference/fill-mask/deploy.yml deleted file mode 100644 index 336e5519f5..0000000000 --- a/cli/foundation-models/system/inference/fill-mask/deploy.yml +++ /dev/null @@ -1,6 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 -request_settings: - request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.sh b/cli/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.sh deleted file mode 100644 index fbfe2d68bb..0000000000 --- a/cli/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.sh +++ /dev/null @@ -1,79 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-fill-mask -# the sample scoring file available in the same folder as the above notebook - -# script inputs -registry_name="azureml-preview" -subscription_id="" -resource_group_name="" -workspace_name="" - -# This is the model from system registry that needs to be deployed -model_name="bert-base-uncased" -# using the 
latest version of the model - not working yet -model_version=3 - -version=$(date +%s) -endpoint_name="fill-mask-$version" - -# todo: fetch deployment_sku from the min_inference_sku tag of the model -deployment_sku="Standard_DS2_v2" - -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/sample_score.json" - -# 1. Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - ["$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 4. 
Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 6. Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} - - - diff --git a/cli/foundation-models/system/inference/question-answering/deploy.yml b/cli/foundation-models/system/inference/question-answering/deploy.yml deleted file mode 100644 index 336e5519f5..0000000000 --- a/cli/foundation-models/system/inference/question-answering/deploy.yml +++ /dev/null @@ -1,6 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 -request_settings: - request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/question-answering/question-answering-online-endpoint.sh b/cli/foundation-models/system/inference/question-answering/question-answering-online-endpoint.sh deleted file mode 100644 index d0a8579621..0000000000 --- a/cli/foundation-models/system/inference/question-answering/question-answering-online-endpoint.sh +++ /dev/null @@ -1,79 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-question-answering -# the sample scoring file available in the same folder as the above notebook - -# script inputs -registry_name="azureml-preview" -subscription_id="" -resource_group_name="" -workspace_name="" - -# This is the model from system registry that needs to be deployed -model_name="deepset-minilm-uncased-squad2" -# using the latest version of the model - not working 
yet -model_version=3 - -version=$(date +%s) -endpoint_name="question-answering-$version" - -# todo: fetch deployment_sku from the min_inference_sku tag of the model -deployment_sku="Standard_DS2_v2" - -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/inference/question-answering/squad-dataset/sample_score.json" - -# 1. Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - ["$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 4. 
Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 6. Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} - - - diff --git a/cli/foundation-models/system/inference/summarization/deploy.yml b/cli/foundation-models/system/inference/summarization/deploy.yml deleted file mode 100644 index 336e5519f5..0000000000 --- a/cli/foundation-models/system/inference/summarization/deploy.yml +++ /dev/null @@ -1,6 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 -request_settings: - request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/summarization/summarization-online-endpoint.sh b/cli/foundation-models/system/inference/summarization/summarization-online-endpoint.sh deleted file mode 100644 index 6948d59502..0000000000 --- a/cli/foundation-models/system/inference/summarization/summarization-online-endpoint.sh +++ /dev/null @@ -1,79 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-summarization -# the sample scoring file available in the same folder as the above notebook - -# script inputs -registry_name="azureml-preview" -subscription_id="" -resource_group_name="" -workspace_name="" - -# This is the model from system registry that needs to be deployed -model_name="sshleifer-distilbart-cnn-12-6" -# using the latest version of the model - not working yet -model_version=3 - -version=$(date +%s) 
-endpoint_name="summarization-$version" - -# todo: fetch deployment_sku from the min_inference_sku tag of the model -deployment_sku="Standard_DS3_v2" - -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/sample_score.json" - -# 1. Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - ["$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 4. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 6. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} - - - diff --git a/cli/foundation-models/system/inference/text-classification/deploy.yml b/cli/foundation-models/system/inference/text-classification/deploy.yml deleted file mode 100644 index 336e5519f5..0000000000 --- a/cli/foundation-models/system/inference/text-classification/deploy.yml +++ /dev/null @@ -1,6 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 -request_settings: - request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/text-classification/text-classification-online-endpoint.sh b/cli/foundation-models/system/inference/text-classification/text-classification-online-endpoint.sh deleted file mode 100644 index 75d193e047..0000000000 --- a/cli/foundation-models/system/inference/text-classification/text-classification-online-endpoint.sh +++ /dev/null @@ -1,79 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-text-classification -# the sample scoring file available in the same folder as the above notebook - -# script inputs -registry_name="azureml-preview" -subscription_id="" -resource_group_name="" -workspace_name="" - -# This is the model from system registry that needs to be deployed -model_name="microsoft-deberta-base-mnli" -# using the latest version of the model - not working yet -model_version=3 - -version=$(date +%s) -endpoint_name="text-classification-$version" - -# todo: fetch deployment_sku from the min_inference_sku tag of the model -deployment_sku="Standard_DS3_v2" - -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/sample_score.json" - -# 1. 
Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - ["$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 4. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 6. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} - - - diff --git a/cli/foundation-models/system/inference/text-generation/deploy.yml b/cli/foundation-models/system/inference/text-generation/deploy.yml deleted file mode 100644 index 336e5519f5..0000000000 --- a/cli/foundation-models/system/inference/text-generation/deploy.yml +++ /dev/null @@ -1,6 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 -request_settings: - request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/text-generation/text-generation-online-endpoint.sh b/cli/foundation-models/system/inference/text-generation/text-generation-online-endpoint.sh deleted file mode 100644 index 6b3428d639..0000000000 --- a/cli/foundation-models/system/inference/text-generation/text-generation-online-endpoint.sh +++ /dev/null @@ -1,79 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-text-generation -# the sample scoring file available in the same folder as the above notebook - -# script inputs -registry_name="azureml-preview" -subscription_id="" -resource_group_name="" -workspace_name="" - -# This is the model from system registry that needs to be deployed -model_name="gpt2" -# using the latest version of the model - not working yet -model_version=3 - -version=$(date +%s) -endpoint_name="text-generation-$version" - -# todo: fetch deployment_sku from the min_inference_sku tag of the model -deployment_sku="Standard_DS2_v2" - -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/sample_score.json" - -# 1. 
Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - ["$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 4. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 6. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} - - - diff --git a/cli/foundation-models/system/inference/token-classification/deploy.yml b/cli/foundation-models/system/inference/token-classification/deploy.yml deleted file mode 100644 index 336e5519f5..0000000000 --- a/cli/foundation-models/system/inference/token-classification/deploy.yml +++ /dev/null @@ -1,6 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 -request_settings: - request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/inference/token-classification/token-classification-online-endpoint.sh b/cli/foundation-models/system/inference/token-classification/token-classification-online-endpoint.sh deleted file mode 100644 index 7ca6b1e351..0000000000 --- a/cli/foundation-models/system/inference/token-classification/token-classification-online-endpoint.sh +++ /dev/null @@ -1,79 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-token-classification -# the sample scoring file available in the same folder as the above notebook - -# script inputs -registry_name="azureml-preview" -subscription_id="" -resource_group_name="" -workspace_name="" - -# This is the model from system registry that needs to be deployed -model_name="Jean-Baptiste-camembert-ner" -# using the latest version of the model - not working yet -model_version=3 - -version=$(date +%s) -endpoint_name="token-classification-$version" - -# todo: fetch deployment_sku from the min_inference_sku tag of the model -deployment_sku="Standard_DS2_v2" - -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/sample_score.json" - -# 1. 
Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - ["$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 4. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 6. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} - - - diff --git a/cli/foundation-models/system/inference/translation/deploy.yml b/cli/foundation-models/system/inference/translation/deploy.yml deleted file mode 100644 index 9c7951062c..0000000000 --- a/cli/foundation-models/system/inference/translation/deploy.yml +++ /dev/null @@ -1,6 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json -name: demo -instance_type: Standard_DS3_v2 -instance_count: 1 -request_settings: - request_timeout_ms: 60000 diff --git a/cli/foundation-models/system/inference/translation/translation-online-endpoint.sh b/cli/foundation-models/system/inference/translation/translation-online-endpoint.sh deleted file mode 100644 index 61e1e8337e..0000000000 --- a/cli/foundation-models/system/inference/translation/translation-online-endpoint.sh +++ /dev/null @@ -1,79 +0,0 @@ -set -x -# the commands in this file map to steps in this notebook: https://aka.ms/azureml-infer-online-sdk-translation -# the sample scoring file available in the same folder as the above notebook - -# script inputs -registry_name="azureml-preview" -subscription_id="" -resource_group_name="" -workspace_name="" - -# This is the model from system registry that needs to be deployed -model_name="t5-small" -# using the latest version of the model - not working yet -model_version=4 - -version=$(date +%s) -endpoint_name="translation-$version" - -# todo: fetch deployment_sku from the min_inference_sku tag of the model -deployment_sku="Standard_DS2_v2" - -# scoring_file -scoring_file="../../../../../sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/sample_score.json" - -# 1. 
Setup pre-requisites -if [ "$subscription_id" = "" ] || \ - ["$resource_group_name" = "" ] || \ - [ "$workspace_name" = "" ]; then - echo "Please update the script with the subscription_id, resource_group_name and workspace_name" - exit 1 -fi - -az account set -s $subscription_id -workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name" - -# 2. Check if the model exists in the registry -# need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name -then - echo "Model $model_name:$model_version does not exist in registry $registry_name" - exit 1 -fi - -# 3. Deploy the model to an endpoint -# create online endpoint -az ml online-endpoint create --name $endpoint_name $workspace_info || { - echo "endpoint create failed"; exit 1; -} - -# deploy model from registry to endpoint in workspace -az ml online-deployment create --file deploy.yml $workspace_info --all-traffic --set \ - endpoint_name=$endpoint_name model=azureml://registries/$registry_name/models/$model_name/versions/$model_version \ - instance_type=$deployment_sku || { - echo "deployment create failed"; exit 1; -} - -# 4. Try a sample scoring request - -# Check if scoring data file exists -if [ -f $scoring_file ]; then - echo "Invoking endpoint $endpoint_name with following input:\n\n" - cat $scoring_file - echo "\n\n" -else - echo "Scoring file $scoring_file does not exist" - exit 1 -fi - -az ml online-endpoint invoke --name $endpoint_name --request-file $scoring_file $workspace_info || { - echo "endpoint invoke failed"; exit 1; -} - -# 6. 
Delete the endpoint -az ml online-endpoint delete --name $endpoint_name $workspace_info --yes || { - echo "endpoint delete failed"; exit 1; -} - - - diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-text-ner-conll-distributed-sweeping/cli-automl-text-ner-conll2003-distributed-sweeping.yml b/cli/jobs/automl-standalone-jobs/cli-automl-text-ner-conll-distributed-sweeping/cli-automl-text-ner-conll2003-distributed-sweeping.yml index e98dbb900c..2f9cc83b25 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-text-ner-conll-distributed-sweeping/cli-automl-text-ner-conll2003-distributed-sweeping.yml +++ b/cli/jobs/automl-standalone-jobs/cli-automl-text-ner-conll-distributed-sweeping/cli-automl-text-ner-conll2003-distributed-sweeping.yml @@ -37,10 +37,10 @@ sweep: search_space: - model_name: type: choice - values: [bert_base_cased, roberta_base] + values: [bert-base-cased, roberta-base] - model_name: type: choice - values: [distilroberta_base] + values: [distilroberta-base] weight_decay: type: uniform min_value: 0.01 diff --git a/cli/jobs/basics/hello-interactive.yml b/cli/jobs/basics/hello-interactive.yml index 8feb26b61c..443234e213 100644 --- a/cli/jobs/basics/hello-interactive.yml +++ b/cli/jobs/basics/hello-interactive.yml @@ -6,13 +6,13 @@ compute: azureml:cpu-cluster services: my_vscode: - job_service_type: vs_code + type: vs_code my_jupyter_lab: - job_service_type: jupyter_lab + type: jupyter_lab my_tensorboard: - job_service_type: tensor_board + type: tensor_board log_dir: "outputs/tblogs" # my_ssh: -# job_service_type: tensor_board +# type: tensor_board # ssh_public_keys: # nodes: all # Use the `nodes` property for a distributed job to run interactive services on all nodes. If `nodes` are not selected, by default, interactive applications are only enabled on the head node. 
\ No newline at end of file diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/README.md b/cli/jobs/nebulaml/bert-pretrain-deepspeed/README.md deleted file mode 100644 index 16a604dd7f..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/README.md +++ /dev/null @@ -1,104 +0,0 @@ -## Bert Pretraining With Nebula - -This example will focus on pretraining a BERT model for Masked Language Modeling (MLM) on the GLUE dataset. Bert is a large model and in this article you can learn on tips and tricks to be able to train with high efficiency for compute and memory without impacting the quality of model. - -## Setup: -### Hardware -V100 GPUs (ND40rs) are recommended for this job. This example was originally run using 2 ND40rs nodes with 8 V100 GPUs each. -#### Linear Scaling with Infini band Enabled SKUs -To attain linear scaling for large model, one important step can be to use InfiniBand. InfiniBand enables low-latency, GPU-to-GPU communication across nodes in a cluster. InfiniBand requires specialized hardware to operate. Only some VM SKUs on Azure contain this required hardware. You can view the full list of InfiniBand-enabled machine SKUs [here](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-hpc#rdma-capable-instances). - -### Setup the environment -The environment found at ``src/envrionments`` is an ACPT environment with multiple accelerators to boost the training job and is available out of the box in AzureML. If you would like to add additional packages, edit the appropriate files in ``src/environments`` with your changes, then create the custom environment using the following command: -``` -az ml environment create --file ./src/environments/env.yml -``` -### Load the dataset -Load the dataset from HuggingFace preprocessed data for GLUE. 
-``` -def load_raw_glue_dataset(task: str) -> Union[DatasetDict, Dataset]: - dataset = load_dataset("glue", actual_task(task)) - return dataset - -def load_encoded_glue_dataset( - task: str, tokenizer: PreTrainedTokenizerBase -) -> Union[DatasetDict, Dataset]: - """Load GLUE data, apply tokenizer and split into train/validation.""" - tokenizer_func = construct_tokenizer_function(tokenizer=tokenizer, task=task) - raw_dataset = load_raw_glue_dataset(task) - encoded_dataset = raw_dataset.map(tokenizer_func, batched=True) - - validation_key = ( - "validation_mismatched" - if task == "mnli-mm" - else "validation_matched" - if task == "mnli" - else "validation" - ) - return encoded_dataset["train"], encoded_dataset[validation_key] -``` - -### Training script overview -The script first loads the dataset using the load_dataset function and then tokenizes the text using the BERT tokenizer provided by the transformers library. -The tokenization is performed using a mapping function that maps the sentences to the tokenized version with or without truncation based on the value of the truncate_longer_samples variable. Then the BertForMaskedLM model is instantiated and trained using the Trainer class. - -The TrainingArguments object is used to define the training configuration, including the output directory, the number of training epochs, the batch size, and the evaluation strategy. Finally, the DataCollatorForLanguageModeling is used to create a function to collate the tokenized data and train the MLM model. - -### Nebula checkpointing -Nebula checkpoint can be enabled for Pytorch vanilla training as well as Deepspeed. - ---save-model parameter makes sure that model parameter status is written to the output directory mounted in the blob. Under the hood, on rerunning the experiment, job checks if checkpoint is available, it resumes from checkpoint and saves the training time significantly. 
- -Add below to the ds_config.json to enable Nebula checkpointing: -``` -"nebula": { - "enabled": true, - "persistent_storage_path": "/outputs/nebula_checkpoint/", - "persistent_time_interval": 10, - "num_of_version_in_retention": 2, - "enable_nebula_load": true -}, -``` - -After your job runs successfully, you can see below logs in user logs, to check wether checkpoints have been saved successfully by Nebula or not and how much time it takes to save a file in checkpoints. - -``` -[2023-03-27 03:42:54,860] [INFO] [nebula_checkpoint_engine.py:47:save] [Nebula] Saving pytorch_model.bin under tag checkpoint-20... -[1679888575], size is [219004580], Time difference = 68100[µs] -``` - -## Running the Job -### Submit with Deepspeed -To try BERT pretraining with DeepSpeed, submit the following command from within this directory: -``` -az ml job create --file job.yml -``` -To submit with DeepSpeed, we also need to include a ``ds_config.json`` file that specifies the DeepSpeed configuration. The following configuration file was found using DeepSpeed autotuning on bert-large, which is detailed [here](../DeepSpeed-Autotuning/README.md). -``` -{ - "train_micro_batch_size_per_gpu": 93, - "fp16": { - "enabled": true - }, - "flops_profiler": { - "enabled": true, - "profile_step": 1, - "module_depth": -1, - "top_modules": 1, - "detailed": true, - "output_file": "outputs/profile.txt" - }, - "zero_optimization": { - "stage": 1 - }, - "gradient_accumulation_steps": 1 -} -``` -> NOTE: Make sure the configurations inside your ``ds_config.json`` file are the same as the equivalent arguments in your ``AML-DeepSpeed-submit.yml`` file. For example, ``train_micro_batch_size_per_gpu`` should have the same value as ``--per_device_train_batch_size``. - -Some benefits of using DeepSpeed include: -- DeepSpeed provides features like ZeRO (Zero Redundancy Optimizer) that can help reduce the memory footprint of the model during training. 
This can be useful when training very large models or working with limited resources. -- With DeepSpeed, you may be able to use larger batch sizes during training, which can help improve the efficiency of the training process. This is because DeepSpeed provides features like ZeRO-Offload, which can reduce the amount of memory needed to store the parameters of the model. - -> Running out of memory during training is a common issue in deep learning, To overcome this issue, Deepspeed stage 3 (zero infinity) can be used which offload memory to CPU/NvME disk. It is recommended to start with smaller batch size. A larger batch size requires more memory to process and backpropogate gradients. - diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/job.yml b/cli/jobs/nebulaml/bert-pretrain-deepspeed/job.yml deleted file mode 100644 index c6ff635545..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/job.yml +++ /dev/null @@ -1,23 +0,0 @@ -# Training job submission via AML CLI v2 - -$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json - -command: pip list && python pretrain_glue.py --save_steps 20 --deepspeed ds_config_bertbase.json --num_train_epochs 100 --output_dir outputs --disable_tqdm 1 --local_rank $RANK --evaluation_strategy "epoch" --logging_strategy "epoch" --per_device_train_batch_size 532 --gradient_accumulation_steps 1 --per_device_eval_batch_size 532 --learning_rate 3e-05 --adam_beta1 0.8 --adam_beta2 0.999 --weight_decay 3e-07 --warmup_steps 500 --fp16 --logging_steps 1000 --model_checkpoint "bert-base-uncased" - -experiment_name: bert-pretrain-nebula-ds-optimal -environment: # Should replace your environment -environment_variables: - AZUREML_COMPUTE_USE_COMMON_RUNTIME: 'True' - AZUREML_COMMON_RUNTIME_USE_INTERACTIVE_CAPABILITY: 'True' -code: src -outputs: - output: - type: uri_folder - mode: rw_mount - path: azureml://datastores/workspaceblobstore/paths/outputs -compute: # Should replace your compute -distribution: - type: 
pytorch - process_count_per_instance: 4 -resources: - instance_count: 4 diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/ds_config_bertbase.json b/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/ds_config_bertbase.json deleted file mode 100644 index 454701b405..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/ds_config_bertbase.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "train_micro_batch_size_per_gpu": 532, - "fp16": { - "enabled": true - }, - "flops_profiler": { - "enabled": true, - "profile_step": 1, - "module_depth": -1, - "top_modules": 1, - "detailed": true, - "output_file": "outputs/profile.txt" - }, - "zero_optimization": { - "stage": 3, - "stage3_gather_16bit_weights_on_model_save": true - }, - "gradient_accumulation_steps": 1, - "train_batch_size": 8512, - "nebula": { - "enabled": true, - "persistent_storage_path": "/outputs/nebula_checkpoint/", - "persistent_time_interval": 10, - "num_of_version_in_retention": 2, - "enable_nebula_load": true - } -} diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/environment/context/Dockerfile b/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/environment/context/Dockerfile deleted file mode 100644 index d199635bb8..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/environment/context/Dockerfile +++ /dev/null @@ -1,48 +0,0 @@ -FROM ptebic.azurecr.io/public/azureml/aifx/stable-ubuntu2004-cu117-py38-torch1131:biweekly.202301.2 -# RUN pip install deepspeed -U -RUN pip install git+https://github.com/huggingface/transformers -RUN pip install datasets -RUN pip install accelerate -RUN pip install scikit-learn -RUN pip install apache_beam -RUN pip install evaluate - -RUN pip install git+https://github.com/FreyaRao/DeepSpeed.git@qrao/fix_save_16mit_model_error - -# Install pip dependencies -RUN pip install 'ipykernel~=6.0' \ - 'azureml-core' \ - 'azureml-dataset-runtime' \ - 'azureml-defaults' \ - 'azure-ml-component' \ - 'azureml-mlflow' \ - 'azureml-contrib-services' \ - 
'azureml-contrib-services' \ - 'torch-tb-profiler~=0.4.0' \ - 'py-spy==0.3.12' \ - 'debugpy~=1.6.3' - -RUN pip install \ - azure-ai-ml \ - azureml-inference-server-http \ - inference-schema~=1.4.2.1 \ - MarkupSafe==2.0.1 \ - regex \ - pybind11 - -# Inference requirements -COPY --from=mcr.microsoft.com/azureml/o16n-base/python-assets:20220607.v1 /artifacts /var/ -RUN /var/requirements/install_system_requirements.sh && \ - cp /var/configuration/rsyslog.conf /etc/rsyslog.conf && \ - cp /var/configuration/nginx.conf /etc/nginx/sites-available/app && \ - ln -sf /etc/nginx/sites-available/app /etc/nginx/sites-enabled/app && \ - rm -f /etc/nginx/sites-enabled/default -ENV SVDIR=/var/runit -ENV WORKER_TIMEOUT=400 -EXPOSE 5001 8883 8888 - -# support Deepspeed launcher requirement of passwordless ssh login -RUN apt-get update -RUN apt-get install -y openssh-server openssh-client - - diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/environment/env.yml b/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/environment/env.yml deleted file mode 100644 index 60eea896a0..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/environment/env.yml +++ /dev/null @@ -1,27 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json -name: ACPT_Nebula -version: 1 -build: - path: ./context/ -tags: - os: ubuntu - os_version: 20.04 - hpcx: 2.10 - mpi: openmpi - mpi_version: 4.1.2rc4 - ucx: 1.12.0 - cuda: 11.7 - cublas: 11.10.3.66 - cudnn: 8.4.1 - nccl: 2.12.10 - rapids: 22.04 - rdma_core: 36.0 - hpc_x: 2.10 - nsight_compute: 2022.2.1 - nsight_systems: 2022.1.3.3 - nccl_test: 2.11.0 - azureml-defaults: 1.41.0 - mlflow: 1.25.1 - transformers: 4.18.0 - torch: "1.13.0a0+340c412" - pynvml: 11.4.1 diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/glue_datasets.py b/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/glue_datasets.py deleted file mode 100644 index 9f7eae536f..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/glue_datasets.py 
+++ /dev/null @@ -1,154 +0,0 @@ -"""A collection of utility methods for working with the GLUE dataset -Primarily includes methods to: - - Download raw GLUE data - - Process GLUE data with a given tokenizer -Can also be run as a script in which case it will download and process the -GLUE data for a specified task, and use a specified tokenizer to process -the data, which is then written to provided output directory. -""" -import argparse -import os -import logging -from typing import Any, Union, Dict, Callable -from datasets import load_dataset, load_metric -from datasets import DatasetDict, Dataset, Metric # used for typing -from torch.utils.data.dataset import Dataset -from transformers import PreTrainedTokenizerBase, AutoTokenizer - -# specific mapping from glue task to dataset column names -task_columns = { - "cola": ("sentence",), - "mnli": ("premise", "hypothesis"), - "mnli-mm": ("premise", "hypothesis"), - "mrpc": ("sentence1", "sentence2"), - "qnli": ("question", "sentence"), - "qqp": ("question1", "question2"), - "rte": ("sentence1", "sentence2"), - "sst2": ("sentence",), - "stsb": ("sentence1", "sentence2"), - "wnli": ("sentence1", "sentence2"), -} - -# mnli-mm is a special name used by huggingface -actual_task = lambda task: "mnli" if task == "mnli-mm" else task - - -def num_labels_from_task(task: str) -> int: - """Return the number of labels for the GLUE task.""" - if task.startswith("mnli"): - return 3 - elif task.startswith("stsb"): - return 1 - else: - # all other glue tasks have 2 class labels - return 2 - - -def load_metric_from_task(task: str) -> Metric: - """Load the metric for the corresponding GLUE task.""" - metric = load_metric("glue", actual_task(task)) - return metric - - -def get_metric_name_from_task(task: str) -> str: - """Get the name of the metric for the corresponding GLUE task. - If using `load_best_model_at_end=True` in TrainingArguments then you need - `metric_for_best_model=metric_name`. 
Use this method to get the metric_name - for the corresponding GLUE task. - """ - if task == "stsb": - return "pearson" - elif task == "cola": - return "matthews_correlation" - else: - return "accuracy" - - -def construct_tokenizer_function( - tokenizer: PreTrainedTokenizerBase, task: str -) -> Callable[[Union[Dict, Any]], Union[Dict, Any]]: - """Construct function used to tokenize GLUE data. - Some GLUE tasks (CoLA and SST2) have single sentence input, while the rest - have sentence pairs. This method returns a method that applies the appropriate - tokenizer to an example input based on that tasks sentence_keys. - Args: - tokenizer: A Transformers Tokenizer used to convert raw sentences into - something our model can understand. - task: Names of the GLUE task. - Returns: - A function that applies our tokenizer to example sentence(s) from the - associated GLUE task. - """ - - sentence_keys = task_columns.get(task) - - if len(sentence_keys) == 1: - sentence1_key = sentence_keys[0] - - def tokenize_single_sentence(examples: Union[Dict, Any]) -> Union[Dict, Any]: - return tokenizer(examples[sentence1_key], truncation=True) - - return tokenize_single_sentence - - else: - sentence1_key, sentence2_key = sentence_keys - - def tokenize_sentence_pair(examples: Union[Dict, Any]) -> Union[Dict, Any]: - return tokenizer( - examples[sentence1_key], examples[sentence2_key], truncation=True - ) - - return tokenize_sentence_pair - - -def load_raw_glue_dataset(task: str) -> Union[DatasetDict, Dataset]: - dataset = load_dataset("glue", actual_task(task)) - return dataset - - -def load_encoded_glue_dataset( - task: str, tokenizer: PreTrainedTokenizerBase -) -> Union[DatasetDict, Dataset]: - """Load GLUE data, apply tokenizer and split into train/validation.""" - tokenizer_func = construct_tokenizer_function(tokenizer=tokenizer, task=task) - raw_dataset = load_raw_glue_dataset(task) - encoded_dataset = raw_dataset.map(tokenizer_func, batched=True) - - validation_key = ( - 
"validation_mismatched" - if task == "mnli-mm" - else "validation_matched" - if task == "mnli" - else "validation" - ) - - return encoded_dataset["train"], encoded_dataset[validation_key] - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--model_checkpoint", default="bert-large-uncased") - parser.add_argument("--task", help="Name of GLUE task") - parser.add_argument( - "--use_fast", - action="store_false", - help="Bool that determines to use fast tokenizer or not. Default is True.", - ) - parser.add_argument( - "--output_dir", help="Directory to store tokenized GLUE dataset." - ) - args, unparsed = parser.parse_known_args() - - tokenizer = AutoTokenizer.from_pretrained( - args.model_checkpoint, use_fast=args.use_fast - ) - - logger.info("Downloading raw") - tokenized_dataset = load_encoded_glue_dataset( - task=args.task.lower(), tokenizer=tokenizer - ) - - logger.info(f"Saving processed dataset to {args.output_dir}...") - os.makedirs(args.output_dir, exist_ok=True) - tokenized_dataset.save_to_disk(args.output_dir) - logger.info("Done!") diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/pretrain_glue.py b/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/pretrain_glue.py deleted file mode 100644 index 2dc2879c6e..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/pretrain_glue.py +++ /dev/null @@ -1,124 +0,0 @@ -import numpy as np -import mlflow -import time -from typing import Dict, Callable -import json - -# from dataclasses import dataclass, field -import transformers -from transformers import ( - AutoModelForSequenceClassification, - AutoTokenizer, - EvalPrediction, - Trainer, - HfArgumentParser, - TrainingArguments, -) -from glue_datasets import ( - load_encoded_glue_dataset, - num_labels_from_task, - load_metric_from_task, -) - -# pretraining -from transformers import AutoConfig -from transformers import DataCollatorForLanguageModeling - -# Azure ML imports - could replace this with e.g. 
wandb or mlflow -from transformers.integrations import MLflowCallback - - -def construct_compute_metrics_function(task: str) -> Callable[[EvalPrediction], Dict]: - metric = load_metric_from_task(task) - - if task != "stsb": - - def compute_metrics_function(eval_pred: EvalPrediction) -> Dict: - predictions, labels = eval_pred - predictions = np.argmax(predictions, axis=1) - return metric.compute(predictions=predictions, references=labels) - - else: - - def compute_metrics_function(eval_pred: EvalPrediction) -> Dict: - predictions, labels = eval_pred - predictions = predictions[:, 0] - return metric.compute(predictions=predictions, references=labels) - - return compute_metrics_function - - -if __name__ == "__main__": - - parser = HfArgumentParser(TrainingArguments) - parser.add_argument("--task", default="cola", help="name of GLUE task to compute") - parser.add_argument("--model_checkpoint", default="bert-large-uncased") - training_args, args = parser.parse_args_into_dataclasses() - - transformers.logging.set_verbosity_debug() - - # with open('ds_config.json') as f: - # my_config = json.load(f) - - task: str = args.task.lower() - - num_labels = num_labels_from_task(task) - - # model = AutoModelForSequenceClassification.from_pretrained( - # args.model_checkpoint, num_labels=num_labels - # ) - - tokenizer = AutoTokenizer.from_pretrained(args.model_checkpoint, use_fast=True) - context_length = 512 - # tokenizer.pad_token = tokenizer.eos_token - # data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False) - - model_config = AutoConfig.from_pretrained( - args.model_checkpoint, - vocab_size=len(tokenizer), - n_ctx=context_length, - bos_token_id=tokenizer.bos_token_id, - eos_token_id=tokenizer.eos_token_id, - ) - model = AutoModelForSequenceClassification.from_config(model_config) - # model.config.pad_token_id = model.config.eos_token_id - # if tokenizer.pad_token is None: - # print("adding pad_token!") - # # note: adding new pad token will change the vocab size 
- # # to keep it simple just reuse an existing special token - # # https://github.com/huggingface/transformers/issues/6263 - # tokenizer.pad_token = tokenizer.eos_token - # model.config.pad_token_id = model.config.eos_token_id - - encoded_dataset_train, encoded_dataset_eval = load_encoded_glue_dataset( - task=task, tokenizer=tokenizer - ) - - compute_metrics = construct_compute_metrics_function(args.task) - - trainer = Trainer( - model, - training_args, - train_dataset=encoded_dataset_train, - eval_dataset=encoded_dataset_eval, - # data_collator=data_collator, - tokenizer=tokenizer, - compute_metrics=compute_metrics, - ) - - trainer.pop_callback(MLflowCallback) - - start = time.time() - result = trainer.train() - - print(f"Time: {result.metrics['train_runtime']:.2f}") - print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}") - print("Training...") - - mlflow.log_metric( - "time/epoch", (time.time() - start) / 60 / training_args.num_train_epochs - ) - - print("Evaluation...") - - trainer.evaluate() diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/pretrained-bert/vocab.txt b/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/pretrained-bert/vocab.txt deleted file mode 100644 index 311ddfb810..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/pretrained-bert/vocab.txt +++ /dev/null @@ -1,30522 +0,0 @@ -[PAD] -[UNK] -[CLS] -[SEP] -[MASK] - - -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? 
-@ -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -¡ -¢ -£ -¥ -§ -¨ -© -« -® -° -± -² -³ -´ -µ -· -¹ -º -» -¼ -½ -¿ -× -ß -æ -ð -÷ -ø -þ -đ -ħ -ı -ł -ŋ -œ -ſ -ǀ -ǁ -ǝ -ɐ -ɑ -ɒ -ɔ -ɕ -ɘ -ə -ɛ -ɡ -ɣ -ɪ -ɫ -ɹ -ɾ -ʁ -ʃ -ʊ -ʒ -ʔ -ʰ -ʲ -ʷ -ʻ -ʼ -ʾ -ʿ -ˈ -ˊ -ˌ -ː -α -β -γ -δ -ε -ζ -η -θ -ι -κ -λ -μ -ν -ξ -ο -π -ρ -ς -σ -τ -υ -φ -χ -ψ -ω -ϙ -а -б -в -г -д -е -ж -з -и -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ы -ь -э -ю -я -є -і -ј -ћ -қ -ң -ү -ә -ա -գ -ե -ի -լ -կ -հ -մ -յ -ն -ո -ս -տ -ր -ւ -א -ב -ג -ד -ה -ו -ז -ח -ט -י -כ -ל -ם -מ -ן -נ -ס -ע -פ -צ -ק -ר -ש -ת -، -ء -ا -ب -ة -ت -ث -ج -ح -خ -د -ذ -ر -ز -س -ش -ص -ض -ط -ظ -ع -غ -ـ -ف -ق -ك -ل -م -ن -ه -و -ى -ي -ٱ -پ -چ -ڈ -ک -ڭ -گ -ھ -ہ -ی -ە -ܐ -ܝ -ނ -ރ -އ -މ -ދ -अ -आ -इ -ई -ए -क -ख -ग -घ -च -ज -झ -ट -ड -ण -त -थ -द -ध -न -प -फ -ब -भ -म -य -र -ल -ळ -व -श -ष -स -ह -ा -ि -ी -ो -। -॥ -ং -আ -ই -ক -গ -ঙ -চ -জ -ঞ -ট -ড -ত -দ -ন -প -ফ -ব -ম -য -র -ল -শ -স -হ -া -ি -ী -ে -ਾ -ର -ା -க -ச -ட -ண -த -ந -ன -ப -ம -ய -ர -ற -ல -ள -வ -ா -ி -ு -ூ -ே -ை -ರ -ക -ല -ാ -ി -ක -න -ම -ර -ල -ව -ස -ා -ෙ -ก -ข -ค -ง -จ -ช -ฐ -ณ -ด -ต -ท -ธ -น -บ -ป -ผ -พ -ม -ย -ร -ล -ว -ศ -ส -ห -อ -ะ -า -ำ -เ -แ -โ -ไ -་ -ა -ე -ი -ლ -მ -რ -ᄀ -ᄁ -ᄂ -ᄃ -ᄄ -ᄅ -ᄆ -ᄇ -ᄉ -ᄊ -ᄋ -ᄌ -ᄎ -ᄏ -ᄐ -ᄑ -ᄒ -ᅡ -ᅢ -ᅣ -ᅥ -ᅦ -ᅧ -ᅨ -ᅩ -ᅪ -ᅬ -ᅭ -ᅮ -ᅯ -ᅱ -ᅲ -ᅳ -ᅴ -ᅵ -ᆨ -ᆫ -ᆯ -ᆷ -ᆸ -ᆼ -ን -አ -‐ -‑ -– -— -― -‘ -’ -“ -” -„ -† -‡ -• -… -′ -″ -› -⁄ -₤ -€ -₹ -℃ -№ -™ -⅓ -← -↑ -→ -− -√ -∞ -∴ -≈ -≡ -≤ -≥ -⋅ -─ -│ -└ -├ -█ -● -☆ -♥ -♦ -♭ -♯ -⟨ -⟩ -ⴰ -ⵜ -、 -。 -〈 -〉 -《 -》 -「 -」 -『 -』 -〜 -あ -い -う -え -お -か -き -く -け -こ -さ -し -す -せ -そ -た -ち -っ -つ -て -と -な -に -の -は -ひ -ふ -ほ -ま -み -め -も -ゃ -や -よ -ら -り -る -れ -わ -を -ん -ァ -ア -ィ -イ -ウ -ェ -エ -オ -カ -キ -ク -ケ -コ -サ -シ -ス -セ -ソ -タ -チ -ッ -ツ -テ -ト -ナ -ニ -ネ -ノ -ハ -ヒ -フ -ヘ -ホ -マ -ミ -ム -メ -モ -ャ -ュ -ユ -ョ -ラ -リ -ル -レ -ロ -ワ -ン -・ -ー -ㅅ -ㅇ -一 -三 -上 -下 -不 -世 -东 -中 -主 -之 -九 -也 -乡 -事 -二 -五 -井 -交 -京 -人 -仁 -代 -伊 -会 -佐 -佛 -你 -保 -信 -元 -光 -全 -八 -公 -共 -关 -兴 -内 -出 -別 -利 -前 -劇 -力 -加 -動 -化 -北 -区 -十 -千 -協 -南 -原 -县 -口 -古 -台 
-史 -号 -司 -合 -吉 -同 -名 -向 -君 -周 -味 -命 -和 -四 -回 -国 -國 -土 -在 -地 -城 -基 -堂 -報 -士 -夏 -外 -多 -夜 -夢 -大 -天 -太 -夫 -头 -奇 -奈 -女 -好 -子 -字 -学 -學 -宇 -安 -宗 -定 -宝 -宮 -家 -寨 -寺 -寿 -小 -局 -山 -岡 -岩 -峰 -島 -川 -州 -工 -市 -帝 -平 -年 -幸 -庄 -店 -康 -建 -式 -张 -後 -徐 -徳 -德 -心 -志 -悲 -愛 -成 -我 -戦 -房 -所 -手 -打 -摩 -政 -教 -文 -新 -方 -族 -日 -明 -星 -春 -時 -智 -曲 -書 -最 -月 -有 -朝 -木 -本 -朱 -李 -村 -東 -松 -林 -校 -株 -根 -桥 -森 -楼 -楽 -歌 -正 -武 -死 -氏 -民 -水 -永 -江 -池 -沈 -沙 -河 -治 -泉 -法 -津 -洪 -活 -流 -海 -清 -渡 -渭 -港 -湖 -溪 -漢 -火 -為 -無 -版 -物 -玉 -王 -理 -琴 -生 -田 -由 -男 -町 -画 -白 -百 -的 -皇 -直 -県 -真 -知 -石 -研 -社 -神 -福 -科 -秦 -究 -空 -立 -章 -竹 -第 -等 -系 -紀 -經 -縣 -红 -美 -義 -老 -者 -自 -航 -良 -色 -花 -若 -英 -草 -華 -藏 -藤 -行 -街 -西 -要 -観 -角 -言 -記 -話 -語 -譜 -谷 -豆 -赤 -越 -跡 -路 -通 -速 -遊 -道 -郎 -部 -都 -里 -重 -野 -金 -镇 -長 -长 -門 -関 -门 -阳 -阿 -院 -陵 -陽 -隆 -雄 -集 -雲 -青 -韓 -音 -風 -食 -香 -馬 -駅 -马 -高 -麻 -黃 -黄 -齋 -龍 -龙 -ꯀ -ꯁ -ꯂ -ꯃ -ꯄ -ꯅ -ꯆ -ꯇ -ꯈ -ꯉ -ꯊ -ꯌ -ꯍ -ꯎ -ꯏ -ꯔ -ꯕ -ꯛ -ꯜ -ꯝ -ꯟ -ꯡ -ꯢ -ꯣ -ꯤ -ꯦ -ꯧ -ꯩ -ﷺ -! -( -) -, -: -~ - -##8 -##0 -##6 -##h -##a -##b -##e -##n -##y -##o -##z -##i -##t -##l -##g -##u -##s -##k -##d -##r -##f -##c -##2 -##p -##w -##j -##v -##m -##1 -##9 -##3 -##5 -##न -##र -##ी -##7 -##x -##л -##о -##и -##4 -##ह -##ा -##स -##ि -##q -##° -##р -##м -##а -##е -##д -##ر -##ا -##س -##ن -##ð -##ل -##ق -##ت -##ب -##ꯃ -##ꯟ -##ك -##ア -##ー -##カ -##イ -##フ -##ス -##ʼ -##п -##б -##у -##る -##ල -##म -##भ -##ß -##ᅥ -##ᆫ -##ᄑ -##ᅮ -##ᆼ -##ᄀ -##ᅵ -##ܐ -##া -##¢ -##æ -##с -##н -##в -##т -##к -##ณ -##ป -##ร -##ะ -##ᆨ -##ᄋ -##ᅳ -##ᆷ -##ᆯ -##ᅡ -##ł -##و -##ی -##α -##ζ -##ϙ -##σ -##τ -##ν -##ι -##μ -##ᅯ -##ᄎ -##ே -##ற -##க -##ா -##ள -##ⴰ -##г -##ф -##ч -##ध -##ш -##℃ -##ي -##ه -##ı -##ァ -##レ -##ン -##← -##ネ -##ル -##ע -##ו -##ל -##ת -##ø -##ম -##দ -##ᄐ -##ᅩ -##ᄒ -##ᅴ -##י -##ז -##ד -##ʊ -##ə -##ס -##ィ -##د -##थ -##ɐ -##ˈ -##ո -##ե -##ր -##տ -##ի -##υ -##ς -##า -##บ -##น -##ไ -##ท -##ย -##² -##ы -##ム -##メ -##€ -##ى -##ع -##ز -##ח -##מ -##ש -##× -##ɛ -##ŋ -##ћ -##ь -##ᅢ -##ᄌ -##ক -##х -##ъ -##ง -##อ -##ผ -##ด -##¹ -##ο -##テ -##シ -##ョ -##ส -##พ -##ר -##א -##ם -##प -##ल -##ッ -##ج -##ה -##ק -##く -##て -##ᄂ 
-##ᅧ -##ᄉ -##ᄃ -##ة -##© -##ᅣ -##ᅪ -##ᅦ -##λ -##ω -##œ -##≤ -##ː -##ำ -##त -##の -##я -##נ -##श -##ग -##ন -##য -##व -##і -##ј -##ꯍ -##ꯣ -##ꯡ -##ف -##з -##ρ -##η -##ে -##כ -##ح -##ط -##م -##ە -##ک -##ष -##ट -##¼ -##に -##な -##っ -##し -##ま -##た -##ん -##う -##ঞ -##চ -##গ -##ড -##ذ -##ش -##け -##স -##ি -##ε -##ж -##ュ -##エ -##モ -##タ -##ᄆ -##ᆸ -##ц -##ड -##ᅬ -##ᅨ -##す -##ാ -##χ -##´ -##ত -##হ -##צ -##ʰ -##ю -##キ -##ノ -##ᄇ -##ರ -##ص -##ホ -##ト -##オ -##リ -##κ -##£ -##さ -##द -##ब -##ᄅ -##ラ -##ט -##य -##ħ -##ɔ -##ひ -##き -##り -##ދ -##щ -##ロ -##ニ -##よ -##か -##れ -##や -##お -##も -##え -##ա -##ն -##ワ -##þ -##э -##ட -##ம -##ʻ -##đ -##β -##み -##ェ -##ᅭ -##մ -##կ -##জ -##ী -##ব -##ク -##ण -##घ -##ई -##เ -##ก -##फ -##އ -##މ -##⁄ -##ˌ -##ɪ -##ψ -##ㅅ -##ㅇ -##ɾ -##ג -##チ -##ᄁ -##ʲ -##ɘ -##ு -##ர -##ி -##ண -##ม -##ช -##ว -##จ -##ล -##ห -##क -##ो -##ख -##ธ -##ต -##ハ -##ـ -##ა -##მ -##რ -##ე -##ლ -##ი -##つ -##ら -##コ -##ן -##ү -##پ -##い -##θ -##÷ -##ɕ -##ꯩ -##ꯔ -##ꯎ -##ꯕ -##− -##こ -##± -##ফ -##র -##ळ -##π -##せ -##ء -##ウ -##™ -##│ -##└ -##ʒ -##ث -##─ -##گ -##ข -##โ -##ظ -##ല -##ɹ -##ǝ -##ɑ -##ʃ -##→ -##ᄏ -##ʿ -##ɫ -##ξ -##ᄊ -##қ -##ち -##ふ -##ャ -##ツ -##แ -##ค -##ב -##½ -##ज -##ɣ -##γ -##च -##ھ -##ᅲ -##ſ -##├ -##ল -##ʷ -##ର -##غ -##ⵜ -##ট -##ނ -##サ -##ʁ -##⋅ -##ɡ -##ර -##ස -##ෙ -##خ -##マ -##® -##ہ -##º -##ꯀ -##ɒ -##த -##ய -##ꯤ -##յ -##ി -##գ -##வ -##ை -##そ -##ˊ -##δ -##¥ -##ꯈ -##ꯄ -##∴ -##φ -##は -##ض -##³ -##ٱ -##ս -##झ -##ศ -##ฐ -##ゃ -##ᅱ -##න -##ම -##ා -##լ -##☆ -##ꯌ -##ꯜ -##ව -##め -##ல -##ூ -##と -##শ -##ミ -##ケ -##¨ -##ڈ -##∞ -##ப -##ʾ -##ꯧ -##ই -##প -##ソ -##ꯦ -##ക -##ன -##ச -##µ -##ヒ -##ঙ -##ւ -##♥ -##ᄄ -##ܝ -##ꯛ -##פ -##ヘ -##ା -##ந -##ә -##♯ -##ꯝ -##ን -##█ -##ڭ -##ꯂ -##ং -##ਾ -##~ -##є -##セ -##ナ -##ක -##⅓ -##わ -##ほ -##ꯢ -##ʔ -##ꯇ -## -##あ -##ރ -##इ -##ꯏ -##ң -##を -##ꯁ -##ꯅ -##♭ -##چ -##ꯊ -##ꯉ -##अ -##ǁ -##≡ -##ꯆ -##ǀ -##հ -##ユ -##√ -##আ -##er -th -the -##an -##es -##in -##on -##or -##en -##ar -##ed -##at -in -##al -of -##is -##it -##ic -an -##as -and -##om -##ou -##ing -##le -##re -##ion -19 -to -is 
-##th -##il -##am -##el -##ent -##ol -##st -20 -re -##ct -st -##ch -was -##ur -##ir -##ad -##ers -##iv -##ro -on -fr -##et -##ay -for -it -##id -##ce -##ist -##us -##op -he -##ot -##ow -be -al -##ig -de -ch -##ov -##im -as -##ian -##ly -##ation -##ag -wh -##em -##and -##un -##ut -##ies -from -##oun -##ter -##ia -##ul -sh -##os -com -un -##ces -##ra -##mer -##ish -se -##all -##um -by -##est -201 -##ith -at -200 -##ber -##ther -##art -##ere -pl -##fer -or -are -##av -with -##od -##ck -##ican -ne -##ak -##ap -##ity -##ie -pro -##ain -con -that -##ill -le -amer -en -pe -pr -ar -##ab -sp -##if -##ople -##ip -##feren -##ard -american -##ess -referen -##ath -references -people -##ant -##eb -##igh -##ate -##ld -##ment -199 -mov -##ich -##ud -his -us -te -wor -##ame -##ge -##ary -##ver -##ub -other -##rit -##ort -act -##so -##our -##orn -18 -##og -##ust -play -##ong -##ast -##ount -can -##ial -they -##ell -mar -##ated -##se -man -##ore -##ost -ex -##ive -##ue -198 -##ine -##ound -this -su -##ear -also -tr -##oc -cl -##man -new -ab -di -not -af -##gh -##ight -has -##land -##ok -##ome -web -##ors -##ates -pol -##ph -##ang -##ical -count -##lish -##sit -##ave -197 -##ited -she -gr -col -bo -were -part -comp -ro -fir -websit -##ach -bir -sc -##age -comm -##end -bec -##ik -##ib -##per -##ac -##ect -##ack -##own -##ry -first -##ure -##ord -me -ser -car -born -one -##ey -united -death -##out -##sp -which -##iz -nor -##pt -her -##ths -##ions -##pp -##ond -##ict -mus -ad -##ass -##ember -196 -eng -fo -##ime -kn -##ide -ind -##ans -##ational -states -all -have -##ory -##ern -websites -##ous -##cl -##amp -##wn -births -##du -##ough -liv -##qu -##00 -##ru -there -##ice -city -##ician -##ance -year -##aw -tw -##mp -ph -dis -par -##ater -##ade -jo -##ace -after -ag -##ens -but -had -##ball -cent -##ree -their -deaths -##der -##aus -sou -who -195 -##tern -194 -may -##ision -br -##to -polit -##ics -##io -rep -bl -movies -fl -##ign -ac -pres -world -##lect -call -off -##te -its -foot -193 
-##les -##ual -##ug -##ec -per -##ild -some -most -about -##ah -##ts -many -when -known -##ward -mon -football -go -sing -war -res -##act -co -ger -##ick -min -county -##lev -17 -##ire -##ings -##ents -movie -202 -two -cont -##ivers -##ake -##ied -##we -used -called -##iss -bas -work -##ould -writ -##one -##ff -ev -commun -do -rel -##ations -##ence -up -##ery -dr -##ities -sy -##ral -pop -192 -##ments -##ind -##ince -south -##ton -sch -gen -north -##istr -time -##ames -german -##ists -rec -made -##ult -reg -##old -national -##ress -##ail -##ile -##0s -##au -##ite -##ork -##span -found -##ons -##ose -so -living -gre -politician -bu -est -telev -am -actors -pre -##als -will -##oup -##clud -brit -television -sec -dep -state -hist -##ten -char -ear -##rect -more -10 -##apan -includ -japan -died -english -##iver -app -sw -aust -##olog -no -##ition -we -##orm -des -gu -univers -##ike -##uring -##ident -fam -out -into -bet -##ause -elect -##ese -16 -been -jan -##ages -cal -bro -##ne -king -sm -##az -gov -over -direct -po -song -##ague -##ier -sa -##ays -nov -##oll -##ins -15 -group -ge -govern -fe -##gan -air -##duc -distr -##ablish -tra -##ina -team -##ft -##vel -med -##raph -##ix -##ife -##ank -##ubl -alb -mo -ap -lar -cr -##ica -##erson -league -##ury -joh -british -during -award -years -12 -them -series -ma -bel -##inn -offic -hel -##ock -##enn -popul -art -##ase -only -how -##ys -form -cre -ed -cap -town -river -high -##anc -establish -##ep -became -##inal -##hip -because -##tle -president -##urr -##ood -##urn -ter -jun -name -mun -##uary -##son -11 -dif -##eral -att -stud -##ope -##ograph -ju -france -john -##icip -january -york -##ily -la -prov -music -qu -##gr -##uss -university -these -like -district -show -than -him -##ool -row -rele -##imes -2010 -played -album -start -##ale -##int -##ama -##ween -14 -spec -##ench -player -##cess -13 -between -serv -##ism -191 -march -em -thr -##ash -##ley -history -imp -##ann -book -##iel -##up -star -eur -##able -##ci -intern 
-ital -rom -el -bar -##iet -aug -##ely -person -##air -french -##ouse -lang -municip -end -very -chr -municipal -main -inf -met -##ased -2020 -set -austral -##ional -canad -august -where -again -oct -under -long -second -##ower -##ve -such -str -later -depart -three -rowspan -official -##res -##ob -then -best -mil -exp -sept -num -##way -##yp -school -##ril -##ures -former -arm -record -##ai -##ublic -##til -##cer -law -life -##ction -september -##are -##ober -langu -##iam -july -establishments -##ually -october -##mber -sci -june -sl -##ason -mod -child -care -old -dece -april -champ -##lin -##ister -differ -west -would -##ative -century -##ke -since -lead -sub -##ks -##fore -##ital -december -party -different -europe -##ather -sur -players -won -november -2017 -##emb -related -wom -har -inst -games -##ium -cons -2007 -lo -feb -mat -fil -actor -game -island -until -##ren -je -sim -soc -##ruary -day -inter -if -february -ii -div -def -back -##aint -##amed -area -##pen -four -2014 -sk -ret -released -2006 -##sh -2019 -prof -ang -age -##rop -##ement -land -2008 -ste -list -sport -through -##ron -##eng -club -bus -##oss -department -##und -before -small -##stem -band -loc -bra -2018 -##ble -use -member -##ious -##ants -##ri -house -phil -international -russ -acc -family -##ss -2005 -government -kill -inv -system -##ology -##bert -##ced -##ets -well -##let -region -make -water -##ats -il -##ott -general -career -gl -##thern -2009 -number -ob -##ms -rock -ever -##ution -21 -25 -##ating -red -same -##cy -hum -dem -singer -2004 -##its -anim -compan -rem -##gest -christ -000 -mill -geor -2016 -##ography -val -my -##cc -##ural -##ise -2011 -often -##amm -2015 -mu -produc -30 -##ull -##ange -east -cities -typ -##row -mak -2000 -##ds -190 -did -2001 -im -being -bat -##ung -##ont -ann -id -champions -near -##ink -calif -wal -californ -##co -##bo -##omet -california -japanese -based -##over -##ever -##ines -##tain -website -##ator -##ae -##ject -you -actress -2013 -right -both 
-##oph -leg -season -stat -2003 -vo -any -##ific -named -afr -##yl -roman -##ollow -early -tre -orig -##iew -##els -port -language -place -bre -vill -cor -indian -pages -lit -own -organ -charact -each -miss -##ives -2012 -##gy -##dom -mag -build -ass -country -england -dec -tex -italian -24 -great -ep -population -winn -##ee -2021 -oper -started -##ove -##ought -lu -rev -ir -germany -follow -run -now -ec -add -lond -##ger -arch -mich -sen -examp -##ark -2002 -disc -##bur -london -comed -said -arr -against -##ches -get -##ty -22 -black -profess -##yn -##red -pa -##ony -around -help -nat -while -const -women -23 -sal -du -##velop -##ize -cro -last -inc -usually -supp -av -perf -however -maj -ke -fin -ra -##ourn -develop -##eter -tak -##ular -mid -common -oly -##ene -minister -##ric -sever -au -olymp -live -##elf -##ries -##ful -colspan -##ison -produ -vide -##ane -class -pat -including -politicians -189 -children -head -way -kingdom -##ody -home -##ians -##ved -municipality -##ara -int -1999 -26 -cour -power -sol -##ling -rad -appear -bi -##atic -##ana -hol -communes -curr -big -musician -28 -could -##other -god -design -##iness -wr -import -##ains -sum -##ago -##ried -##istan -mount -trans -compos -major -##inc -several -##br -film -commune -lou -just -repres -chur -contr -far -##lo -##omin -mal -cup -##anish -##ones -progr -championship -##ined -##ield -27 -what -var -##ode -park -caus -san -##ows -province -thir -##uk -cancer -william -open -cast -##ner -##by -vir -memb -gold -super -##itz -down -##ret -##ible -canadian -good -##onom -eas -prot -##ille -sign -attack -##estern -colle -business -##ized -del -1998 -vol -video -##ipp -ber -large -29 -youn -india -times -mer -every -america -mass -##cr -members -##ze -##day -swed -##ler -ext -capital -1996 -phys -lived -another -chang -tit -rich -cens -public -pub -australia -vers -top -188 -represent -##augh -white -son -##cle -important -pass -vi -men -##ute -##omb -current -##ington -##ues -company -##he -directed 
-##ney -left -pal -writer -1997 -saint -emp -popular -31 -li -example -##ico -##ald -order -began -million -camp -census -adv -short -##ham -spe -##ax -##ilt -drama -1995 -##min -##adem -present -publish -##que -dav -created -conf -tem -milit -original -##writ -##ature -186 -##lex -church -educ -##ting -famous -##ka -1994 -##tal -dise -##soci -center -mark -##na -histor -##ata -uk -line -giv -##esh -suc -##cept -even -real -species -##ams -somet -writers -civ -associ -tur -##ief -win -hock -don -##erv -prime -much -grand -books -##ps -bill -hockey -##ois -bur -##ford -cult -mor -democ -ref -ant -groups -##side -sometimes -union -director -central -academ -1992 -political -187 -cath -mex -spanish -gra -##ived -australian -##ven -countries -means -still -angel -manag -scient -##app -##aster -light -##fect -##ett -##ices -los -russian -civil -1993 -##me -##burg -##aur -roy -include -six -army -see -written -100 -flor -mean -##ider -republic -songs -sea -human -##istics -military -##ior -occ -married -##ida -econom -##ided -gir -pos -coun -##ruct -enter -auth -##thers -james -##thing -1990 -father -##idge -##ma -tal -third -##ena -western -activ -special -##ract -hon -##led -comedy -##eople -support -comput -earth -heart -##ino -wind -##xt -##gin -paul -fre -built -1991 -##ves -##ky -chem -sold -##ania -##sc -gree -journ -largest -##ted -cer -hal -stand -##pl -rh -##tt -college -lab -five -kh -ver -stage -cy -cam -body -jack -##itar -louis -vict -##aut -does -##aces -contin -sil -##chn -towns -##ides -canada -km -texas -##rog -##ansas -story -wrest -northern -chic -engine -total -democr -division -##rote -bal -##umb -cat -##ots -que -##ired -1989 -disease -braz -professional -fem -george -##ush -epis -hy -##uck -color -mart -need -er -chicago -hand -##yle -##ael -kore -want -conn -proble -wrote -final -single -pak -went -rece -brazil -awards -mur -##epend -deb -aut -##af -governor -##ators -##iod -dut -independ -franc -##berg -austr -##ius -social -came -lat -given 
-green -tour -arg -vis -##arn -young -##ars -footballer -jew -seat -become -battle -##ya -china -1988 -wash -1987 -success -##bs -net -word -ve -along -term -author -rul -things -works -food -##ency -took -185 -stre -card -ent -ice -##alth -voice -albums -summer -election -1986 -equ -moved -##ases -modern -natural -expl -fire -##work -resp -sun -medal -period -##gress -type -##be -##gether -rail -fed -##ids -hall -##ised -using -look -mont -##yr -held -##aced -eff -statistics -##ges -fri -ast -got -european -together -mad -character -compet -footballers -1983 -##ness -following -hard -1980 -trad -##ring -##ories -science -scott -dev -municipalities -##ends -1984 -techn -##olution -rest -mot -##ville -##par -sn -##por -events -chin -1979 -return -##uel -##raft -atl -guitar -put -##che -killed -bad -##ured -robert -turn -##gar -court -royal -station -##ival -pakistan -sym -road -role -##osed -1985 -parts -hen -1981 -radio -relig -##ady -##iety -##cil -##port -aged -squ -##ula -journal -david -##ano -southern -jose -love -fun -take -secret -level -1982 -##aving -##ally -crit -sov -italy -tran -worked -##amb -founded -ide -few -free -descr -washington -african -cop -published -next -##avy -##ctor -ru -event -hor -stop -muse -ben -served -##ta -1976 -fif -mother -jud -plays -##ilar -##uted -##itch -##ells -thom -##ero -scre -musical -sports -##ek -site -ath -personal -##ander -association -hot -pur -##itions -fall -connect -cov -leader -sent -musicians -sex -ho -village -##ota -plan -christian -sat -space -sound -cant -##ij -broad -1972 -find -##writer -little -sit -##omes -##olic -chinese -emper -nu -##iction -mc -press -days -side -perform -cir -1974 -den -making -angeles -##oth -kar -##lands -stars -ce -##ote -night -##dle -##arch -council -dou -belie -africa -##ically -math -virgin -ele -inform -##antic -##ility -caused -less -version -tv -beg -ur -##ien -democratic -et -illin -det -lot -similar -illinois -non -pict -asia -paint -museum -feat -dutch -1975 -che 
-local -lake -empire -##ple -carol -tom -184 -charles -##itzer -sam -ox -michael -girl -pun -1973 -fail -florida -##ization -say -richard -announ -post -wat -rob -ham -georg -reach -wood -##eth -1978 -##rug -point -resear -records -ten -those -alex -collect -ara -##ways -pet -host -cle -mel -conv -research -hill -mary -ze -admin -producer -office -artist -today -1977 -grow -##lu -thought -coast -##ster -frank -##osp -prev -fish -characters -##aj -##rael -control -places -bank -dan -fact -bord -fight -phot -lim -1970 -money -without -rights -dam -academy -test -##osh -32 -1971 -repl -50 -##go -computer -##iers -az -dest -eastern -##line -##ude -rap -territ -spain -##emat -read -##nes -building -##ying -beh -soviet -prize -islands -##ona -israel -speak -opp -hit -animals -lost -mem -service -##ropical -1969 -seven -##west -style -must -1968 -led -novel -mac -sportsp -prem -emperor -##ental -kansas -##ified -fer -sportspeople -tor -##ially -##cient -20th -force -key -peter -middle -cross -paris -dist -oh -screen -mult -##ki -away -##odes -teach -sour -exper -accord -settle -strong -poet -##ged -##itional -##ump -40 -kind -183 -due -nomin -report -others -race -shar -allow -prim -wil -singers -prefect -exec -##atives -republ -##oles -##aves -ancient -##rew -elected -1967 -penn -protect -bay -makes -instit -let -sant -received -cla -process -blue -gar -lib -##ror -clim -track -##ye -greek -administr -ol -late -storm -##eration -##sy -mexico -sweden -tro -##ued -olympic -daugh -ball -##ention -information -##de -switzer -trib -bor -surv -##ais -should -1964 -switzerland -fur -title -pan -##ergy -problems -alth -turk -located -parl -gal -low -wall -##lic -pap -produced -##asc -constit -struct -##ado -composer -##onia -norm -33 -formed -catholic -network -oce -##den -##ani -languages -although -##ning -indust -alm -effect -##oma -poss -pers -hu -round -know -brown -according -##overed -##gypt -wife -change -areas -##oms -os -cit -##iga -community -shows -pen -gall 
-consider -seen -virginia -##ours -parliam -hap -##gian -##ari -describ -hurr -coach -egypt -treat -##ape -##ored -##ender -##ross -181 -health -playing -##cent -##wood -hold -##ead -though -thomas -nob -##ha -henry -##ket -frog -mathemat -##ita -ha -republican -hurrican -chief -##att -##ops -der -mos -society -##adu -parliament -##ained -1966 -news -##cast -prom -half -##my -ship -congress -queen -gun -ill -scientists -##ults -education -##ended -##ument -1960 -##of -##anch -##bc -##ground -1945 -ok -##iana -##med -teams -##osoph -##olf -##nd -##thod -words -##ards -##icle -changed -tenn -dra -##oy -voc -energy -uses -##ga -pu -##ished -esp -tropical -joined -philosoph -buildings -prefecture -182 -birth -##eball -hun -blood -34 -eth -match -mix -mountain -1965 -engineer -includes -35 -mostly -woman -cur -prison -##ini -never -police -transp -female -##ength -among -bab -sand -baseball -earl -train -culture -##ipt -airport -les -bud -cand -happen -##ris -##ba -swedish -##lam -russia -1963 -product -come -##oon -defe -36 -iran -francis -##hl -dead -too -full -##ii -##asons -carolina -##une -announced -field -##osaur -##ura -##etic -##field -olympics -model -##aland -boy -gave -##ersey -development -##fl -iv -winners -##ims -viol -brother -types -##wards -action -chart -##ylv -prince -##za -mr -argent -sar -basket -episode -hung -##ient -whe -project -bul -learn -within -##era -##ements -entertain -commit -##aly -lawy -comes -1956 -##ox -mic -##lad -##ylvania -##craft -plant -sho -van -##anger -##akes -##gen -##aries -##the -appeared -counties -result -better -jer -##ances -hosp -##uc -inh -bc -1961 -notes -program -rac -##ima -scot -production -##olk -daughter -distrib -##isc -federal -winning -victor -map -##uit -##ission -think -iss -included -once -##ogn -##eland -break -movement -cann -1962 -##aff -tim -arab -##icles -scottish -dog -##ste -lives -1950 -georgia -jersey -cho -having -penns -forces -##ef -basketball -instead -separ -##idents -##ada -##ological 
-1947 -chemical -pie -##igan -##ests -##abit -gh -jewish -geography -##wer -pot -lower -##bers -##iff -pennsylvania -view -comb -ohio -wanted -almost -martin -ocean -board -##bon -##bor -liter -street -brazilian -1948 -coll -independent -##view -perfor -mach -die -dig -##ube -developed -keep -kent -pain -##awa -intro -bang -cond -##ban -##lete -##ware -##gb -meaning -cost -established -##shire -hurricane -flight -aver -##ondiss -front -##ither -##wa -##itect -es -179 -arrondiss -1946 -##oming -##ique -select -portug -songwriter -##ming -says -##arian -metal -done -##lor -ire -polish -##get -chann -1951 -180 -canton -##ference -becomes -crime -rank -influ -pian -appro -bern -lee -1957 -jul -object -medic -nick -##sych -fest -iii -architect -##aria -gro -##ador -plants -theat -ireland -studio -sus -1958 -1959 -premier -90 -##cul -irish -dyn -refer -week -1940 -smith -##east -past -##eder -invol -representatives -1942 -##ints -fox -1944 -settlements -working -##ym -##ca -1949 -box -covid -transport -names -wrestling -gard -bridge -##ols -columb -1954 -cra -ground -study -##ellow -bass -eight -proper -1930 -##mon -travel -colon -temper -winter -studied -1939 -position -##ny -mater -##itor -##iest -##iple -1952 -magaz -fort -##ters -failure -##pre -companies -##ification -##agon -##ato -1941 -som -##gg -1936 -react -islam -max -##ences -nar -zealand -nether -above -incre -able -##iation -border -##ared -media -70 -psych -##etts -condu -##50 -1934 -##col -##roy -taken -##men -##achus -burn -anth -highest -stan -wild -rain -##ala -sem -##vir -centre -transl -electric -##elling -review -##ih -##ament -considered -##istry -1955 -soft -1929 -##iger -austrian -1990s -1933 -##isters -netherlands -give -jim -meas -here -qual -sir -decl -fly -massachus -vot -##ologist -chair -ul -##ola -currently -joseph -massachusetts -heavy -##uts -murder -ins -priv -mayor -release -libr -##ev -glob -biography -corn -requ -manager -pictures -pas -stra -latin -hospital -might -##tar -##off 
-##ination -talk -month -##ems -##zo -districts -fiction -move -secretary -korean -performan -average -eat -material -##self -##oul -##acy -opera -##eds -citiz -cause -37 -dark -territory -wars -walk -ven -ww -jean -writing -pay -edward -haw -least -##ulf -tree -broadcast -##ston -cell -rivers -revolution -senate -38 -##ately -##isl -himself -1931 -fa -##ishop -fig -1935 -friends -##ils -lord -opened -##eld -aud -##leg -##reg -certain -ros -various -data -1943 -wales -cart -##vers -##owa -asian -korea -drug -1953 -dar -1932 -castle -hom -valley -case -parish -golden -##less -pac -austria -nucle -1938 -1937 -mir -##sel -retired -##atory -1928 -silver -##stein -relations -carl -numbers -goal -professor -happ -op -##board -bomb -cycl -railway -takes -tar -obs -##itan -always -medical -across -80 -##ja -successful -lik -##20 -lie -##mit -complet -text -##gl -doc -##ua -conserv -advent -1980s -jackson -liber -##vin -da -meet -ren -##ested -inside -fore -either -##umn -bob -##10 -exist -nations -your -close -micro -scotland -##unch -##face -originally -##ently -climate -stad -##ouri -1926 -##used -mainly -##oo -recorded -##iy -mah -native -roles -drum -students -19th -39 -amount -animated -60 -ba -leaders -symb -princ -performed -##inted -500 -jr -##zech -##ably -##azz -male -journalist -something -fle -philipp -##hol -method -##utive -michigan -deg -1927 -ident -##any -magn -crash -czech -changes -orch -45 -reb -ask -artists -candid -results -nominated -envir -recogn -##urs -page -amb -schools -kr -##itive -villages -beat -##ression -##ending -sav -##abeth -months -espec -##aka -##aign -##erb -aff -##oh -presidential -theory -fourth -mountains -channel -fut -##ole -arts -bav -missouri -##ruction -cab -especially -##form -mexican -ray -pope -entertainment -prin -cele -seats -serie -iowa -systems -nobel -assemb -##iev -capt -##ques -##bour -##utes -outside -instr -astron -lin -##aker -##band -range -doctor -environ -##ette -##ales -##ella -ended -64 -mas -circ -goes 
-causes -##ores -##dy -services -institute -tax -pract -##mar -##com -1925 -domin -peace -gradu -##hi -disney -size -##inning -##ively -idea -anton -##read -destroy -atlantic -gas -sequ -##owers -plat -bron -178 -magazine -previous -wid -cub -shown -aircraft -##uments -ukra -viet -honor -campaign -fac -##wan -##ert -duke -##ios -abb -programs -saf -square -1970s -##apt -fame -brand -##ias -##oke -hyd -prob -log -festival -champion -##elle -maria -##stand -going -electron -##iles -speed -##igr -lay -dynast -esc -hus -##li -signed -nav -##orf -##outh -market -helped -britain -enough -fair -background -ken -independence -added -birds -##och -danc -soon -ca -inj -musl -dance -##la -stone -windows -dre -laws -pred -our -pit -##kyo -library -prop -imm -rout -religious -friend -internet -further -foreign -cannot -##lah -rome -vice -reached -sister -possible -spir -##ites -##play -##rel -sax -##jan -naz -mess -htt -northwest -ak -upper -##enc -##edia -date -tun -programm -regular -invent -bost -animal -alp -ash -1924 -42 -roll -##ivid -replaced -##ex -diff -syn -bon -pil -seasons -kil -##aska -stadium -stories -2000s -concer -73 -##agn -##ense -agre -chap -theatre -minor -tokyo -environment -##ume -87 -electr -75 -newsp -1960s -killing -technology -##urt -74 -moon -89 -##nam -singles -committe -##11 -##illa -decided -allowed -organization -jac -##ko -##aged -88 -cover -newspap -miles -hop -##rey -##icular -86 -##idence -quick -bands -rod -##mark -1923 -hous -stay -boston -orchest -##ux -##oto -interview -points -stock -1922 -cut -leon -assembly -historical -##inson -poland -##gn -oklah -ran -##ounds -oil -commer -autom -ark -episodes -oklahoma -##ensive -divided -ut -executive -individ -invest -2010s -00 -##win -returned -tourn -##har -officer -dom -inse -##ened -wel -##cher -forms -##anda -##empt -cas -trade -barb -wwe -surr -bell -cases -##heim -oldest -parents -performance -ron -features -longer -ach -flag -##airs -length -winner -##chester -louisiana -pacific -rang 
-smaller -standard -saw -##cel -cry -celebr -minnes -92 -2022 -1900 -founder -master -##sen -clos -individual -##12 -##aki -legisl -ram -minnesota -##lines -##ateg -gallery -brain -ts -traditional -source -except -feel -##bach -##teen -simp -ta -43 -aqu -##uf -##establish -gramm -beginning -fifa -##ipl -##lications -din -tam -##ped -goals -egg -gian -conc -scr -dynasty -true -avail -##ati -##ips -bavaria -##orial -bow -discovered -rule -##use -soldiers -columbia -religion -block -introduc -##ucky -##rd -committee -commission -177 -jazz -##don -assist -align -charl -critic -##iro -activist -atlant -##well -origin -yellow -85 -disestablish -tall -chile -eliz -vocals -1920 -bbc -arth -##ights -inhabit -famil -awarded -##yan -ther -humans -francisco -ring -ris -steph -launch -sel -appo -kid -##wh -##oor -table -online -designed -alexander -collection -cook -berlin -65 -formula -studies -genus -shot -examples -cold -fast -iron -##chan -##hr -surface -83 -##ladesh -96 -bangladesh -ic -code -someone -nhl -rat -##uch -neigh -##aine -##iller -84 -lady -##riage -od -##isions -albert -shoot -neg -command -48 -deep -regions -bru -##iding -##endo -hero -gets -##ected -##icult -##agan -kentucky -championships -citizens -fel -private -jones -99 -##hood -pier -contains -job -literature -fc -mcc -##ria -##ience -1921 -arrondissement -available -##bit -medicine -77 -##ades -stations -##for -swiss -oppos -##wor -spring -54 -95 -emer -lgb -lawyer -jes -##ali -jeff -pand -pick -##rid -lgbt -ey -continued -mario -higher -rog -future -deput -##people -##inces -rules -91 -97 -motor -jon -sug -armen -kings -79 -##ken -bol -cells -lov -dun -alab -running -starring -##tario -forest -romantic -finished -##ira -##writers -beach -wik -##can -weap -ontario -introduced -usa -particular -economic -##ropol -##mann -44 -anti -##rick -politics -businesspeople -##adel -##selves -ess -1918 -believed -beaut -nuclear -##rupt -41 -93 -bg -believe -joe -face -##inet -larger -princess -alabama -46 -bishop 
-captain -##ela -told -legal -##ji -structures -provinces -coup -##lers -##rich -victoria -ministers -##color -1st -kong -navy -02 -ways -82 -quest -nation -software -letter -nine -visit -fant -ly -##ouch -##mond -businessman -##illy -##ros -##ora -elizabeth -constitution -fal -##idae -accident -59 -johnson -47 -commonly -self -##din -##loy -98 -bund -themselves -described -happened -bgcolor -##akh -access -below -von -##tim -article -debut -athlet -sud -##iced -tried -particip -1919 -senator -daniel -foc -fict -relationship -##va -directors -racing -labor -76 -##aught -##elly -##iter -difficult -americans -promot -brought -temperature -selected -attempt -78 -feet -santa -subject -inhabitants -##ila -##mont -bh -www -claim -##ving -affect -painter -##urity -unit -spread -active -sexual -org -freed -impro -alph -presidents -belgian -behav -##ito -throughout -##19 -myth -paraly -belong -##str -occup -##inger -tenness -66 -##rial -##ova -starting -##onto -kim -mob -matches -##aps -##burgh -gener -biggest -disestablishments -fictional -##ker -mole -corpor -justice -nint -##ipe -##mas -##inian -##itt -guard -itself -acting -tennessee -suic -indones -plann -63 -72 -harry -04 -##war -##oud -additional -py -cru -lines -edition -##ffer -husband -nic -##emon -owned -kenn -base -ah -wide -reading -distributed -swe -##ift -carbon -industry -94 -##zy -metropol -nintendo -##iment -sle -physics -tell -##gu -sab -##kh -##elled -problem -nature -##ram -fav -##anz -##anth -mississ -08 -quar -##ippi -communic -airlines -remov -sources -arthur -elev -supre -##ads -azerb -##ders -##aining -marriage -evolution -williams -vote -schol -bull -stro -conference -ideas -basic -azerbai -##emic -create -clubs -51 -##iving -watch -67 -ko -grew -astronom -57 -planet -foss -temple -lett -##osph -accept -ky -paralymp -reason -profile -##asa -herit -##mir -alf -polic -crick -reported -##ential -##onsin -israeli -03 -branch -calais -52 -ira -hours -49 -##uth -raised -girls -wisc -nap -300 -wear 
-indiana -mission -spok -needed -andrew -bod -argentine -spl -wisconsin -enc -photograph -document -thous -multiple -thousand -path -enem -kir -##phia -administrative -families -ott -mand -##tra -##aven -generally -producers -behind -##imin -followed -damage -marsh -luc -toronto -heritage -leading -fu -pul -primary -older -wa -68 -ago -nord -spr -arkansas -##bridge -passeng -mississippi -##ule -1917 -suggest -orchestra -rose -write -##itude -harv -elections -interest -06 -##inct -##eas -##enty -cultural -rot -anne -abs -comedian -##aya -leave -km² -whole -mamm -hund -philadel -physic -jane -machine -dro -##icted -palace -07 -81 -degree -##nel -##enna -chairman -link -security -quickly -hir -##bury -##asy -eventually -philadelphia -##uries -gives -1901 -rub -tournament -sac -##utions -bird -holy -##iac -##phy -economy -##hib -##vi -poor -addition -extre -mosc -deal -appointed -fifth -bah -era -jesus -physical -normal -blues -infect -snow -terms -chris -stanley -dipl -fat -##jo -controll -##amin -veh -wolf -engineering -foundation -lor -tel -probably -liver -weather -officially -sky -skin -regional -defeated -vietnam -##ingu -editor -farm -lung -##kn -anal -complete -taking -greece -clear -entire -ethn -bit -trump -staff -express -loire -constant -05 -71 -fund -na -##ew -##ability -##enced -units -composers -##omp -##istic -hundred -southwest -tay -andre -why -inn -##osl -##rison -kept -medalists -destroyed -categ -##wide -mathematician -deputy -##her -godd -lind -##neum -competition -conduct -http -letters -pra -mechan -baby -rug -ib -##aud -metropolitan -##uese -join -55 -##uten -##abad -turkey -singap -portuguese -folk -jur -tells -notable -budd -rather -##ees -rand -organiz -paper -##da -insp -slov -bronze -alt -purp -chess -respons -##hamm -##kins -niger -khan -las -moscow -dak -slow -heat -digital -singapore -calend -##ients -cham -simple -ple -structure -dry -hin -haut -billion -guitarist -abd -urban -norwe -hair -##ika -protest -trees -##cial -earn -##onic 
-##ctions -runs -driver -try -greatest -explor -2nd -step -complex -tony -##orpor -1950s -tai -brook -altern -lew -ali -syd -carr -##zz -asked -##ava -trial -176 -supreme -##bi -becoming -piano -brothers -##uses -tennis -soul -##erve -peak -element -alpes -cust -tower -##ustr -58 -norwegian -##ili -nam -sev -leaves -##ologists -pneum -section -contract -begins -grant -sett -bond -jews -symbol -bush -##ghan -equal -##roit -belg -##pr -attacks -elements -swim -cd -danger -mike -reign -alg -cos -films -ruled -queens -##aga -horror -parties -soccer -j1 -uef -##aded -southeast -teen -##han -##ties -flu -marie -youth -##gle -##oln -upon -powerful -resign -clar -emb -sail -fighting -##iot -##real -function -##eria -orange -diam -administration -##western -goddess -##ready -reviews -suffer -##ourg -crown -marc -hong -##una -hay -terror -##uan -philippines -moving -##edral -hem -newspaper -##iny -unl -gab -detroit -teacher -turkish -finn -init -croat -estim -kash -##reat -imper -subst -contest -chemistry -kur -ot -hart -##oman -69 -han -ped -adventure -tele -judge -wilson -spirit -shr -colorado -pack -redu -##ados -universities -uefa -tas -manchester -wee -##icated -princip -01 -lif -##fall -spent -legend -hungarian -##chen -student -sydney -sites -kinds -troops -memorial -sciences -towards -already -fought -ga -surround -monarch -historian -##ields -cathedral -diplom -afghan -borough -53 -wing -positive -picture -carry -62 -draw -suff -56 -starts -##ilities -09 -serb -understand -##ho -bos -rab -##sex -si -##een -pneumonia -1914 -adop -##ancy -earlier -room -charge -##icut -##vert -piece -thriller -hat -offer -stroke -##isa -confir -turned -##ami -400 -ban -observ -##ires -gil -distance -placed -adult -whose -appearance -effects -separate -islamic -bed -mp -cars -##head -calendar -critics -double -stephen -garden -rio -recording -route -freedom -##ship -shap -nearly -dakota -involved -passed -bott -kat -classical -freder -training -figure -graph -mut -tag -##edy -flat 
-greater -cel -cin -dougl -loss -##rian -illustr -value -construction -scul -getting -fantasy -forced -falls -sleep -##onne -spee -scientific -##orter -1912 -scored -boys -minutes -slav -suicide -commercial -##station -steve -##ners -screenwriter -dream -christoph -##liga -##pped -bundes -##rence -mom -connecticut -150 -##iption -gay -stopped -disord -generation -175 -##ipur -oxford -pandemic -kal -##worth -designated -##iov -universe -tail -morn -exc -##rogen -walter -portugal -donald -iraq -belgium -poly -greg -wrestler -galax -##orporated -liga -starred -frequ -##eding -meant -##ension -##tic -mixed -sin -tu -search -##izes -bach -##uation -##val -argentina -cred -fred -christmas -horn -spoken -dial -norway -dies -depression -account -sid -sund -##ilton -junior -majority -##cks -holly -acid -note -egyptian -ships -meters -flood -products -ful -treaty -##esc -philosopher -nag -sad -##ran -##astic -ford -##icks -listed -complications -gor -dor -##cest -##ables -1915 -grammy -chall -compounds -eyes -taylor -##iday -##unt -solo -pressure -manuf -cm -lyn -disp -apart -ty -sher -houses -174 -weak -supported -liberal -manufact -agric -hell -##sa -hills -diss -worldwide -educators -weeks -##isation -finally -deter -kilomet -fash -candidate -communities -##enez -earthqu -cree -fell -reform -##back -susp -mobile -##kin -##pa -frogs -ax -desp -kaz -weight -##known -sculpt -colomb -investig -bact -thing -recent -industrial -gang -##ising -pieces -measure -playstation -workers -kan -historic -azerbaijan -##oid -lawyers -1916 -opt -activists -iranian -ethnic -shape -lands -##30 -##wick -##ori -lawrence -##regon -##alia -cav -douglas -lock -##df -bring -drink -##uki -liqu -##iting -##overy -vac -comic -seg -communist -platform -##anna -linc -pad -assistant -signif -concept -billboard -##water -draft -clin -agricult -ariz -eggs -oregon -closed -concert -finland -merc -lux -store -wheel -##arm -chamber -##eg -inte -##hire -##aux -insect -lincoln -eye -resist -taiwan -##wr -lith 
-plane -kw -##onian -##sey -appl -ern -featured -fill -commander -weapons -pic -lists -powers -##ebec -management -maryland -dal -##weight -ais -unknown -##ema -bot -fashion -charts -merged -raj -ambass -arizona -split -##hs -ancest -versions -ans -##ysis -crew -image -eld -affairs -##rie -twenty -mathematics -lieuten -##key -showed -northeast -dail -ross -jam -simply -hour -##ez -respect -produce -plot -intell -und -mol -highway -location -represented -quebec -solar -global -contrib -horse -etc -dur -##asters -limited -touch -volley -bib -rugby -conqu -hans -apple -steel -competed -mis -om -1911 -1913 -models -operation -voices -bear -trains -##bourne -##iab -cool -##iques -##atures -atom -covered -hear -catal -morning -##18 -peru -headqu -imperial -##oe -##15 -progress -feature -doll -ay -zone -brian -programming -##stone -##oka -nom -1910 -financ -montreal -lieutenant -dv -colour -status -tradition -hawai -formerly -alco -welsh -18th -doing -personality -##osis -actually -##change -lad -fruit -treatment -##more -##enge -ends -##aled -nep -scientist -hotel -mind -pron -solid -composed -federation -philosophy -muslim -salt -##zen -ow -##iance -arn -widely -##cycl -ku -##eman -divor -##hammad -mi -guide -younger -nickn -intellig -jord -quarter -##po -mars -3rd -##rus -subur -architecture -maur -kennedy -representative -##esis -##ighter -scale -bag -exhib -clev -##fort -##14 -poll -aer -physicist -dim -oak -##box -oxy -##iba -governors -wins -##ify -bulgar -launched -mythology -##ras -flows -##ashi -corporation -shah -evidence -labour -cambridge -hindu -romania -hind -detail -##16 -dj -##cha -specific -pierre -sang -##gers -brad -conservative -##xim -flow -##iful -remains -cartoon -nik -##ti -denmark -thin -160 -completed -comics -label -##ills -##osaurus -drugs -neighbor -speech -##fc -##arters -##itation -conditions -##oli -arms -organizations -raw -##down -madrid -paintings -mort -defin -whether -conductor -extra -patrick -kl -##inary -creation -glass -buried 
-##house -##asing -electronic -##hold -bio -samp -##onde -danish -kelly -##pe -##osen -significan -##athan -bright -confirmed -ell -vocal -gord -lam -nearby -##book -painting -antonio -volleyball -strength -objects -removed -##oper -bak -copies -ukraine -drive -##iology -##13 -succe -##chi -resour -punk -ker -##olas -contain -influence -manipur -dinosaur -pink -##uay -archae -numer -yam -##usion -dol -daily -winds -likely -bible -##hu -adam -referred -cleveland -##ises -lewis -prevent -##tles -gods -confl -pig -guy -afghanistan -pir -comment -##ensity -finish -buddh -therefore -christopher -operating -polo -arrest -colleges -##igen -automob -threat -##soft -property -##wal -taught -##met -##lets -jenn -hang -johann -attract -appears -##dam -illness -academic -bound -hou -##jab -ones -karl -dat -economics -surg -rate -ton -##ione -cher -associated -activities -anthony -pok -easily -tamil -defense -##rane -##iva -##ardo -170 -universal -paralympics -fix -rud -##ager -##under -veget -cow -##enz -issues -##ares -maps -dc -dragon -belief -emmy -practice -marine -crim -1908 -vel -bundesliga -##alo -sha -alaska -allows -giving -globe -aisne -score -vienna -penins -twice -rapid -172 -victory -needs -##eastern -confeder -serving -##iat -voiced -discography -alan -##enburg -hex -senior -sequel -simon -cu -memory -##onse -coming -gene -microsoft -venez -##sl -ward -##oria -##vis -conflict -##olis -prepar -bros -happens -alphab -trying -opening -chosen -persian -hungary -bought -agency -fried -instruments -dall -mammals -ka -sed -pilot -hip -##iders -##duction -laur -oxygen -matter -journalists -ale -really -contro -approxim -ukrainian -beautiful -previously -hers -##ikh -pho -malays -computers -##ding -##tery -kon -##aro -articles -romance -recip -novels -##onder -nazi -wave -reference -diet -doct -missing -##17 -creek -asp -entered -helps -bodies -grass -volume -beng -168 -##iform -anna -wy -reve -sides -plate -arrested -indonesia -##light -roger -despite -controvers -fern 
-##state -##cules -##iop -##ws -knight -##ledge -levels -titles -prisoners -category -philip -attacked -transit -punjab -nich -acts -saying -spect -ye -##iden -overall -direction -dna -policy -course -bun -athlete -prec -alexand -##onent -##arus -bou -bart -howard -physician -ski -##erved -theor -refers -vs -##ista -longest -giant -sons -##ivity -priest -jordan -reasons -consist -drums -sain -##ration -##ulation -owner -sixth -puer -prote -regard -kam -alfred -lyr -ages -speaking -##con -##eda -##otte -print -convention -sie -materials -via -pitts -lic -ded -desert -quant -races -yout -cricket -push -wed -rober -images -rhin -##medi -cass -yet -##asion -##oca -sell -happy -jay -arrondissements -quite -##bed -salv -ronald -controlled -##mo -diagn -##ault -growing -1940s -cape -##rig -bure -wants -##orts -finnish -shir -cloth -crow -wings -##rad -trust -173 -##iang -21st -rangers -mes -1930s -restaur -assass -motion -ry -##ray -sugar -##ects -adams -warner -calv -graduated -tyr -thus -bey -##phone -##encies -##oral -serious -else -1903 -pin -##olved -neur -guine -recognized -juan -declared -transfer -looks -dick -##cyclop -churches -1905 -habit -alumn -coal -sympt -orbit -column -replace -send -##ging -connected -shin -##ante -allen -violin -brid -address -davis -hamilton -bottom -colony -ranked -mouth -margar -managers -percent -evil -##ening -consists -virt -exact -##eing -edge -departement -muslims -fear -agent -remained -gulf -##ety -##where -fields -1907 -discovery -normally -defence -captured -enemy -##dorf -trip -puerto -##arl -##ayer -explos -protection -##town -wit -reagan -##anced -illeg -prix -##anks -cardinal -males -easy -lakes -encyclop -##pool -vert -somme -##enne -syl -##oses -buff -rick -##lyn -songwriters -##fish -##40 -ip -barcel -myst -libert -osc -stu -mps -##feld -##road -##fff -growth -theme -jump -##yth -##sk -females -jess -##isms -leaving -centuries -hands -sweet -dub -##hatt -##ingen -planned -##porary -nebr -concent -traff -condition 
-##die -maxim -sri -##tha -indic -baltim -nebraska -##enh -poetry -earthquake -tan -##lan -166 -earned -sard -whit -mention -##orney -##iko -headquarters -##char -chlor -spin -magic -covers -bruce -##zh -peninsula -legs -rural -calcul -authority -financial -##avia -variety -venezuel -romanian -duch -passengers -##ocol -bene -##mouth -alone -issue -hamp -##put -defeat -domest -significant -bureau -alumni -baltimore -##ando -##oster -herb -buy -personnel -looking -flying -scor -##inth -aired -hp -atlanta -grey -gray -##aceous -walt -nepal -fit -nfl -neut -sched -##anny -device -reaction -##ello -officers -criminal -##eline -annual -dogs -##do -parks -diseases -hugh -##uct -users -sense -bry -eag -votes -unlike -alphabet -lion -1906 -##wart -poets -soldier -femin -borders -properties -ny -##alem -##iti -purpose -frederick -toy -display -geograph -attorney -classification -perman -carn -barcelona -1909 -clean -cryst -##otes -4th -exchange -arrang -cum -warm -miller -directly -melbourne -alcohol -##cano -calvados -sor -brun -completely -behavior -eleven -responsible -sod -##ester -forests -circuit -shop -provide -russell -kevin -holds -summ -satell -sr -##pton -liquid -lyrics -eric -hyp -capit -dangerous -orthod -adopted -selling -activity -hits -##je -lev -singing -pianist -studios -dancer -hollywood -pays -##haw -orle -##osphere -torn -leaf -carib -tribut -houston -dallas -fresh -colors -carried -comedians -historians -##ception -##oration -occurr -pittsburgh -##ruck -broke -1904 -gon -##imb -##aho -underground -pitch -distribution -##uz -naval -billy -flower -safety -hors -ober -##cia -decision -wrong -##inia -strip -supporting -franklin -constitu -electricity -favor -##udi -tries -everyone -experience -vent -##ido -question -orleans -nut -##time -ability -designer -phoen -clo -120 -orthodox -##omm -playwr -hosted -##kes -##wall -twin -pattern -jerry -##div -munich -armenian -couple -symptoms -worth -engines -meeting -sounds -##hattan -aar -sick -##arily -discuss 
-lav -pakistani -approach -hid -##bre -##adesh -metro -invented -##omy -whom -jimmy -##oshi -lots -rice -repro -goog -agreement -price -correct -molec -user -rare -##ao -pull -##atar -animation -biology -renamed -mong -##eless -##utional -catch -sah -##illes -guest -harris -squad -vik -zh -ign -bann -alliance -escape -negative -dh -paid -territories -datab -1902 -##bean -learning -gordon -vinc -##car -floor -extrem -17th -##wich -##eded -buck -pow -##oa -sales -increase -youtube -nit -dvd -shel -manhattan -##sky -milan -response -circle -immigr -##gas -1800 -clay -waters -save -thousands -hawaii -johnny -analysis -##uits -remember -semi -cuba -nak -grav -tracks -rise -boeing -roads -tol -atm -carter -claimed -##ping -increased -ss -##oro -crimes -repe -share -norman -recently -##25 -nev -bath -bacteria -luxemb -##ibility -bigger -concern -galaxy -##zburg -knowledge -##elli -##usalem -cord -file -uncle -brief -christians -cert -resc -operations -ballet -sv -sao -kills -aviation -employ -jerusalem -methods -diamond -guinea -trou -margaret -arc -ordered -describe -principal -##tered -description -##iber -forward -##agen -matthe -ghost -##eller -google -5th -tib -##elson -wonder -managed -cret -##kee -normand -chanc -banks -choice -cabinet -density -scene -twel -symph -shut -##ogen -16th -##asia -##obi -rocks -faith -dir -tob -plans -genetic -opposite -osaka -##iano -##aph -contact -lies -warri -armed -socialist -bour -##tv -clark -core -demon -gam -##usc -treas -huge -anderson -167 -percy -theater -impact -listing -wikip -##mission -##cles -164 -fuel -pp -anything -construct -ambassador -caribbean -##23 -diego -environmental -utah -lank -##enue -serve -lack -trin -jobs -agreed -##bl -##tm -dress -synd -##ascular -fab -zo -worship -lasted -matt -everything -interact -classic -insects -intelligence -hampshire -fans -sulf -chain -##aware -miam -##mat -debuted -moves -compared -trav -##ascar -straight -dates -rav -##ifer -##di -milk -1899 -custom -emerg -##urg -height 
-ted -holland -##izing -fro -##fe -partner -arabic -alternative -miami -##eme -##emia -samuel -stated -##aver -archb -flowers -speaker -combined -##oku -##anthrop -twelve -breath -entry -applied -resources -scar -finals -volcano -butter -desc -attended -perfect -uns -hydro -focus -rene -tools -shad -##sters -content -expand -bapt -novelist -gib -##uda -household -##holm -estimated -##sequ -relat -organized -anat -meite -##iph -retire -##unk -cris -officials -doub -guns -##xual -acted -neck -adapt -pronoun -##atherine -fan -chrom -jacob -boat -broken -appearances -cbs -rif -phill -throne -harm -script -thick -begin -southwestern -iceland -technical -bes -##alle -hug -##ni -cretaceous -eu -idaho -follows -incumb -instrument -maz -##rat -ecu -earliest -coron -wrestlers -##gi -rid -##hel -sure -translated -yug -amend -##umin -##ods -imag -failed -executed -ryan -highly -maine -divisions -corps -shooting -cem -##26 -##ighth -muhammad -montana -##iley -monte -rapper -learned -fossil -rough -determ -difference -slavery -chancell -##mund -##uri -##24 -##racy -pret -surviv -influenced -defined -hunt -kashmir -coc -rena -61 -jama -gran -moore -dominican -teaching -birthday -eug -mit -param -cycle -##nia -##ocard -bran -metres -detect -revolutionary -disorder -equip -incumbent -yok -##cht -##angel -amaz -tourism -proposed -continu -gamb -panc -sty -163 -emot -##ontin -palest -meitei -robinson -synth -scen -ruler -vit -pel -thro -medals -provided -string -advis -##heimer -##rees -##ferences -nba -meg -commonwe -ethiop -hydrogen -lud -vas -ending -infant -reality -walls -sens -ministry -titled -mental -sult -##assic -1895 -##uku -rhyth -christianity -##wig -township -jefferson -commonwealth -infar -explorer -##iya -settlement -##lement -cantons -dave -kra -attention -1896 -camer -violence -suffolk -infarction -counter -visual -peng -vic -##28 -##ailand -accepted -mm -##sec -##27 -meat -##oba -rgb -trail -archive -##oir -##stad -departments -documentary -surgery -conver -baron 
-delaware -presented -alger -demand -peters -zero -##esar -##rops -plus -##uma -quality -clinton -equipment -hur -##leman -sarah -immedi -runner -gironde -##fri -165 -poison -virus -lan -heb -1890 -shaped -items -defend -hunter -##aling -screenwriters -candidates -phoenix -acqu -cooper -myocard -benj -saturn -##adi -franch -coff -tribe -illegal -remain -continue -ivan -database -thunder -joint -sul -personalities -featuring -adventures -journey -install -carlos -jason -heard -169 -nbc -##wald -stayed -ll -##incorporated -liked -formation -##bro -satur -emergency -##hen -throw -door -poems -tribes -##rom -alz -unincorporated -extended -colombia -myocardial -achiev -gained -zur -respir -diplomat -ecuador -sau -vehicles -##mitted -crashes -##yt -domestic -ain -mist -ni -occurs -autonom -thailand -unc -barry -trophy -congressional -electrical -cot -##opes -austin -sets -1898 -lanc -##apolis -wikipedia -napole -6th -##igg -soph -##leton -risk -mans -attach -cad -sess -wet -##ovic -adults -register -purple -barbara -agriculture -##ered -nevada -audio -resistance -clothing -vlad -ft -murr -##iven -invasion -battles -victims -divorced -hope -surn -tab -truck -nothing -159 -videos -required -tox -800 -cere -flash -switch -creating -separated -particularly -philanthrop -standing -fleet -slaves -1897 -nelson -##dale -##ouver -leban -##iki -saxony -##ctic -oliver -productions -moment -benjamin -j2 -dod -stream -luxembourg -lig -tang -soil -photos -lap -##enberg -ens -singh -noble -sunday -maintain -##rene -zoo -teeth -##via -abc -scholar -seventh -resigned -oz -shared -fine -kang -pont -##page -nomination -presid -extinct -eating -sword -mediter -senators -injured -armenia -codes -debuts -costa -vincent -nasa -advert -patri -scholars -600 -kit -1893 -experien -##ranean -archbishop -jar -##ask -filip -typically -essex -grown -pair -marin -heavyweight -##ilia -gust -um -disapp -positions -functions -kin -##bing -##yd -moroc -infection -##esco -alice -withd -110 -bald -anime 
-wayne -literary -approximately -malaysia -kids -pav -medium -michel -opposition -gate -mig -presenter -movements -rhone -##ieval -yugosl -rum -##no -##ovich -##agues -actions -155 -charlotte -ri -xbox -drivers -appe -terr -biograph -massac -marshall -##mes -liverpool -##itis -shortly -mitch -##tr -economist -tunis -affected -matthew -##haus -##cker -pruss -1889 -survey -ech -vanc -##thal -alle -belarus -luther -hurt -##urer -syria -publishing -morgan -ottoman -aw -alive -reserve -painters -performing -7th -rou -charlie -denn -rush -intel -critical -bernard -onto -colonial -pip -##29 -spy -gary -1920s -punish -safe -doctors -pengu -##oni -##atch -pole -stev -heads -advance -cous -fellow -lingu -occur -mrs -##oz -communication -##osa -particles -##66 -##nie -guil -accomp -harvard -inspired -bibli -neighborhood -traffic -ov -slight -phone -behavi -sultan -painted -insee -selena -mediterranean -mall -partners -residents -dram -vancouver -migr -##ente -##stan -prel -saudi -##born -secondary -##onym -advanced -roberts -cemet -identity -pyren -spider -symbols -##etime -ble -arrived -changing -require -kath -phr -moz -revol -transform -cemetery -kiss -pale -pyrenees -gender -fossils -clock -ian -kaw -medieval -filmography -transportation -700 -treated -benn -yu -##athers -hitler -herself -##enos -strik -##illo -nash -##iens -##emen -##akia -##erne -weigh -slave -experim -chilean -oscar -##mun -orders -##ourt -1892 -convicted -stor -##aceae -finds -hud -calls -automobiles -dedicated -mentioned -tibet -##olo -radiation -ja -wag -158 -temp -coastal -cats -##eno -13th -classes -copper -##achi -##cla -eleph -##mingham -croatian -waves -##gau -##roph -heav -cardiov -##lication -translation -permanent -faster -##mi -arena -walker -bones -##imir -##ogne -##auc -cathol -afc -cardiovascular -haz -orb -birmingham -##eva -##rell -beauty -##bir -hab -monster -showing -reflect -grace -narrow -rein -comune -##ibly -seattle -worst -compar -constitutional -target -gur -wur -noted -rhythm 
-interp -wright -joan -devices -robin -brooklyn -##inals -turns -diction -alzheimer -kap -stores -##amo -dean -plain -abra -contem -pregn -electoral -rhine -por -##rome -shark -founding -presidency -##agu -outer -subdiv -degrees -vehicle -urug -250 -statist -florence -##iere -##arians -hein -remaining -causing -##wise -##itors -##ogy -##ichi -serge -larry -signs -promoted -springs -provides -tong -##phal -flav -uruguay -catholics -batman -##asaki -isa -##umes -serial -provinc -fishing -##ls -##wers -153 -queensland -##orig -1861 -ultim -seem -feed -nigeria -##rates -anch -atoms -remix -stockholm -htm -heaven -tehs -knights -courts -continental -hyper -magnetic -chance -necess -elimin -stein -protected -layer -satellite -indigen -inventor -retirement -chancellor -bin -##21 -##utt -breast -##iable -admir -1891 -honours -##oxide -diagnosed -stick -flo -##bul -##ele -suffered -boss -nas -pione -sung -blind -##ruption -pokemon -gather -##pur -adap -occas -damaged -facult -ral -income -1888 -##omot -shall -legislative -aborig -scand -poem -chron -youngest -visited -franchise -kum -frances -indigenous -mystery -##issance -depending -passes -campus -leadership -rah -compil -carp -choose -##berry -kick -democracy -ske -prey -baker -ich -##lem -##ivia -revealed -supply -tourist -check -figures -gone -suz -discover -easier -committed -ig -##ogue -murray -goth -##amese -colonies -##uster -wol -##eh -##rap -seb -spot -serbian -fully -gym -homes -##orph -##imed -8th -15th -democrat -hole -##riel -knock -astronaut -amph -149 -angl -jacques -divers -mul -##rh -theod -130 -1887 -looked -mercury -sisters -##stre -##room -nickel -mate -cyp -##ivals -knew -voted -parishes -sardinia -##othy -ferr -dish -values -physiology -purch -pent -##joy -##emony -drummer -##force -sak -##organ -yorkshire -barn -##iji -##igo -subt -useful -##atsu -martial -hebrew -tat -steam -1885 -issued -opin -##hard -internal -goods -clothes -encyclopedia -mail -ist -chose -##aco -limit -machines -##ei -##king 
-pradesh -154 -governments -newspapers -styles -1863 -benef -amendment -hend -liz -felt -vaud -##oda -compound -1894 -naturalized -message -thor -talking -settled -answer -bone -avoid -guer -capac -foods -anthrop -cha -stewart -composition -railroad -psychology -tornado -respiratory -gain -mine -none -##ju -didn -##rain -##ously -smart -owners -##load -mud -seconds -##resp -warren -mongol -abraham -##erc -##eston -##pson -##anic -athletics -unique -arsen -##lie -blog -156 -killer -mck -##kan -##chem -##igi -##clus -subs -views -##standing -wine -##kir -unus -##rah -therm -arnold -occurred -##ture -chick -marit -##enson -##ollo -1883 -elder -bug -ms -##uge -playoff -joy -expected -achie -alongside -##asm -marry -hundreds -overs -##etics -##ibr -operated -tap -##ulu -shell -triple -recon -aires -honey -gardens -extreme -croatia -playwright -##aza -represents -academics -##oven -prior -experiment -severe -buffalo -hub -veter -leo -horiz -hunting -till -##aws -educator -jonathan -venezuela -##fa -##oux -##bria -##aurus -factory -firm -berg -tiss -wound -##otic -reserv -organic -##cript -holding -cn -anyone -holiday -fisher -##mel -interested -obl -salzburg -thompson -occupied -##life -##rim -##father -descent -describes -molecules -hob -##opter -paralle -combat -closely -individuals -mouse -1882 -conservation -dancing -challeng -kob -##vo -strug -proph -finding -rebell -audience -pc -forb -broadcasting -##friend -bog -mond -##uls -##ption -format -steven -columbus -narr -gross -lenn -##rol -palm -zurich -hann -##ivore -##oker -agree -1000 -honour -reports -rib -chat -##izu -103 -angry -beyond -cote -##bell -##yer -termin -recordings -sharp -beatles -extremely -##uff -##fs -spons -mathematical -slowly -tomb -##uman -10th -1873 -hav -##lies -brig -157 -capture -12th -ceremony -numerous -sort -1865 -julian -symphony -kab -provence -sacr -tiger -luis -selection -filipino -provincial -##ifying -1862 -##aug -##amoto -obtain -luck -persons -gabriel -mack -bobby -https -diab 
-hond -lah -tort -griff -meets -branches -kle -rach -sic -abbre -atomic -bibliography -1864 -nan -##bal -##istol -upd -charged -##cious -obama -1880 -eaten -succeeded -cardiac -judges -kidney -bridges -##gie -fired -depend -supposed -speakers -crisis -slightly -##eta -weekly -abbey -##iser -seeds -newton -estate -breed -1879 -planning -bil -nest -vul -deliver -dust -1860 -surrounded -gol -##icial -##ache -chemist -guitarists -broadway -escap -electrons -bowl -##hawks -napoleon -arabia -tod -reject -smallest -passenger -confederate -##burn -##ici -##iri -stands -inner -buenos -guilty -tend -##rum -index -regul -astero -multi -wish -##ivan -##aste -partly -exped -organisation -vend -shim -organisms -chand -valle -portra -cards -murdered -sloven -terrorist -##alty -##ector -monkey -1886 -receive -survive -saints -##isch -distingu -pancre -cod -##cing -##eto -minute -serbia -ds -##vian -leagues -peop -14th -finance -##agne -##owski -adapted -tig -abol -wealth -gi -josh -basis -1870 -photo -nicholas -##bar -##rance -##aku -disaster -coord -decide -##oshima -keyboard -participated -offered -##ternal -camps -indonesian -aj -iw -##imp -basin -madison -atmosphere -lose -harrison -haute -kyoto -cort -suggested -peoples -cloud -childhood -louise -sag -tip -witch -server -101 -##onents -drag -delhi -distinct -thinking -linux -tet -##ebra -soap -outstanding -cuban -situation -dropped -aur -lis -tube -vision -##kar -manga -blo -compete -baden -enemies -spiritual -bulgaria -fighter -lect -truth -applications -geomet -refused -definition -rar -wii -##ithm -catherine -##antine -differences -mete -burns -##etes -##aging -##akers -rocket -silent -autobi -potter -kun -##rix -1868 -soundtra -saturday -formal -landsc -1881 -aquit -demonstr -##su -gren -##text -eddie -11th -oxid -inher -filled -##icient -suit -epic -thirty -claims -declar -maced -rit -tank -##iant -harold -1871 -orient -survived -immediately -jet -##agh -enjoy -##boards -##uka -connection -dinosaurs -gent -##uv -##omer 
-moh -graham -athens -nur -yonne -##gio -##uto -emir -decre -publisher -opponent -mant -mick -chel -paulo -cable -edin -trouble -pied -reception -canal -allies -breaking -pronounced -contemporary -statistical -ralph -ng -mayors -gymn -roots -leads -cyclone -integr -lanka -dennis -dictionary -perc -camera -challenge -pure -printed -protestant -aude -va -ingr -grad -operas -duck -weapon -exhibition -shock -##lington -daughters -dwar -##ea -##anne -##ographer -##ielder -signal -documents -plastic -loop -stuart -ps -solution -##elor -tri -##inner -educational -sudden -yosh -##anes -##ears -appar -thirteen -industries -kilometers -##helm -faculty -##iago -##rot -serves -strike -performances -autonomous -3d -1867 -walking -vladimir -sib -tick -relative -125 -raf -caught -danny -burg -essay -laboratory -rings -##rod -collab -mask -interpret -allied -nova -evening -talent -teachers -papers -360 -cinc -##lon -byz -corresp -engineers -encour -assassin -eagle -instruct -sind -##donald -rival -apollo -protests -pupp -details -correspond -##set -##ipped -chapter -belt -1884 -aquitaine -wire -##zer -##orus -##omi -##rep -sheep -##place -injury -celebrated -friedrich -bengal -shakes -actual -perh -140 -stef -sequence -sentence -pancreatic -mik -##aba -voting -##gov -##ierra -jup -##ieties -expensive -files -representing -##etta -hammer -fixed -parallel -rating -vin -##bu -##fam -andy -ready -southeastern -importance -motors -picard -sop -stal -perry -coat -locomot -perhaps -##anni -introduction -leop -marks -##acing -resident -superh -popularity -links -exactly -aid -lamb -tub -romans -losing -oxide -involving -##ohama -astronomer -aboriginal -##fully -1872 -projects -surrounding -concentration -edinburgh -##jar -accused -parker -convers -photographer -otto -900 -kos -##ader -beck -##yler -1878 -raymond -altitude -beliefs -yokohama -coffee -disab -##ublin -absor -##ocaust -shore -advant -bened -ferdin -concerts -lisa -canc -constell -grande -francois -investigation -aub -lob 
-rico -sig -ordin -athletes -massacre -cincinn -golf -stages -phyl -##igny -sentenced -toxic -behaviour -##33 -##urance -holocaust -##80 -##oned -nee -virtual -unesco -mini -basil -portray -cincinnati -eighth -nok -##unity -##sea -##60 -usd -mode -nazis -##aman -stress -promin -ard -##ahl -skull -possibly -questions -ernest -maurice -9th -lil -##45 -##fta -conducted -nickname -friday -adj -avenue -continues -##bow -##gate -stom -chocol -arist -trained -barr -1857 -horses -renaissance -lem -shan -1875 -sovere -jupiter -ibn -sovereign -commons -blackhawks -paras -vermont -housing -drop -hut -##uke -##ographic -honorary -##prise -crystal -porn -##furt -##elia -chal -##ieres -cousin -hamm -lag -##eed -##uy -geneva -religions -kg -nerv -loy -surname -geoff -airpl -citizen -##ns -##ario -cream -wilhelm -networks -temperatures -existed -hugo -bic -og -##anche -keith -darwin -dict -chan -chase -uss -##oki -boh -minim -voy -decor -shadow -ein -backing -anglo -assign -biological -potential -##encer -##velt -volcan -commerce -lt -len -sale -##gra -##opa -overview -rulers -pi -tin -##vent -rear -luke -adding -##etti -tommy -broadcasters -kilometres -##sch -##ridge -##22 -1869 -planets -curt -linked -gosp -##agi -##uled -congo -meteor -ki -arde -amy -landing -hurricanes -##pear -##idi -##izz -gill -giov -##eton -vietn -laz -ug -##geon -roose -marked -streets -choir -telesc -fib -##gom -##wi -profit -##minster -museums -eis -terry -typical -snake -vess -##bus -##iations -philosophers -petersburg -vietnamese -amp -mend -reich -competitions -random -lloy -tok -prague -1876 -tests -kurd -bachelor -jennifer -eb -sap -santiago -crypt -shakespear -lane -inches -##esse -##oga -devil -reduced -abu -root -increasing -##greg -##elles -maximum -gw -pere -ridge -##nik -predators -marvel -##aca -helen -##48 -105 -##boy -assault -roosevelt -dy -ion -genre -interior -annivers -wallace -corner -##rin -deer -##hips -visitors -syndrome -tamp -##ui -claude -garcia -kazakh -bub -##xx -offices 
-belongs -kre -##enders -creator -rocky -mcm -mile -thorn -##rac -amster -measured -identified -rolling -mitchell -lun -pup -##sm -##rate -##isph -larg -skills -sitcom -##borough -feeling -bosnia -gn -kay -tir -ori -spong -smok -interstate -cultures -situ -tallest -unusual -shakespeare -jail -##35 -##ionally -equation -false -amsterdam -gand -##esa -tea -tales -possess -boxer -graduate -stomach -amounts -manuel -147 -merch -collins -rescue -descend -jum -pod -yank -stones -neil -artific -attend -novelists -combination -pine -##sole -latter -molecular -cotton -britt -airline -refuge -paralympic -ride -145 -incorporated -camb -legislature -sharks -bafta -rodrig -herbert -induct -mining -simpson -cosm -anniversary -dublin -hed -developing -wwf -morocco -bish -cave -jin -##maker -##iring -denver -cyclist -##inned -strange -statue -patients -parkinson -cattle -communications -lac -##umi -phen -grandm -1877 -1859 -stronger -algor -bac -beet -##ishi -nigerian -attached -##olph -107 -retrie -rogers -jurassic -hauts -eliminated -soundtrack -nokia -masters -talks -sebast -lon -isab -##landers -turner -nominations -capacity -bold -##raction -delta -murphy -restr -1848 -gregory -wyoming -relatively -##fly -franz -glas -venice -dialect -suburb -superhero -foster -horm -##iling -forg -##ensis -angle -burned -caesar -progressive -scenes -ost -##hn -##asts -publications -ceo -pedro -lloyd -yang -##away -##osexual -fi -##oping -##imar -produces -antar -doesn -sanders -ottawa -bulgarian -ardeche -kel -##ige -##estone -massive -sz -##roid -##apy -particle -metals -democrats -existence -cec -##oly -copy -colonel -aber -##fare -##ingham -chop -console -obit -krist -commissioner -duchess -expedition -##come -##99 -endings -opport -elementary -respectively -marath -phase -murders -factors -##ymph -subjects -ric -tusc -abuse -crimin -statement -cinema -sample -font -##ering -genes -craig -kidn -##si -##fia -reun -##berto -grows -chemicals -improve -pyr -sony -##gne -##uh -##atra -seal 
-cler -piedmont -jav -##cod -beij -unable -apost -constituency -biographical -##agre -prefer -115 -sussex -athletic -sacred -pleas -dame -##erse -mann -returns -heroes -ferdinand -##oons -##gal -theolog -##amps -sierra -##ppe -##works -ruth -bangladeshi -picardie -finger -##gow -##omon -##neth -farming -characteristics -sodium -fuku -##atin -##ality -##oust -##irus -##uez -monaster -gif -mull -proof -pep -campbell -montgom -disorders -tool -northeastern -laure -indians -##arth -worn -##manuel -evans -necessary -##low -bears -warsaw -hudson -kor -##otyp -bever -##overs -pearl -cock -lomb -##usa -exerc -germans -1850 -primarily -salvador -habitat -dad -nin -##sburg -classified -auto -injuries -skiing -favorite -ethiopia -chocolate -montgomery -charges -standards -programme -tyrol -##net -disasters -businesses -pse -##arin -allier -148 -sculptor -hearing -coordin -hide -##idad -application -granted -restaurant -cash -lope -ment -##yo -##lock -##inum -##itus -bread -cricketer -dating -analy -bee -dioxide -haiti -rebu -fluid -ladies -##bb -##zi -##aking -brom -setting -stops -violent -focused -writes -##ussion -periods -giants -html -fung -grade -##endorf -##ahn -franco -buddhist -utt -##37 -reactions -gilbert -criminals -beijing -nancy -##night -##ateur -techniques -hidden -ou -##van -exam -128 -valent -incident -vii -swimming -initially -vacc -botan -regarded -yugoslavia -##stream -morris -banned -vern -##ski -currency -ranking -guardian -kate -##kov -##inus -tot -chest -##illon -trig -##ifications -patient -rhode -nadu -##icia -toward -##idel -##ogo -studying -helping -duty -falling -cheese -neighbour -roof -glac -hosts -hemisph -##ieu -##anto -residence -crist -traded -mixing -alpine -roughly -##stadt -declaration -pseud -sean -edwards -consult -fourteen -texts -isaac -portland -illustrated -phrase -pon -airways -vegas -collected -magazines -reduce -thinks -survivor -loud -1858 -tehsil -bast -fruits -##cese -evolved -1874 -##oslov -##fcfff -##38 -pries -##iolet -127 
-presenters -disappear -bam -dio -remove -1866 -testament -##uchi -farmers -mechanics -wagner -admiral -wes -polar -expans -lightning -keyboards -burning -capitals -forever -westminster -ports -budget -chapel -witness -##47 -northwestern -advoc -nico -rey -##uro -chate -marg -containing -smo -accur -raid -equival -atlantiques -withdra -storage -tul -##caster -parad -substance -keral -##nell -##contin -stret -purposes -aube -lions -pill -##46 -##onso -heir -alberta -##world -baptist -legacy -ceremon -spacecraft -sarthe -cair -helic -literal -headed -relationships -loan -empress -giovanni -lopez -##inction -eve -czechoslov -tributary -##pers -##asi -emplo -protein -quantum -##aire -beth -producing -downtown -factor -crossing -recognition -##gart -##iled -##umed -edited -fairy -tasman -cameron -##icas -##rete -##ete -##ection -moons -armies -atlas -henri -vamp -##emberg -driving -leonard -harvey -nicknamed -ud -improv -kerala -gast -##ched -aband -origins -liberation -kop -pm -##mers -institutions -tiny -evangel -1854 -vary -minority -rebellion -artificial -##49 -chore -##outs -guild -mainland -collections -cooking -lebanon -##umental -##estyle -plains -statesman -##psons -occitan -authors -bullet -hiroshima -miy -##acher -##enses -routes -typh -eugene -drew -mold -##ictions -berke -ml -wien -butler -volun -institution -sudan -frequency -nomine -jak -##ining -##ication -cheap -petro -lifetime -##rine -##amas -##arta -authorities -susan -ranges -telephone -infin -passing -##atoes -mechanical -infantry -##ht -isol -##ebo -para -funeral -tum -##aa -prost -strat -expos -volt -picked -drinking -gonz -inaug -reed -##abe -##ials -adolf -##ologies -imprison -surrender -flanders -##ses -##fo -##anian -exists -parag -angels -##apped -planes -wearing -lip -nun -##wing -##abi -presence -simpsons -aux -resulted -graduating -cruz -understanding -largely -##uania -##fielder -##ilo -##iah -parent -elli -villa -keys -allowing -closer -loved -writings -1851 -derived -##rising -homer 
-task -berkeley -kas -kurt -riv -frag -kirk -frequently -agricultural -controversial -hog -##fast -dee -fires -##ttemberg -resulting -diameter -lithuania -precip -liberty -nort -##amer -indo -empt -scheduled -tuscany -nascar -##gs -fever -auver -richmond -woods -tale -##rik -anarch -##aments -knows -visible -byzantine -fold -rus -##oj -##erd -disk -keeping -opens -siege -neutral -cauc -wiz -##oted -chad -adela -##issa -courage -maple -punishment -##pat -deuts -cliff -viii -kenneth -grammar -celtic -##isha -gameplay -technique -absol -excell -buddhism -##aks -afterwards -rifle -wurttemberg -ninet -##eon -##lying -tru -emperors -effective -anthem -percussion -##woman -marx -exclus -successor -##ienne -caval -rachel -wool -ya -compl -miner -albania -fifteen -watson -ahmed -dum -vast -sees -##odia -infl -myster -storms -##encia -ancestry -amphib -auvergne -cul -jor -##folk -chi -spoke -offers -subsequ -designs -crops -viv -##eim -##mad -##ilis -stable -seems -109 -1856 -measures -philanthropist -iz -rept -entrep -insurance -restaurants -temples -benedict -rated -##dles -##ocene -cole -##ails -chromos -paramount -cp -##36 -neph -laura -fertil -wildlife -nix -ply -##atem -##cket -pray -discip -sans -forum -##grade -alpha -mim -brah -boris -lowest -prisoner -premiered -logic -##ovision -asc -102 -circum -generals -suffering -graphics -tributaries -##family -bent -fitz -sull -vil -hisp -##active -135 -##rett -##ipping -computing -claud -astronomy -hughes -dw -riding -tyler -##rano -frame -chen -trek -decisions -occupation -warriors -expanded -hired -##atics -##ovo -quart -linear -trading -psychiat -electronics -algeria -rica -seoul -colored -processes -abdul -brooks -controversy -bid -##child -shield -arctic -monsters -gerald -pool -infr -uran -diesel -johannes -##iformes -thai -centen -depos -138 -skill -terminal -jamaica -##orous -monument -grandfather -controls -boxing -ace -##itter -testing -drawn -prussia -rarely -mtv -##alt -##uba -coins -surf -##lett -maker 
-##cription -panama -anchor -ordinary -dia -##jani -mineral -imposs -elis -grave -suggests -##rill -breaks -cornwall -dimens -gravity -prominent -sut -##edd -waste -spend -genera -canadiens -defensive -##apping -cardinals -channels -qualified -rising -linda -##eenth -accompan -caf -edu -##pm -dement -##odon -recomm -meter -researchers -organs -carrying -hemisphere -nose -pam -ord -blow -releases -locations -dawn -cyprus -expansion -entreprene -rip -wheat -philippe -improved -suburbs -summers -chicken -diabetes -##opl -newly -tested -dancers -steps -swimmer -elephant -beer -eurovision -holes -km2 -navig -odd -pirates -explosion -normandy -opinion -jag -tic -##etical -##aye -##ortion -##ocent -estonian -1855 -todd -volcanic -rig -tut -##gang -##uil -junction -simpl -sight -guatem -harmon -rainfall -chuck -strateg -wells -##itarian -festivals -tunnel -repeated -ced -##ozo -##raine -usual -destruction -facilities -saxoph -amazon -sebastian -##ading -consp -arrow -intended -tourists -promote -observed -entirely -ludwig -ultimate -shaw -trent -##aha -influential -approved -settlers -threatened -wedding -crowd -toul -##ovsky -uniform -pharm -volunte -vow -##esp -##lee -depends -taxes -bruins -constructed -pioneer -oc -##cott -ree -##ching -##ayan -prostate -##phalia -edmund -wooden -successfully -janeiro -eco -##zon -##most -fract -##enda -revenge -wounded -gem -##hof -##enzo -137 -explain -jsl -nou -##agua -129 -##asha -supports -execution -##oi -alien -meth -norfolk -basse -amateur -reaching -recipients -landscape -isle -shorter -seed -spaces -glen -ruling -returning -cdp -dolph -##pro -crus -hamburg -mixture -sandy -experiments -precipitation -marion -##ressed -crust -##hran -bryan -reprodu -brittany -manus -##hausen -tissue -gum -hack -civilization -parliamentary -vor -stored -warning -##bek -subtropical -playoffs -ninth -##elin -manit -108 -##insky -expression -nikol -advantage -nurs -alto -worker -publication -boundary -vegetables -gospel -##emy -craw -bombing 
-magnitude -saved -fernando -mg -##kt -sections -colours -euro -attempts -bats -ing -unter -herze -westphalia -millions -supern -skeleton -kom -##lings -##aran -chamb -boats -brab -michelle -hardware -locality -anglican -wake -anto -smack -photography -multip -1815 -ghana -julia -propos -criticism -traditionally -portrait -impossible -##essa -educated -succession -expert -platinum -recipient -occitanie -##bn -##coming -##anie -bristol -proteins -gustav -baldwin -adelaide -a3 -furn -gates -nice -##acle -##tons -106 -strings -instrumental -oakland -anatomy -dementia -gaz -mild -decade -archives -convent -axis -biochem -##keeper -gott -ville -##recht -118 -efforts -surviving -normandie -wang -##amic -##amics -##amental -spencer -sixteen -silk -glasgow -wick -resort -populations -117 -landfall -talked -737 -prelate -wend -heath -monten -104 -edgar -moder -accel -homepage -antib -lesb -friendly -photographs -nashville -herzegov -jury -sust -°c -##umer -##rib -pete -processing -criticized -warrior -uz -##ned -##iper -galaxies -tigers -##erm -##erts -##roe -##ibi -charity -hood -scope -calling -mats -##aji -healthy -qualification -lennon -tampa -##yon -download -ranger -##39 -##rec -proced -decades -summit -##odeon -##body -##pie -justin -1812 -miguel -##db -##ective -crocod -derby -goalt -pilgr -coalition -sank -##erg -##uter -##udes -##auer -invited -argument -converted -aus -gy -##eness -ald -##esty -guar -sonic -##monary -ions -##eri -##ulate -shops -context -seeing -opposed -nickelodeon -jaw -sara -disability -window -handed -administrator -masc -tou -##dot -otherwise -##inae -139 -entrance -##monton -transferred -aaron -herzegovina -daw -gel -##eo -##cop -##atus -##star -chiba -worse -cargo -raise -equivalent -dos -stim -primera -##licated -julie -hanging -mph -struck -asks -centers -edmonton -decides -chloride -dwarf -nixon -mam -##ben -##cs -##andez -medalist -cumbria -determined -ff -joins -proved -##unda -malta -taxonom -distinguished -retrieved -rican -taste 
-##orian -##itals -item -agents -##aukee -accidental -penguins -##idal -span -##ilda -##ennes -listen -olympians -azur -physicists -initial -principles -credits -constellation -eg -##inations -##ebob -bless -implement -strict -titan -initi -wu -external -fewer -ernst -pyram -manitoba -##nan -beau -##ipts -flour -principle -bishops -biden -pist -##ctional -genetics -runners -saxon -license -calvin -sitting -geometry -faces -##adors -americas -spelled -laid -consort -involves -schedule -sophie -##cra -##core -##imet -lex -brows -##brid -confed -lyon -naples -achievement -xi -zimb -##agawa -prayer -harbor -lights -1836 -penalty -arranged -session -gul -sik -creates -humid -1852 -bordered -throat -hipp -iod -java -theories -muscle -reporter -verb -felix -evolutionary -smackdown -taj -##olith -##urai -##ardy -##pping -directory -crush -146 -bert -continent -coaches -julius -istan -wis -unf -pros -exile -schw -swan -betty -feud -milw -powered -reaches -yas -##itic -alumin -##crib -drafted -istanbul -cay -gers -deleg -variable -stanford -existing -dreams -owen -plymouth -cs -das -dup -##rical -##isf -##oko -germ -1821 -colombian -confederation -ned -spar -formally -##inters -malay -1853 -1837 -santos -affair -hinduism -milwaukee -mason -fifty -hindi -lug -##riet -ana -stab -##ifically -battery -strait -fastest -springfield -capitol -renew -literally -nipp -estonia -charter -forming -legends -1843 -utc -##ords -bruss -extension -erik -tours -turning -airports -##orate -##ifa -creative -solve -putting -1844 -hawks -lords -homosexual -andrea -peaked -algebra -substit -intellect -gav -sized -##isted -trian -##quin -phones -glenn -traditions -##amba -domain -rabbit -jub -tied -##rose -tehran -syrian -##osaurs -geoffrey -850 -hier -lak -piet -qur -rhe -##ulated -##berger -usage -adrian -associate -enters -broadcaster -flights -celebrity -shrine -households -##asis -emma -camero -devon -badly -denis -venus -legendary -scoring -uttar -##zan -##oney -##uta -carlo -batter -##yll 
-##colm -pulitzer -monarchy -archaeological -heavily -obtained -##imo -clan -##icts -viruses -attempted -rays -##cat -signals -historically -jackie -1849 -ecl -kem -notably -consum -loves -monday -spongebob -gp -tie -##enko -##istle -119 -servant -sulfur -sicily -dale -palestine -priests -brussels -dated -ras -complicated -##onds -recre -subpre -buses -malcolm -confess -temporary -1845 -somers -immune -casual -promotion -joining -prepared -pitcher -##gary -neb -resolution -balance -lotus -keeps -##commun -feelings -credited -alexandria -herbivore -subprefect -ek -##itary -ster -portion -followers -nicolas -fuj -immigrants -assigned -##uaries -oral -relatives -creatures -##ullah -contrast -confused -snakes -1830 -digest -appeal -nominee -ahead -telling -##hra -alberto -seas -believes -mosque -##break -reproduction -phenom -kham -acids -offensive -feathers -skier -automobile -summary -bourbon -pregnant -ritual -smooth -126 -castile -facts -limits -blocks -hypoth -sullivan -triangle -nad -##rons -##ampton -##illery -debate -messages -fellows -molecule -nathan -riot -monk -invaded -reds -supplies -patent -manufacturing -feminist -employe -mou -ore -trick -dix -genres -##aughter -1847 -girlfriend -regularly -ravens -heinrich -pepper -employees -ranch -##inks -##andro -notice -farms -civilian -explan -purs -dollars -minerals -jamm -##zig -orland -agencies -layers -bolivia -credit -##iar -##oric -twins -minne -##ographers -##ographics -##keep -supporters -1846 -clement -defeating -travels -##bat -##uni -leap -spots -trio -georges -protocol -connects -1838 -azerbaijani -mathematicians -tibetan -gm -yale -##oan -##lar -##aru -aragon -abst -1840 -ultra -chelsea -paraguay -##hill -spa -martha -tril -morph -khal -bloom -kazakhstan -phosph -calgary -lucy -juda -goddesses -sidney -divorce -mast -##ailles -##rated -##esia -##ede -alk -demographics -delay -leslie -struggle -conspir -pump -unless -preserv -emil -midfielder -rhodes -assassination -powder -vendee -nac -##arr -styl 
-##chw -##enger -radical -##ushima -preparation -abbrevi -leopold -disabilities -ura -##umber -companion -luigi -fundamental -commentator -patterns -##rent -alv -shog -sector -##avian -enforce -monica -frankfurt -covering -cairo -##noon -contained -116 -buddy -entertainers -requires -mirror -interesting -apartment -scandal -fing -idol -1841 -sindh -##here -anders -##opp -seine -error -##ipei -conquered -cancelled -fasc -lod -##dp -##34 -leeds -circul -destro -width -plateau -ingred -vall -##rams -##izer -inters -1819 -babies -suzuki -entrepreneur -tone -wides -yemen -itv -acoust -themes -equations -explained -robot -crazy -kenya -tournaments -telescope -ticino -jas -vag -wer -##rest -towers -monuments -mines -##olds -wait -trials -indicate -experiences -bennett -elisabeth -aids -nem -##tz -afternoon -blu -output -submar -##going -drown -constantin -fukuoka -faced -##chang -heights -##iman -##anga -britann -skys -hearts -##ija -taipei -graphic -catalonia -monastery -fis -hok -lets -##aser -##omagn -##utation -##avirus -rooms -hercules -endanger -##agonist -measurement -theoretical -subdivisions -gig -pdf -##bruck -quiet -subway -landed -georgian -nicar -shopping -brabant -koch -##phis -##acles -bras -organism -latvia -savoy -lucas -carroll -riots -binary -pall -##hem -##arte -leuk -##oks -##ahan -announce -traveled -ltd -hollow -1500 -quad -##ciation -##ynth -olive -tribute -costs -sculpture -oblast -diocese -orlando -minneapolis -##gender -jungle -argued -scotia -geographic -lamp -##fb -##mare -##icz -##imal -neither -norse -resemb -doors -everyday -closest -adaptation -tad -##bles -##mill -reef -repr -genoc -cultiv -1839 -##flower -psychological -crowned -jesse -acquired -judaism -cun -##isse -##icus -consequ -farmer -##iii -dissol -asteroid -cogn -hod -kot -##hd -chip -artillery -instance -aviv -turb -victim -automatic -transition -sainte -registered -compilation -theodore -inducted -##fr -##mish -nept -##pert -guards -duo -dances -soprano -thanks -specifically 
-roma -effort -strongly -cornell -ambassadors -tunisia -horizont -nephew -intellectual -gc -##miss -marqu -honors -harder -phillips -##lemania -gymnast -dong -hij -rag -flew -essential -siber -guatemala -##apest -penis -mccart -gut -gap -##mus -##uru -preserved -quint -budapest -arabian -likes -categories -namib -lithuan -peruvian -ellen -diana -rol -editions -breeding -variations -screenplay -jeremy -princeton -negot -policies -randy -mozart -empty -dying -##eric -##aters -##asian -##apse -wore -manila -barber -harper -turin -autumn -marcus -kart -wc -##dal -forty -canon -carson -equator -troy -drawing -remembered -vat -emily -rash -##07 -##enza -##oves -schm -smash -metall -greatly -profession -wrestlemania -increases -infectious -substances -shapes -##claimed -suddenly -theologian -##ulus -marketing -instructions -airplane -endangered -gases -tooth -stol -mayenne -feeding -servants -easter -artistic -windsor -kawasaki -overseas -bj -jp -mcl -pix -##zing -##rist -thames -boot -##assium -attraction -subsp -##yla -filmed -fights -nervous -pover -sask -util -##san -##dr -##fi -##vet -ish -##akov -muscles -cohen -barack -expect -societies -potassium -teenage -dorothy -##eli -warfare -palae -##oyd -facebook -centenarians -hak -chester -strate -sporting -defeats -grove -##while -weakened -printing -wess -##tor -##icon -##istere -136 -1789 -dialects -stevens -maritime -hull -##sson -##ku -##32 -##iane -diag -aggress -soci -legion -passage -crashed -waiting -lynn -gothic -tape -##enstein -##atically -sole -avatar -receiving -copa -architects -ahmad -ji -yes -##wyn -prohib -##ondo -twitter -relief -sox -berm -##eshire -secretaries -inqu -##isons -beings -##bery -##ecies -mohammad -skater -railways -350 -therapy -oriental -mlb -yo -##rors -##cal -##pass -barrel -grandson -chev -ratio -dalmat -achieved -escaped -mickey -cou -kdot -mumb -clause -providing -hoff -1801 -qualifying -bringing -##rimination -strategy -kane -moll -mikh -##gard -##idays -##anton -colin -depth -emph 
-divine -retail -##unden -mills -libya -wolfgang -slovakia -rodriguez -chateau -kara -##roll -##fried -shak -clou -shares -republicans -widow -platforms -ancestors -##point -opportunity -bip -saving -tact -winters -##aming -##agar -neu -perp -##plays -##castle -fortune -electromagn -abandoned -##bie -##onna -##igar -##agers -##apa -components -skating -owns -roller -veloc -nitrogen -lahore -absolute -stall -floyd -##igne -ferry -survival -##pread -krish -eagles -poverty -ik -vector -iso -orn -marcel -roth -carey -solomon -appearing -1790 -sherman -diamonds -travelled -advertising -##olithic -aim -dil -gius -##gon -##inter -##olar -##ashire -merit -markets -memphis -embass -duchy -vampire -##enter -shiva -nationalist -geology -discrimination -advice -madag -hardy -monthly -rebel -briefly -lancashire -hondur -widespread -##oire -cox -regent -highness -missions -transgender -karen -zeus -princes -investment -dollar -pushed -detective -lithuanian -pairs -##cill -communists -matters -rosen -clarke -iraqi -inaugur -genocide -erupt -lymph -ank -##abwe -tech -surrey -finistere -territorial -persp -##uccess -unsuccess -kobe -bism -hain -nass -caps -latest -feder -experts -oslo -1813 -philippine -generations -lungs -giuse -erect -##kre -##rant -##fire -plates -neo -##acco -##strom -carrier -bend -bits -fol -rut -##yu -patron -attacking -mcdonald -rape -##boro -friendship -wiki -contributions -palestinian -vessels -fay -tight -##iner -##uns -quit -collaps -vertical -strikes -czechoslovakia -madagascar -jos -wich -##09 -##kshire -indianapolis -refere -celebration -bahamas -gradually -##zel -##iso -##rets -chu -carries -allan -blake -specim -athe -1832 -weekend -crack -descendants -sas -##hall -##hyd -##itled -alc -fled -##cium -operates -moral -sexually -1831 -bayern -shangh -somerset -##stood -plural -##avi -peas -repair -smoke -oceania -bombs -originated -efcfff -##kel -##wana -##isie -##etch -deck -##awi -flies -hels -chronic -corp -entering -duties -functional -cinemat 
-replacement -saskat -roc -rockef -255 -shortened -economists -innov -bonds -determine -penguin -nig -sizes -##chair -##usters -adr -joel -valu -operate -cycling -shooter -bicy -ios -tens -wo -##sd -isot -##otten -##eback -canter -marian -##ahu -regiment -corruption -topics -woody -replacing -1835 -tsun -cartoonist -dug -prosec -##ubs -##ahi -1818 -libraries -logo -##egro -indicates -rejected -mb -##case -shear -extensive -donkey -pretty -reptiles -jen -lover -inject -client -##chez -councils -caroline -1842 -candy -abbott -##strong -eldest -finishing -survivors -urawa -mumbai -karn -laugh -nc -##found -input -##acht -drives -##ettes -verdy -strongest -1814 -mccartney -##bral -##mate -##alus -##ebrates -engaged -bris -dock -tract -salam -puzz -resignation -manufacturer -forbes -emirates -diver -haven -lub -noun -tune -##hart -##eer -reward -chak -monaco -nato -tenth -flags -bhut -jessica -coronavirus -montenegro -rockefeller -##44 -leather -##erner -##aze -##otted -patric -voters -hil -kad -ti -##storm -plague -swing -naked -zimbabwe -dere -fen -zones -##unes -##inda -##riers -midnight -councill -shoes -homo -treasure -slovenia -dent -zar -##asse -##urb -spelling -tram -##anca -doug -1820 -aargau -alleged -gandhi -algorithm -bm -dion -rna -##alis -hey -conson -meh -prove -harbour -remake -nights -nobody -identify -concrete -expressed -macedonia -giuseppe -dys -stur -clif -scales -drinks -schles -airbus -superv -crossed -deputies -watched -astronomers -assassinated -fbi -wre -##enbach -##derland -bark -sensit -respond -guitars -esper -aspects -delivered -loyal -voltage -##vich -##orrh -##aken -edwin -##cycle -valencia -merced -1824 -bavarian -citizenship -elevation -innocent -slovak -flooding -volcanoes -exercise -toulouse -fraud -##athy -incidents -restored -magnet -quarterback -attractions -arsenal -tasmania -shanghai -gest -##kers -toys -##olan -gram -establishment -attrib -exposed -##istances -lesser -processor -hopkins -powell -##ytes -dramatic -distances -pag 
-qatar -##dum -beeth -chir -europeans -signature -##tingham -meanings -##ucker -##aja -measuring -lorenzo -bedford -dispute -consisting -venezuelan -geographical -gad -rams -theology -phon -berry -pointed -happening -collapse -##ihara -watching -jets -lok -piz -violet -xia -##wil -stem -##quer -matrix -denomin -administer -essays -shirley -trinity -cambodia -canterbury -cc -wak -##ych -##mons -##rea -##urst -chains -chorus -transmission -1833 -boroughs -moss -##nu -##omo -reh -##etz -##ogs -driven -motorcycle -physicians -arrangement -bruns -pulled -comparison -acoustic -tanz -yuk -##lif -##iture -orph -##cca -##isexual -wald -graves -greeks -melod -falcon -##uting -armstrong -millenn -kingdoms -graub -visiting -conquest -doubles -besides -cerebral -pomp -welf -stamp -beating -alps -evac -practical -dissolved -guerr -jammu -leukemia -kyr -##ean -reorgan -bea -teleg -phara -brass -ecology -##ucks -streams -balls -promised -performer -ranks -legislators -constantine -juris -reconst -graubunden -hos -rates -##clusion -flames -symmet -flowering -advisor -abbreviated -wals -sheph -helm -organis -turbo -visc -firearm -puts -rainfor -celebrate -observatory -shuttle -thrown -obituaries -mikhail -ved -##alu -##icate -congest -blair -belize -epid -superior -extinction -copyr -oceanic -beaches -fluor -leafs -voyage -rebuilt -trilogy -mum -oss -##55 -##arez -annie -compositions -nucleus -typhoon -chambers -kok -zen -zamb -shi -plum -prep -saur -bars -sally -holmes -##loo -versus -tomatoes -tenor -tribal -determin -marathon -mog -raz -wav -##gel -##cus -alleg -centres -##ausen -maya -permission -apply -villain -motorway -rubber -shawn -spectrum -prostit -beethoven -dome -tons -xx -##zzo -anj -sco -monroe -detailed -yamag -tobacco -breathing -occasionally -gift -mice -pour -rope -##hand -##krit -##mm -unp -##cester -amar -noah -explains -factories -1803 -rotten -sevent -rabb -icelandic -einstein -kron -och -##08 -##ctica -consec -milton -verse -burton -morrison -duncan 
-infections -tables -##aton -comfort -cancel -herm -##ensen -##ugu -glory -affects -pulmonary -mammal -trinidad -achievements -experienced -doming -hesse -nwa -##var -rect -##ird -admitted -emmanuel -biologist -urdu -panel -hospitals -translator -losses -samples -rapidly -hag -kern -##mu -regime -europa -batt -publishers -technologies -funk -oliv -1829 -racer -introdu -marshal -responsibility -wheelchair -vocalist -interactive -sessions -wichita -mits -##fts -##atively -##orne -##ropri -azte -accounts -patriarch -fiv -fate -ming -##rt -##anmar -##arts -terra -114 -defender -passion -memoir -1806 -rainbow -facing -cherry -violinist -griffith -klein -dip -dors -gan -rats -##gets -##esi -##uli -pear -pride -twinned -112 -chef -fighters -adopt -flowing -installed -orbital -pharaoh -cure -dorm -dash -pound -rust -rex -##osy -guad -venom -affili -andrews -bradley -stalin -curtis -##otype -lesbian -aunt -ci -##dia -##ilian -prizes -referendum -castro -papua -identical -sleeping -knockout -kitch -##lain -##dan -thy -##alay -mang -manual -scores -beta -electro -##anco -instruction -holidays -meanwhile -cannon -fortress -celeb -hautes -shogun -neptune -dow -dans -##erver -##ande -spiders -preced -internationally -christine -##ructure -enterprise -readers -haj -rider -ves -##rina -reyn -unified -##allow -outl -winston -periodic -lessons -bassist -practices -prophet -minimum -welfare -gear -kol -rent -##iad -##iration -component -blade -coral -tsar -gim -lich -##mal -android -##imate -##unciation -##avan -newfound -compact -blank -perth -instant -dayton -obst -competing -drain -trucks -trigger -uzbek -subspecies -dual -heter -jura -##eres -##alam -##reh -##ules -sait -multipl -1793 -declares -emeritus -valuable -eats -##jun -monitor -basel -revenue -holstein -fallen -1792 -sediment -oxidation -lying -##nut -##lis -als -swift -understood -##lette -##inki -##flies -##ipeg -uruguayan -situations -antarctica -conspiracy -dess -fusion -##anta -nottingham -modified -turtles -timber 
-1795 -goalkeeper -mahar -collabor -portrayed -sanskrit -brunswick -consecutive -bisexual -fm -lp -mau -##uve -inch -anast -##utor -atlet -##onda -trap -augustus -avi -##kaido -receives -naturally -pianists -request -fairly -participants -belonged -terrorism -earthquakes -manipuri -mongolia -clouds -telegraph -wander -##rite -newcastle -##ahon -volumes -gibson -beverly -hort -kul -pab -shif -prices -marina -bases -pref -seneg -persia -cnn -ticket -yard -##arium -##osing -claus -calcium -holder -hotels -psychologist -circus -##harmon -praised -taiwanese -nurse -stefan -450 -holl -pounds -vie -manner -releg -burial -climbing -mrt -participate -bacter -ussr -ninja -sue -##jing -##stown -stood -1600 -grain -racial -timothy -freshwater -##opods -whale -marne -hash -##iza -surgeon -portal -specialized -handle -restoration -##affe -somalia -##what -henderson -fivb -aes -fog -sack -##lag -proport -clara -coop -##ffield -##azi -overth -victorian -1900s -ratings -eugen -kurdish -saitama -dais -ling -##mates -##enon -conclud -scat -scout -mead -dominant -cutting -atlantique -focuses -eruption -##bane -##atural -tosh -##osta -rook -conscious -purchased -autobiography -grandmother -iphone -nile -##atel -leip -peer -marco -diary -partial -janet -openly -superman -1791 -naming -dirty -continuous -lebanese -refugees -paradise -horizontal -ub -##anor -##elt -##ieth -hermann -deport -cran -1822 -prair -bengali -sauce -revolt -infrast -atletico -boul -fuk -gau -vish -vest -##horn -##ław -##reen -ears -##alez -underwater -battal -designers -burrow -martine -1834 -viewers -facility -hatch -##chaft -frost -tongue -kumar -cavalry -bermuda -dag -fritz -herald -coy -natal -dukes -courses -flora -sno -bombings -lyric -benefits -merchant -fingers -patricia -bout -cork -hiv -nih -##lore -2020s -eva -scheme -ecw -filmmaker -penny -1810 -##ucing -architectural -concerto -lorraine -clearly -stripes -experimental -##anas -##imer -twent -managing -radioactive -grounds -partnership -excellent -ered 
-##kow -shells -somewhat -infected -benz -connecting -miranda -customs -pollution -nutri -butterfly -koppen -bourg -hul -##eff -##aches -brick -angola -angela -connor -mighty -appoint -suspected -wizard -copyright -##bank -##rag -##card -##alan -dew -bills -graf -honored -rosa -sailor -customers -helicopter -##ozoic -ecole -fug -##xide -##thy -leak -dischar -desire -winnipeg -biologists -sanchez -civilians -probability -tao -jumping -jamaican -mv -rally -tav -yun -##onaut -##isi -sega -indu -parade -governing -belle -gloria -decay -middles -professionally -firef -drake -##ymes -microsc -jeffrey -kenny -1800s -emotions -gonzalez -senegal -##vic -infer -forth -##ovan -comet -##orship -calder -##ennial -##anning -verde -flemish -pregnancy -shallow -basilica -bard -flex -misc -pf -pia -##oque -##31 -frib -shift -sup -##ango -highways -slip -brew -properly -beetles -exclusive -ink -##olia -##ogan -grim -herman -evan -relation -seek -thoughts -astronomical -bruno -discussion -locomotives -##now -##atur -##bern -##angers -##quet -reputation -gerard -helena -editors -classics -counsel -1828 -##rises -meetings -cabin -buddha -consisted -immigration -##bird -narrator -valleys -ix -nh -xv -##bes -pocket -##quez -##ushu -##chewan -conductors -algae -ancestor -inherited -abstract -##gus -##rice -##inas -##urers -##using -##ourse -##redited -palmer -1827 -deadly -conquer -##borne -perez -taxonomy -saskatchewan -##lia -##imi -sings -subfamily -humanity -jewel -timeline -contestant -rudolf -belarusian -acceler -wcw -hes -nure -tah -isbn -wei -myanmar -breeds -variation -chemists -1825 -pilots -surgical -alexandra -roberto -monkeys -mia -til -tough -##eus -##tes -##isible -chit -unic -abel -indus -warwick -berne -translations -saxe -fatal -concepts -laurent -theorem -veteran -##iciency -lunar -laureate -ceremonies -pilgrim -aph -dul -zam -##lio -willie -provision -freeman -climb -observer -asking -jamie -certified -nicole -enforcement -##kreis -fribourg -cip -tac -##hist -##cz 
-##cio -##cup -##vor -##unct -##rities -##ernal -laf -diving -organised -arrives -##lingen -palatin -burma -mccl -bonus -cuis -maintained -achieve -newfoundland -rw -##06 -pla -abroad -##ibal -disco -moist -consumer -jef -interests -interface -membership -publicly -labels -kirby -rafael -norton -namibia -bhar -f1 -##ische -##aders -partially -worlds -smell -feels -begun -hamlet -prefectures -babyl -icon -diplomatic -resource -golfer -mercedes -celebrities -bce -vibr -beast -corb -discontin -representation -##ushi -satisf -santo -reacts -coverage -##enhagen -palatinate -cater -hate -kub -anx -whales -unity -undert -armour -differently -everybody -teachings -1826 -liege -aquatics -baku -tonight -aberde -guadal -gael -##57 -stern -shizu -unoff -belgrade -intent -traveling -homeland -hartford -bind -bail -lup -##order -upgr -##cessor -##mberg -curse -stafford -kuwa -bleeding -francesco -puppet -infrastructure -sends -wider -##court -##vious -##onist -bean -##emann -##ipel -sheffield -##rup -brings -flint -1700 -noise -geological -getty -barbados -rises -annex -boundaries -malaysian -abolished -embassy -dud -kak -kyle -tanks -##olin -carmen -indie -glou -speaks -claire -pulse -suspended -travelling -reservoir -nicaragua -cly -tus -##200 -##eni -forec -beans -notation -musk -schwar -##ront -versailles -hym -circular -corporate -cruel -emotional -linguistics -fungi -helsinki -organisations -leipzig -dot -mt -tent -vista -##uing -ants -##thus -##ivisie -comte -plaza -##aby -poles -creature -hardcore -judg -postal -dhaka -velocity -honduras -eredivisie -gale -pud -##stra -##adeus -mayer -cob -eps -rounds -interviews -belonging -pronunciation -intelligent -attracted -playwrights -ligue -sink -##gia -##cor -##ener -inland -newman -counts -founders -christina -operator -brandon -carniv -prussian -ferrari -nago -vine -##anti -gramp -joey -populated -angles -developer -variant -samurai -##flix -1809 -nicol -exploration -equality -maurit -constantinople -hi -rita -saga -##jara 
-##mb -##sten -##opy -valid -berger -entitled -moses -wheels -habitats -schleswig -hik -raced -sending -trop -##eries -##enas -##icing -##ilder -prest -##antes -abortion -remote -topic -khy -hybrid -reader -oceans -cyclones -recognised -bom -yards -##pin -##ellar -doo -secure -populous -emigr -impression -animator -netflix -hook -1817 -1804 -mechanism -clarinet -illustrator -ae -vig -##lav -##idt -##alla -polyn -germanic -interv -##riages -patt -judicial -aquatic -frozen -cameroon -ingredients -millennium -cake -hd -sikh -##iwa -##uous -##px -##imony -##abited -willis -124 -arrival -liu -maxwell -bailey -conflicts -withdrew -maritimes -nebula -derek -##yard -##right -##inity -mugh -mali -efficient -sendai -forbid -accurate -spending -meiji -bankrupt -borrow -difficulty -mandela -folklore -mcmahon -nagoya -bik -##oibi -heated -spark -notre -warn -cups -pierce -suicides -bahrain -andreas -aleks -ero -fet -nes -##aso -##thum -stutt -##raid -##auf -johnston -reducing -anywhere -aval -copenhagen -lima -sharing -sailing -sheriff -bradford -ellis -stuff -phillip -dependent -##harmonic -eh -qing -##unn -##acio -knife -parth -upcoming -genesis -recru -amanda -outbreak -kingston -revival -irving -davies -squir -lithium -amazing -ethiopian -advertis -lisbon -jorge -ahl -cors -ole -tup -toler -##ayama -perm -emin -##eneuve -hyder -bronx -spiral -spirits -##zhou -adjust -vernon -ora -sach -stere -##idian -muss -capable -valais -garonne -tasks -reformed -catalan -rahman -subsequently -pyramid -cooperation -shizuoka -pant -wise -##sv -##fu -##alom -##oline -proce -manage -funny -decline -cooked -encry -please -libertarian -joshua -collaboration -aluminium -df -kee -lent -pbs -##jay -##eros -##adh -##rov -buzz -1807 -##osystem -nie -vid -##aise -##jin -thread -spart -recover -##otta -obvious -##gyz -##eroy -destination -##yeong -harvest -trumpet -dimensional -wessex -congestive -bess -jake -koh -omar -pwi -##des -##omorph -##cho -presents -##irect -113 -organist -shoulder -1808 
-juliet -hawk -homeless -dishes -lucky -administered -pets -##vas -##chet -##angu -##amma -archipel -##afa -##hammed -interpretation -antibiot -kyrgyz -fake -higg -kend -yan -##atha -directions -240 -raising -solutions -richest -turtle -##archy -##borg -concord -segunda -updated -lenin -referee -lum -md -tues -##alf -##orno -roh -drunk -guess -##urring -whereas -##undy -##ungs -stopping -structural -mahm -legislation -hemorrh -cameras -elephants -discoveries -stolen -erit -mys -##len -##enov -deals -afraid -##spring -##icking -amber -governed -baroque -divide -arrive -##icester -hunger -cartoons -anthropology -disappeared -crawford -submarine -gior -##notes -seth -traged -commod -genome -crom -##obia -reverse -1870s -1823 -separation -accidentally -regarding -sustain -vatican -espan -lens -##thon -##amine -##igs -##odyn -basque -##ammy -1798 -pastor -clarence -tunisian -pest -##hed -##agus -shon -##ckland -cluster -abilities -sheet -adri -disagre -##actic -glad -micha -salem -berkshire -richardson -##centen -requirements -veterans -wesley -##continent -nippon -##cin -##apor -sperm -sportsc -straw -lynch -hindus -coronation -##rahim -residential -brisbane -aka -lighter -sous -vau -##eaux -##mail -dear -knox -recovered -timor -bernie -stadiums -package -brigade -abbrev -nuremberg -bamb -hast -nir -pter -##nas -##days -##stop -wasn -##opus -conce -button -coin -edit -edges -1910s -##ylan -greenland -memories -ult -##elda -segment -contributed -syll -vitamin -guru -strugg -khyber -cesar -##edo -fris -enh -encl -marines -grid -popes -soy -##inski -albany -quin -supercenten -1880s -speeds -karachi -shinto -terrest -orbits -benefit -integrated -hormones -imprisoned -nacional -wax -yar -rever -beef -##illing -clown -yearly -##gradu -brave -archie -varies -sanct -talib -knowing -analog -vikings -synthesis -appeals -purchase -explanation -iber -lj -##chu -fork -heating -##rab -dres -mature -amongst -##iffe -embry -disputed -viking -chromosomes -oy -##nian -##yg -thumb 
-##adays -##idan -mank -noticed -##rys -sofia -info -milky -skiers -magical -antwer -judy -1805 -accessed -arcade -asteroids -siblings -strategic -moderate -prairie -ai -##master -sta -shre -sham -marse -##sein -##tenham -##ilyn -quot -export -##engths -humor -nowadays -pawn -ducks -squid -limestone -1788 -campaigns -logan -boliv -optim -contribution -treasury -shepherd -nt -tj -ties -##vae -##asma -stir -stuck -##otam -leicester -##ipse -blast -edo -wellington -##cheon -josef -recognize -vacu -shirt -percentage -whitney -elliott -lal -zinc -##nar -##cos -thurs -toad -##amy -##chten -##adder -##idd -##terdam -somewhere -saff -legally -loses -hof -jacqu -exception -reforms -relativity -##brook -lancaster -shimizu -aure -kann -lance -##zin -##leen -whis -##okes -grun -##ibe -saar -mainstream -malt -researcher -1816 -gros -observations -dictators -abbreviation -ses -tack -##jat -##onica -beam -##emp -##ifies -##phones -mush -slide -invention -anymore -connections -persu -nobility -congressman -viewed -durham -elsewhere -slovenian -martinez -hence -vand -toled -fraser -##ulum -preston -agn -acute -111 -slam -surprise -mya -hyde -ideal -darkness -prede -neighb -celest -smoking -accompanied -browser -##ropriate -jol -nerve -##jor -conrad -sheikh -##eryl -##elyn -madonna -entertainer -programmes -assistance -drawings -wednes -migration -bicycle -terrestrial -caves -jab -mug -yah -zu -##fit -##anu -##stead -##etto -##see -allah -schl -directing -##ierre -traits -lasting -martyr -1811 -abolition -locomotive -electromagnetic -antwerp -##atan -##etown -heather -##lywood -silence -cameo -hallow -##yev -galile -gardner -cubs -propag -commanded -alternate -columns -doubt -griffin -enjoyed -yankees -mysterious -supernatural -fount -kof -zomb -##ivo -plac -abdom -brut -swallow -skir -##boys -announcer -roses -odys -communicate -wears -floods -pablo -dinner -jelly -lily -sne -##05 -##chio -bride -monet -highland -landmark -sunlight -judith -jerome -influences -lifestyle -##optera 
-kidnapped -hypothesis -sensitive -flet -gly -hbo -lago -pagan -riley -##gent -##orgh -inten -##roc -##opod -##igata -exclud -amor -weimar -elvis -eleanor -romeo -1776 -hayes -philanthropists -boc -gog -kai -nud -tarn -##pes -##itian -##leum -isles -##opic -bei -##ebe -crater -portable -filming -antlers -macdonald -antiqu -slavic -hedge -neighbouring -bis -cia -hank -##ithms -##ebody -worm -indies -phant -overl -134 -philharmonic -nearest -villeneuve -flyers -rebec -estimates -diversity -subprefecture -toledo -della -nab -pend -##eka -##cock -##sted -##chel -alder -chenn -seals -##illion -cologne -prehist -baltic -journalism -temperate -ruby -monarchs -gamba -suitable -preservation -britannica -stuttgart -##aan -##bound -##coms -##cube -##wang -##eny -##urrect -arlington -caliph -##azar -posthum -governorate -hosting -espn -bullets -1797 -medicines -reacting -sprint -segreg -##enzie -fernandez -concerns -encouraged -##jee -sect -peg -boom -hered -acres -wein -milano -retiring -avo -churchill -crab -happiness -##bourg -rotterdam -hello -columnist -fitness -constituencies -goaltender -cac -##gae -##cu -##imedia -uncer -##ako -commem -adm -##idea -guests -apoc -explo -sociology -ruins -banking -ethics -ashley -teenager -toll -lombardy -circumst -germain -ioc -pond -werner -exit -doom -jung -##ambig -approval -boxes -ownership -gentle -furniture -khamba -perspective -walsh -reynolds -twentieth -##chus -##okee -waterloo -remark -tempor -antip -1850s -sitcoms -distant -theatrical -predict -kanagawa -aerosp -doctrine -punjabi -satellites -archipelago -dell -ongoing -unh -convert -##ardi -##velle -missile -ecosystem -sanf -##atti -drove -chlorine -barrier -tsunami -jurisd -cuts -##asso -spo -triang -parma -artem -grab -pakht -sharon -waltz -sergei -infinite -prosper -unofficial -cb -sams -tud -zagre -##iate -##koping -##utz -suite -newark -scots -##cluse -greene -strand -consoles -statues -cros -##cripts -##ombe -chiefs -promise -fault -tuber -synthes -##iveness -marinos 
-##onymous -sergeant -sponsored -craft -dylan -hms -sits -##hor -##mith -##icit -##iments -shane -marble -##ibo -brent -recovery -mao -editing -armor -retro -decoration -burke -cyan -debt -##beck -brandenburg -favou -algerian -occasions -aristoc -uganda -##smith -jill -rost -sass -##gir -##once -fran -##unkh -##tering -marilyn -121 -impress -barnes -load -assess -auckland -1890s -teaches -collision -cello -sequels -contestants -nutrit -mohamed -##cribed -chennai -zagreb -gond -porter -##ulia -counted -worksh -interc -castles -torres -logical -navar -automatically -hawaiian -fabric -niigata -wavel -uzbekistan -tuesday -burr -##oard -##py -##annon -samoa -announces -primitive -theaters -somebody -alfonso -3000 -concerned -accomplish -sacrament -antoine -fischer -pizza -tanzania -cott -katherine -##esian -##apur -ley -spell -##ifest -##issel -relay -starr -defended -porto -premiere -coached -curve -costume -medici -hospitalized -sailors -pigs -licens -evangelical -petroleum -tajik -jal -##bons -##onn -deity -lever -worms -clergy -guang -anger -patrol -realized -devils -melody -fergus -atmosph -continuing -wolves -kosovo -cecil -##keepers -kuwait -hyderabad -intense -##unkhwa -yak -##entin -##ivery -##immer -shrew -##oco -##acks -phd -offspring -##uga -secrets -swamp -##cius -230 -silva -toured -frequent -breathe -mansion -exposure -auxili -pakhtunkhwa -gos -gome -glands -lemon -lift -rous -stap -##opotam -proud -marl -brush -larvae -squar -protects -digit -brands -oilers -estimate -neighboring -tornadoes -jed -ns -wimb -##equ -andes -##sts -##iry -disambig -mayflower -uprising -releasing -interchange -defending -suppl -madh -##iences -cyclists -manufacturers -hutch -cafe -cena -ding -pasc -##atas -sting -bees -##iku -sworn -trapped -metre -makers -mercy -graz -paying -controller -inherit -wisdom -bhutan -cuisine -nied -##86 -##bage -##kk -##olism -hector -##icho -##endra -hert -feast -antig -similarly -describing -1775 -horns -rhineland -rouge -gomez -crop -fathers 
-ile -kamp -sapp -##nn -##lich -centimet -coleman -upset -greens -metaph -delivery -verd -camel -garfield -turkmen -1799 -optical -alcoh -michaels -ket -miz -ome -raven -##gun -##arms -deaf -meps -disl -displays -trace -belly -eden -##ashtra -##engo -grover -747 -apparent -choreographer -zambia -vacuum -casc -rode -ratt -sok -##chur -##adian -##cesters -rochester -harsh -intensity -specialist -continents -hunters -learns -practiced -piper -recommended -##chwitz -fletcher -##cestershire -pard -vince -zel -°f -##run -##word -ally -unex -marvin -##quel -144 -##oby -accidents -runway -madame -posts -premiers -boyfriend -toyota -subsidi -disambiguation -eph -fah -kais -oath -xiv -yours -##elius -##ctive -preserve -secular -austen -brock -filed -developers -##bye -everywhere -templ -motiv -urs -danube -##marks -predator -chapman -ranging -loving -inspector -vegetation -opinions -triassic -kurdistan -dough -##jev -beak -sep -ate -pars -overd -albanian -johns -italia -##otti -rockets -demol -##ledon -limburg -50° -##ofen -okin -##caa -qualify -renal -coloured -igor -isolated -cz -ef -jh -##onte -##atz -##iler -beats -##esset -carm -knee -stras -revers -supporter -burk -frontier -1796 -observation -scandin -diverse -tatto -preferred -##graduate -jong -lill -mitt -##atops -##ikovsky -consul -assumed -radar -1860s -##beth -etienne -gallen -selen -ez -vick -vale -##atro -##itta -##itudes -herz -##eca -schne -elite -coaching -semic -ambul -disestablished -carbonate -conducting -uncredited -hokkaido -kitchen -discontinued -hemorrhage -cigar -dai -fiji -##kok -##erie -threw -heal -chau -spears -bois -noct -belfast -##inness -gamecube -##ometer -radi -freestyle -latvian -1794 -1802 -welcome -commissioned -encoun -shelter -certific -gifts -circulation -relegated -ces -eul -wives -##list -##escent -hepat -shab -como -leh -schum -nationalism -##ieri -metab -signing -turks -cardiff -oswald -dominated -falc -employed -sulfate -lutheran -seemed -hannah -dart -kiev -rim -sg -##lov 
-##vement -##atu -##rove -##emi -mangan -abund -##000 -recurring -este -moor -maiden -##yson -rivera -##ussia -steal -##aek -handball -aston -viola -chapters -harmony -navigation -roland -rebecca -##ов -##orrow -##idon -##bergh -proto -enz -agu -##gren -hara -brack -25th -royalty -crosses -audiences -rebels -speedway -container -floors -iwata -fellowship -siberia -maharashtra -anxiety -forbidden -halloween -homet -jug -mash -sells -yuan -##0e -##aic -##nels -##oning -##atist -##alp -##reats -##andra -worried -hern -knesset -schiz -gujar -outd -impe -redd -bret -discovers -baloch -bunny -displayed -##adeshiko -momentum -pornographic -withdrawal -dolphins -##craper -aimed -dhar -ely -nadeshiko -oita -tes -##wear -stays -shipping -carac -##ouses -parody -flesh -dried -feather -edith -##aires -donna -scholarship -cedar -prepare -rookie -marseille -winger -##bone -##itational -anah -##iani -##iants -trench -communism -mafia -creed -juvent -harp -##ophy -socialism -tomas -involve -inspiration -observances -baghd -lankan -rifles -challenges -terminology -aberdeen -jacqueline -wednesday -qa -vital -yus -##yar -##aren -##reuth -asth -##illed -brill -##tenberg -juven -quinn -intermedi -obso -##aea -##loaded -sexuality -nordic -wimbledon -pom -##urus -booth -aggreg -soup -goat -220 -lasts -mounted -standings -arguments -utrecht -funds -headquar -nineteenth -bing -kag -ncaa -##yen -##icious -##olid -spotted -##ende -brien -amadeus -overt -retreat -confusion -eras -autism -visits -destiny -hitting -tribune -missionary -switched -mozamb -wealthy -750 -hare -##iors -##worm -##etically -arte -bladder -monks -varieties -cyber -tomorrow -philosophical -predicted -hemings -sieg -bolog -niel -pose -toile -reid -spher -fee -maid -slim -fuller -antony -negro -##oslav -omn -scorer -honda -mgm -announcement -kell -##bin -##itone -newer -roch -dismiss -brady -apar -##linder -collecting -rapids -rankings -jeanne -1787 -measurements -integer -lauren -robertson -vulner -muller -uranium 
-battalion -auxiliary -hr -##fred -##care -##itiba -whites -##raf -##umont -##akk -usb -afford -allison -flute -moth -juice -mainten -##obe -humph -farther -motto -mandarin -lucerne -buckingham -cerezo -chronicles -collapsed -grampus -thursday -cage -dfff -diane -rup -vy -##kas -aless -##ulin -shiv -unt -indoor -monter -mohammed -corinth -extens -##aldi -sandwich -moroccan -##kirchen -lonely -sterling -hometown -obsolete -cerv -##bel -##oes -##entially -countess -serp -flam -surin -maggie -salmon -titans -touring -breakfast -layout -kapoor -dictator -iodine -dfffdf -bant -hitch -lass -zach -theft -##theless -##agement -##oso -orth -##ifice -firing -fool -bred -resurrect -apprent -##yss -trevor -midlands -bergen -centered -cylinder -competitive -opponents -ivory -conservatives -peaceful -rotation -bibl -torture -orientation -bohemia -valentine -ust -xvi -##org -##asu -##ceae -##apo -canyon -##space -contra -crane -slang -210 -jules -bonnie -favour -peaks -vegetable -isabella -diaz -##recce -esperanto -canceled -cipher -squirrel -aerospace -##bard -##say -##chin -whate -congreg -canucks -aboard -##erness -murd -satan -petr -holden -blocked -inns -souls -gregorian -konst -sylvia -crystall -synthetic -arsenic -nutrients -taliban -sanfrecce -mk -rapp -##kinson -unfort -##rak -orton -neill -protons -conven -##ifax -carne -flame -buch -howe -##shine -characteristic -hotsp -bordeaux -declined -leonardo -explorers -antarctic -krishna -sapporo -tier -tna -zoe -##xi -##erted -##aroo -##itable -##elo -##avier -##ignon -##sha -halifax -sentences -denied -laser -##escu -cherokee -cromwell -whatever -ges -jaws -lund -##mor -##inos -chips -##imov -helium -teresa -shorts -cannab -lawsuit -ramon -fuji -additionally -eisenh -quartet -reproductive -cognitive -domingo -mughal -unfortun -dmit -feld -pach -andor -##amation -##eling -##adt -##ums -enri -##ustion -##oked -boots -herr -##ancing -busy -performs -deriv -candidacy -commentary -dodgers -phrases -dragons -pleasure -nouvelle 
-mascot -ack -cbe -cube -dana -fow -nd -vom -wra -##bf -##math -##odies -pean -hassan -maybe -saul -addiction -merger -##fordshire -##appa -birthplace -literacy -kilog -sailed -aeronaut -seriously -distinguish -##arthy -hispanic -aggressive -lymphoma -vaucluse -bore -gia -hale -nina -pts -##ilst -chern -##illac -clash -##auss -evol -reggae -barth -differential -russians -statements -taluk -groß -broadcasts -teenagers -hanover -tailed -renault -flavor -elliot -quran -ornith -atmospheric -pipe -wah -##erman -##leader -spike -resid -relax -symp -##shan -variab -squares -telecommun -succeeds -linguist -ultimately -astronauts -botanical -upgraded -bots -zelda -##etr -##ritic -abe -buying -verses -dense -familiar -participating -underne -viktor -dressed -glacier -##continental -destroying -reconstruction -##jatjara -squarep -impeach -biblical -underneath -bury -cic -eros -hague -pens -wies -##aul -##git -##vana -seiz -neust -##arya -webster -##ecy -##acts -recall -saone -mock -##atories -##araj -bihar -sunshine -covent -performers -panic -faust -##eldon -cathedrals -reserves -conversion -bacon -ricardo -vowel -auschwitz -discharge -provisional -ngu -ria -riders -tann -##ono -##asus -neal -##ieux -boog -emble -mupp -mcg -immunity -rodents -1777 -dealing -rabbi -frederic -answers -commentators -pharmac -##ktop -fey -naw -nuts -vr -zion -##56 -anten -seab -sphere -##acker -daytime -annabeth -variants -curry -##eldorf -jacksonville -abbas -pitt -tunnels -bundest -spreading -ibrahim -gaul -pornography -##chtenstein -anaheim -awar -eternal -kov -ner -tian -yi -##ycl -plasma -compreh -italians -protagonist -involvement -tunes -##antha -kyiv -excav -molly -adaptations -eclipse -schmidt -unsuccessful -schwarz -hanna -nim -##lant -##pi -##wy -##stock -##ersdorf -leigh -##atham -##acan -endors -mating -goldwyn -##appy -liechtenstein -messenger -jurist -mistake -chronicle -decorated -manuscript -dawson -batteries -maintenance -squarepants -bundestag -##arat -##atore -##rock -plug 
-##odo -adject -laos -defunct -shrines -mutual -pirate -sovereignty -tottenham -portsmouth -uranus -delayed -circumstances -alcoholic -baghdad -fier -gyp -ning -rak -##enes -##iland -shy -basically -median -##cience -augusta -invisible -soloth -explore -penal -integers -mortal -cowboys -beyonce -vertebrates -scorp -correctly -tehsils -autobiographers -decreased -burgundy -coordinates -puzzle -hepatitis -solothurn -650 -bie -hons -irene -lime -##cion -threats -chun -##alliga -pluto -##odor -tromb -##istress -slalom -voiv -presentation -balt -hamlets -banker -racism -walked -immort -orchestras -assisted -feminism -carnivore -havana -airplanes -cheaper -dwight -manuscripts -chevro -gains -wille -##hog -##xy -##edon -anh -##agic -cong -boards -##cession -richards -magnus -bonap -kashima -gravitational -update -clifford -orphan -fins -lava -mell -pes -pau -##iu -##rador -##orious -##lemy -chow -##oded -##apes -truman -##azaki -johan -123 -volks -posted -##ihu -lucia -praise -surnames -terrible -sacrifice -parasites -proposal -anjou -middlesex -eisenhower -dae -pius -zee -##mg -peach -##uddin -##cliffe -coaster -miniser -##erald -steep -ethan -environments -removing -abdullah -klaus -sylv -sahara -customer -advisory -ensemb -olivier -dresden -ecc -nish -oster -tension -##hurst -shen -##ckl -##abl -meyer -sergio -mood -accent -raiders -masses -sunny -digits -circuits -zhang -hydroxide -miniseries -bian -hail -hiding -j3 -morm -sagan -##hy -##nt -##anam -##amura -sticks -shor -marino -##acion -carriers -cray -crem -champagne -loos -incor -viral -obsess -hoped -offering -segments -immediate -decrease -##grave -reunion -advocate -tadp -clyde -erotic -sportscaster -pik -##uana -##mese -thistle -##ionale -seam -byr -plata -##asses -dob -sof -tracy -carefully -confidence -freud -devoted -hoch -raph -1783 -sanga -employee -subdivision -floating -gathering -carpenter -bugs -tissues -imprisonment -tragedy -bust -biss -dund -lia -vaugh -wyn -##jon -##jiang -##42 -thief -##cephal 
-alam -clip -disgu -amn -metam -##engers -annually -incred -militia -##alypt -venues -bombers -terrorists -sculptures -cleaning -##rophys -algorithms -drops -gag -tate -vissel -##oise -revel -comments -webpage -airing -emi -transmit -##ajo -memor -reporting -persec -inventors -casino -bulls -constantly -reformation -resistant -lambert -neighborhoods -apparently -hormone -wendy -drainage -dino -kav -nests -tast -tina -##eet -##oit -##rang -chee -trium -acron -appropriate -##olls -braun -numbered -obe -titanic -mccarthy -sweat -bosnian -cinem -option -applies -suburban -exhibitions -approximate -regulations -situated -obituary -fitzg -sociologist -bb -lb -pto -##ᅥᆫ -thuring -##inh -chant -shots -byron -##aved -arb -##gee -newport -##ikawa -##oughton -disband -brat -artwork -laun -##aeus -goodbye -associations -beaten -hydra -closing -stephanie -##zyme -suspension -archaeology -eleventh -discussed -repeat -tobago -gaming -harmful -wireless -vessel -nikolai -hierarchy -evacu -phantom -sacramento -cuc -cache -dau -fare -sought -tb -##bad -##mings -##chy -##rosc -bez -##action -formats -landkreis -landmarks -##loid -##axter -satir -prevention -sandra -bangkok -referring -bollywood -princesses -integ -trailer -throwing -ceremonial -vorarl -procedure -casualties -asthma -awareness -chevrolet -duss -kah -koll -nara -oman -sq -wen -yog -thank -thes -##ataka -toads -##umps -##ieg -manages -mothers -122 -maori -emm -gluc -##aneous -membrane -posit -secretly -processors -treaties -tarzan -savage -townships -aristot -crusade -professionals -gaelic -jah -lach -niss -##itimate -##thou -##thood -pepp -click -meal -offs -schwe -registr -weas -guinness -waterfall -##boo -revised -greenwich -shoots -strauss -##aptor -manufactured -succeed -dubai -cpu -seventeen -supercentenarian -bonaparte -a1 -cet -dove -ej -tart -vog -##yst -toes -##thr -##ctal -cheng -roche -##acon -presby -bash -dozen -##ussy -underworld -islanders -assum -mosqu -baba -##iyah -synch -tails -suspect -marsup 
-wonderful -cocon -chaos -portraits -tubes -possession -vorarlberg -aware -hc -tik -##pill -##olini -whilst -marries -abh -webb -counting -##ikon -##eye -##pta -engra -regulation -apples -noel -deserts -grouped -archdu -##burger -solving -angelo -victories -320 -macmill -##encing -cruise -liquids -extraord -feminists -employment -dirt -lecture -moldova -wreck -cull -gb -gis -hess -kus -naj -qin -tucker -##had -##tail -##las -stam -shu -margin -##assis -aftermath -agnes -loose -protecting -panthers -treatments -murderer -owl -barton -witches -ipod -imaginary -lizards -lamborgh -consciousness -nah -nxt -sis -##oys -##fur -##code -##isan -onwards -##ayev -forget -footage -##ssel -extends -publishes -paula -1784 -tellur -nagasaki -disputes -explosive -beneath -eduardo -bologna -fitzgerald -##fin -##olt -stunt -ort -polym -colore -designation -astrophys -separately -rubin -luxury -interaction -thermal -pleasant -rainforest -avalanche -predecessor -dover -luna -mama -osh -wi -##edge -inver -##emer -lep -leone -tears -carth -##sport -blaz -##ilding -desk -outs -airb -highlands -metric -##otting -##ophys -thomson -borussia -lester -doctorate -1785 -agrees -ronnie -certifications -subsequent -samsung -juventus -macmillan -gat -##zilla -##lay -##uya -##makers -##alous -##rooms -unre -##pps -schu -thrash -missed -radius -respected -hubert -goalkeepers -bellmare -eyed -removal -sprinter -aliens -bottle -catalog -nomen -initiative -intermediate -headquartered -bould -kens -sque -##hal -##pox -recept -fries -enlight -glam -demons -decimal -halo -competitors -circles -immun -dimension -challenged -nursing -artemis -metamorph -lamborghini -colorectal -kry -mie -pang -ال -##far -stating -franche -##abul -canary -excess -##ilde -apes -slope -matth -define -array -burmese -ruther -robots -hampton -arabs -hillary -##adox -flee -sabha -nagar -euph -beetle -saxophone -phenomenon -burkina -biz -faso -hath -hilton -jagu -vib -yong -##aid -##uo -##jav -##eses -incl -deities -chung -##osc 
-##estrian -spd -compass -joa -contents -relating -132 -wanting -naturalist -olga -potato -condem -targets -sequences -1763 -katie -venture -sponsor -conversation -cryptography -surrendered -volunteers -vagina -euk -henn -kung -##uj -sts -beard -##soon -syph -slayer -listening -obsc -26th -robb -keynes -ensure -patriots -gathered -mantle -disabled -nassau -honshu -incorrect -hig -jou -lut -nl -pork -rc -zap -##opol -cheshire -shire -##odont -##eba -mega -quentin -daylight -216 -totally -watt -melissa -catholicism -payment -casey -principality -bubble -surfaces -rectang -raphael -dair -hick -##rand -itunes -##ogl -southampton -genius -ammun -slower -matilda -demo -22nd -paolo -extent -equally -convection -panther -##theid -flynn -pageant -poisonous -deutsche -discipline -omega -deng -nost -pike -##eur -##atom -##alism -alma -##icians -goose -guam -invade -everett -remn -myel -myers -brett -supermark -sharma -primaries -citation -bowling -explosions -ngc -skyscraper -jenkins -karnataka -tubercul -dora -lump -sf -tired -##bee -##jet -thur -andalus -sto -proclaimed -usher -nationale -mae -mori -cyt -mcdon -ballad -psychologists -cocaine -ajax -absorbed -botanist -favourite -##cklenburg -ammunition -baz -nong -##hop -##wen -thal -##orc -##orah -##ications -##adier -spinning -tradem -whenever -casting -businesswoman -seeking -vez -facto -fleming -dunn -mercia -strengthened -distinctive -restricted -crocodile -delegates -bourgogne -strasbourg -variables -eure -foll -##vy -##elic -##olulu -##adan -alain -miners -travis -muj -bremen -sixty -camden -finale -sunset -collector -reigning -jenny -thunderstorm -outdoor -apartheid -presbyter -cush -gould -lc -mih -nm -##nis -##iem -##orio -##arck -##isition -healing -unst -probe -enroll -mare -mane -trunk -##ocation -adds -simmons -honolulu -privy -vicente -defenders -fukushima -mvp -##hou -##liest -##rington -##part -fres -neuch -northum -calm -133 -myths -##brian -##uko -superst -cataly -possibility -##ucn -wilderness 
-protestants -diplomats -lionel -##ommod -pipes -##claim -wolver -chromosome -simplified -kyrgyzstan -ferguson -dill -ffff -nue -wagon -xii -##cula -##enau -##itches -hew -unw -tej -manifest -##izations -cald -accommod -trem -23rd -280 -fars -varied -extract -sunderland -azad -olivia -esther -gasoline -##forced -archaeologists -laurence -restrictions -vaccine -consumption -caterpill -prehistoric -mozambique -botswana -tuberculosis -neuchatel -dort -dend -fou -fior -tcha -xavier -##tw -##fel -chrys -##ectors -twil -paradox -coburg -131 -##ype -slash -##eshi -lighting -humanitarian -##ructive -cyrill -protective -independently -1780 -dynasties -1778 -fantastic -generated -funded -calculated -herbiv -inflamm -nigel -tara -uter -wan -wade -witt -##cie -##roft -partition -twist -warming -buk -amm -preach -##ropods -##undi -locally -lego -vera -benin -1840s -attempting -funding -kashiwa -clinical -##books -mesopotam -granada -oxidizing -oriented -dimensions -scattered -eminem -innsbruck -cus -gilles -lus -mcle -pash -rational -rij -tiff -xen -##uer -##crap -invert -205 -unemp -##alli -mecklenburg -indicated -##ormal -gew -proven -subcontinent -sketch -##tym -##elfth -castell -donated -humanities -ballets -punished -damages -packers -guyana -rescued -nobleman -swords -amphibians -judgment -dusseldorf -twilight -unemploy -benson -cdu -het -io -kru -lau -mab -nakh -##bath -##cut -##omed -toe -topped -##ongo -trips -compat -##endium -indeed -twelfth -parana -amino -weber -eds -booker -elena -##iewicz -raja -transmitted -bertr -cyrus -franconia -nightcl -fert -curved -dialogue -aspect -##birds -witnesses -caucasus -consequences -prefix -rutherford -tchaikovsky -aosta -bates -gore -mistress -##bh -##amous -##add -##itya -##ifier -tear -gru -jock -earning -##anka -imdb -duet -malaria -##zek -institut -machin -lovers -kirch -ryuk -ricky -levy -rumble -stevie -essayist -jubilo -##music -subsidiary -aristotle -##crapers -gle -lighth -tuc -wart -##zt -##oris -##than -##thor -##tham 
-##uti -shannon -cone -scared -cron -emilia -corrupt -empires -bayer -semif -bonn -speeches -searching -brighton -quantity -shadows -lizard -##foot -lexington -recreation -dixon -skyscrapers -aztec -emblem -voivodes -cyrillic -boun -cis -hs -ily -rains -rides -##hot -##itely -reys -##irang -beaver -shia -pratt -doz -143 -##ulla -intest -civic -sunni -garage -rounded -miracle -liberia -taxi -controlling -kanji -numerical -bunker -neutrons -narrative -hijack -fountain -okinawa -aggregate -gland -mish -poul -vip -zhu -##hang -##fus -##mia -##ingly -hect -##andan -tect -bandy -irrig -topeka -associates -specially -labrador -ballot -trainer -theatres -bhag -diagnosis -weighed -pupils -neighbourhood -reproduce -tactics -multiplayer -mankind -shrek -eukary -bench -dah -hai -nude -nils -pants -##rish -##wara -##jak -##arp -##itted -##amon -##aryn -comprom -indirect -1750 -syrac -augsburg -archer -avis -parking -##esha -needle -darling -aquarium -couples -nationwide -occupations -lindsay -reduction -flooded -paleont -striker -clerk -##piece -pietro -anastasia -concluded -ambulance -schizoph -andorra -integral -bite -cited -nay -##estown -##berra -##ieve -spear -ups -##ulture -1200 -##urnal -config -vega -haun -medications -burnt -photographers -absence -bitter -geneal -concentrated -isabel -consultant -saxophonist -sparrow -renewed -daisy -oracle -eritrea -giorgio -odyssey -certificate -dord -hanc -iq -yv -##lord -##pot -##vd -##ml -isis -enzyme -exce -gron -herbs -agr -perce -resol -drought -buchan -majesty -midway -castel -aster -robbery -torped -garland -mixt -707 -renown -savo -1786 -978 -deposits -balochistan -ensemble -aims -cody -dic -pots -rfc -td -##ør -##enta -##adic -frat -##igible -shik -##ostic -exchang -##ikes -acet -barra -takah -passer -errors -devot -hunted -futur -locked -##wright -owens -bouches -banner -ecuadorian -heinz -aurora -enzymes -carnegie -syracuse -schizophren -saves -##onge -##entric -shores -placing -##ocated -clusters -livingstone -interim 
-30th -assets -downs -grains -characterized -developmental -chamberlain -definitions -calculate -carnival -kathleen -##hardt -thermodyn -horizon -bamboo -diagram -specimens -bambi -nutrition -neustadt -ebert -pace -##isle -##opters -##imon -unn -unix -##illas -canberra -adul -##ignan -##eps -surge -filter -##letter -##ylum -antimony -devast -melan -moselle -prompt -audition -commanders -claiming -commercials -neuros -crystals -##angelo -dodge -heavier -encourage -hammond -extraordinary -cels -dante -gin -iucn -mn -ness -nath -zak -##has -thie -##arina -dex -spinal -grades -registry -stark -careful -grandchild -receiver -mcd -330 -effectively -haitian -eighteen -colonists -conservatory -emerged -kurds -tobias -anthropologist -voyager -improvements -dissolve -bomber -nguyen -comprehensive -3ds -cory -joke -rik -sino -##tys -##onen -alter -##imoto -##andr -unions -marriages -examin -##ryn -mons -servers -email -##itsu -260 -vienne -antio -descript -melt -browns -separates -venue -savann -rolls -snowboard -lyndon -prevented -alexandre -butterflies -opportun -prayers -reelect -fluoride -temporarily -etym -tamb -wink -##58 -andr -shine -##umen -##umar -orion -brend -presley -##onscious -elm -collegi -winchester -safely -hussein -python -cruc -sinks -accounting -poker -cowboy -permanently -remainder -helicopters -volunteer -babylon -hereditary -pt -zack -##lake -##vant -anc -##osi -uncom -##uder -##sei -claws -##anges -##eyer -bram -##inders -alba -crescent -24th -##writing -erin -argue -entity -informal -planted -pottery -1782 -participation -promoting -##zzi -kerry -1707 -velvet -joyce -recommend -councillors -semicond -eccentric -np -rivals -tau -xin -##iq -##fil -fra -conclusion -sheets -##ouss -blamed -genoa -1620 -artif -jealous -interval -arranger -sometime -latitude -beginn -convicts -coastline -wilm -##islav -bolt -marcos -shrubs -omaha -cordoba -ghosts -yugoslav -utility -guerrero -antibiotics -saffron -unexp -manganese -willem -reysol -gou -lumin -poc -paw 
-##hin -##cular -##59 -##inen -##isk -stoke -forcing -##iston -nec -tek -boo -carved -minds -drom -interrupt -volta -##parents -ideology -behalf -inhabited -orchestral -1770 -ibm -carrie -illnesses -shaft -echo -truly -inauguration -reorganisation -higgins -tudor -syphilis -dz -kand -moul -nj -pistol -##atta -##agram -uncon -conway -thatcher -spies -##techn -elim -intercontinental -braves -villains -developments -malik -montp -reads -egyptians -mountainous -bombard -bolsh -landsl -significance -thorough -gymnastics -consonants -symmetry -telecommunications -tadpoles -collegiate -dive -hust -kod -tl -wounds -##dig -##inced -##raeh -seaw -##fering -boiling -##qui -##lectric -airs -belfort -##ocks -extend -gotten -scream -musicals -republics -renov -ashes -tablet -dolls -buffy -shelley -mazraeh -examination -folded -vishnu -serpent -unfortunately -cag -dd -dams -fiber -hyl -lips -oasis -uly -vort -vass -##hara -##bys -##eras -##inte -##itas -chord -aside -##athi -grants -##acent -perkins -providence -##liner -harriet -stealing -##itsch -remar -marty -restore -distinction -turkic -combine -julio -somali -methodist -1781 -synonym -charleston -theresa -jonkoping -pyg -randolph -freddie -cooling -pollen -mystic -neutron -lavender -kangaroo -ichihara -sacrific -fibers -attributed -viscount -natalie -patterson -cows -ips -panda -##bies -##itory -anita -##esti -neville -peck -mari -exha -clients -boone -##weed -##azu -imam -develops -physi -richter -journals -louisville -painful -medication -fauna -padd -aerial -boulev -poisoning -##kirk -##ithmetic -forgotten -improvement -cliffs -substitute -jurisdiction -pascal -nissan -dairy -hancock -kant -nymph -sins -viva -xiii -##hov -##vey -##ilion -##roads -fry -arithmetic -dias -nationality -depicted -smile -152 -albire -barrow -##ainty -entr -bulld -anthology -kyushu -telugu -##dfdf -rudolph -clayton -sophia -pentec -giul -macedonian -fragments -norte -dolphin -renewable -yamagata -hymn -dfdfdf -bella -lone -oval -wards -##wind 
-##jack -##stones -levi -manip -flip -trails -undergraduate -iris -flores -battlefield -joseon -punch -convinced -opposing -dunham -confederations -bryant -photosynth -nanjing -grandmaster -fourteenth -##eboard -hogan -deeper -deutsch -exclusively -bmw -encryption -overdose -spherical -cannabis -avispa -fon -fond -kush -niz -tiber -##rage -##mand -##orum -relev -##rob -chor -chol -##estan -singular -gerry -basal -##iera -bare -everg -myspace -assists -freely -veronica -astur -fails -soviets -hunts -transpar -boyd -programmer -narc -gabon -options -hansen -baked -phillies -stevenson -weighs -thornton -biochemist -dudley -forecast -hernandez -acknow -disbanded -dumb -gus -kut -rt -tc -yuri -##geme -##rise -##vre -##vik -##mium -clade -##achim -carriage -adele -##aceans -coeff -mets -lois -dividing -backup -missiles -physically -shortest -halle -latino -explored -halls -cheer -watches -jonas -polymer -salvation -nepalese -phylogen -telescopes -gauge -flexible -enrique -thuringia -##through -elimination -aks -dl -lied -wong -##igl -ches -##agna -##emes -shields -lea -allig -##ffy -##azzo -quarters -walks -irreg -incarn -##erville -firearms -criteria -coven -curitiba -choices -cellular -planetary -dissip -padma -kabul -podcast -conventions -bacterial -bankruptcy -pentecost -biel -fulf -gamma -hild -kow -kish -kolk -rp -ramp -tips -za -zip -##km -##ayashi -condom -spur -##ilee -swab -##grass -sims -rememb -countryside -occasion -episcop -50th -arrangements -sulfide -##balt -constellations -sankt -cunning -abundant -beds -eun -nair -pim -neolithic -cola -butt -noon -italics -simone -skip -bandleader -gloves -christie -28th -labyr -copied -hoover -cephal -samantha -toro -trojan -simpler -protesters -significantly -krakow -chandler -bismuth -hastings -dauph -bertrand -schizophrenia -irregular -aang -bisc -##gil -##76 -thurg -##asan -stack -##irk -##iped -tric -clive -norwich -##ensburg -flux -lax -weston -patch -sonny -occident -equil -astr -collective -willy -corners 
-draws -addresses -trafficking -concerning -mansfield -corpus -outlaw -maltese -vulnerable -trademark -unexpected -aal -cen -gong -naut -##nitz -##ratic -andres -##stro -stems -##adle -prosp -aryan -cannes -manning -##mans -blizz -perception -geo -##group -elias -makeup -obey -takeoff -settings -botany -adjacent -procedures -destroys -bipolar -snooker -hertfordshire -schneider -410 -gaga -gdp -nr -tand -##olome -204 -stup -chim -##ulates -pollin -carb -gob -regency -buchar -strang -##sheim -builds -##ijuana -execut -puy -ethel -combustion -privile -banana -alej -unconscious -umb -isolation -rituals -inquiry -specimen -lyricist -licensed -albirex -nero -##zone -##gor -##fields -##person -##prises -##ædia -##atl -##amel -##ulator -shang -##akura -conse -##arde -##anted -playable -monts -desktop -invas -treating -silicon -##afar -vegal -efficiency -garner -papal -1774 -editorial -custody -dreaming -polytechn -imagine -benny -habsburg -hubble -lecturer -lagos -ugly -apostles -jakarta -coconut -dortmund -bucharest -vegalta -fury -gop -laus -lunch -##iw -##iated -##frey -##arine -##alysis -##asco -206 -##agos -unders -suk -##ibilities -brune -blanc -himalay -elf -assyr -avon -##loe -deliber -##uckle -pueb -bloody -plantation -dynamic -masks -namely -confirm -clinic -wyatt -descended -clocks -wishes -jumper -daddy -shonan -workshop -gujarat -humphrey -dens -nets -viz -##100 -##anza -dei -chav -##agher -##ithec -##oder -arter -##ornis -charente -162 -attending -burt -woodland -curs -identification -rodney -falk -jennings -lobby -tulsa -khalifa -hodg -inaugurated -appointment -propaganda -unhappy -glucose -kensington -lid -nij -##iol -##lat -##ulas -manor -clap -poit -lars -expelled -backwards -##ophone -treason -goodman -##hei -realm -authent -##maid -##vester -garrett -nevertheless -vanessa -smithson -completing -vacation -filling -kerr -theodor -increasingly -cecilia -pseudonym -vastra -drowning -segregation -northumbria -renowned -mourn -pact -rovers -teng -##ech -##won 
-##eratops -##arie -##leigh -tow -enthus -sparta -##ogens -##ppen -##ugawa -amelia -guided -metac -ecological -midwest -designing -verlag -1830s -breakthrough -magnific -contracts -mutant -##untary -touchdown -gateway -grossing -escapes -strictly -isotopes -scooby -murdoch -mosquito -voivodeship -boulevard -episcopal -aa -cough -fidel -goss -linn -rs -##kus -##elong -chil -lean -tricks -compens -adolph -256 -companions -270 -capitalism -petrol -balloon -hopes -staying -jacobs -demands -tumor -homosexuality -lodge -rwanda -falcons -resurrection -##abulary -cout -lays -pint -##front -##ан -andhra -toh -##igraph -pey -grind -boost -##endale -##izers -##clamation -appre -eli -ida -irrit -antagonist -balk -##ijk -objective -crafts -weightlif -hanged -alfredo -explosives -transformed -demonstration -integration -blessed -tattoo -anhalt -ffffbf -cfcfff -kv -sars -##cell -##went -##jant -##°c -##alach -ching -##allgau -plato -spiel -##uben -##qual -brenn -reserved -mint -reliable -consado -defines -olympian -merry -310 -mcn -karate -troph -nobles -sandstone -inhib -difficulties -touched -morton -cumber -eucl -demanded -correspondent -infrared -coyotes -ptolemy -consadole -dign -homin -jing -jinn -nos -sd -zor -##jud -androm -##inge -heidel -suck -hasan -livest -disks -cobalt -reginal -weaver -crest -smallpox -##look -##ippers -tops -peterson -raped -strain -kenyan -reportedly -respective -##walk -pushing -ssr -hazel -wurzburg -fraction -goaltenders -councillor -boulder -faw -pony -sinn -wires -##xley -##43 -##sta -stole -stability -##adia -unification -energ -compuls -##ibu -engel -##ologically -edible -organize -caste -silv -storyline -etat -petit -ferment -racecar -##baum -boarding -hawkins -bombay -leonid -celebrations -##hrer -hellen -ipad -jumbo -jumps -sutton -biochemistry -sedimentary -proceed -sanctuary -hedgehog -siegfried -etymology -bows -cope -cyl -dab -hoss -vad -wester -zah -##rous -##wl -##enk -ismail -##elope -##stick -##etian -orator -nect -worcester 
-engagement -##clair -##tex -##ugg -minh -willing -creators -142 -stranger -##ropshire -gloss -greenhouse -screens -hoh -galicia -holdings -##hao -modeling -hungry -seminary -meetei -pitjant -1779 -unlock -substant -sufficient -illustrations -rivalry -skilled -wanderers -cynth -gour -tidal -vitt -weld -##fun -##vier -##9f -##aten -##ombo -##chat -##eti -chimp -shout -shropshire -##ikov -##eyn -herod -mai -edmond -baritone -salis -##rico -##brand -protoc -royals -samar -melanie -dracula -distributions -potatoes -frontale -julien -payne -riverside -danielle -poorly -abdel -##criptions -burgess -bromide -cinematographer -rabbits -tajikistan -konstant -bells -bunch -cyn -hue -kro -pena -rih -fright -una -prints -##ansson -parach -famine -backs -muk -macbeth -crossover -protector -combines -boxers -insult -##guard -photographic -##print -archaeologist -reveals -##wski -calculus -occurring -troubles -lantern -striking -kumamoto -dalmatia -bismarck -denominations -pilgrimage -undertaker -abdomen -suriname -hotspur -labyrinth -himalayas -bree -cw -coul -dus -oro -pog -rama -vl -##rics -##power -##90 -thou -##inho -##ioni -isn -alley -##abria -hera -##ppard -hades -acre -greet -##obar -everton -rede -blacks -holt -composing -##brunn -destinations -eighty -reactor -rosem -akh -scripts -1772 -swimmers -cricketers -nitrate -pontiac -awak -atheists -inheritance -dexter -heidelberg -cement -lol -nil -nog -rolf -##nay -##dor -##wagen -stain -##chlor -##urities -##ulous -proton -prat -spices -guin -betray -establishing -endemic -augustine -loth -killers -myr -olympia -godzilla -thirds -3166 -jewell -urine -darker -certainly -exceptions -moments -touching -anglia -vowels -firefight -crosby -qaeda -reginald -pitjantjatjara -vu -##hak -##zu -##omiya -frau -##opf -altar -leis -spice -##ikan -php -coch -guides -lai -quotes -emile -prototype -colorless -gotaland -sami -choosing -drummers -5000 -logar -synt -1768 -reduces -locks -defencemen -teddy -necessarily -reeves -kernel -monetary 
-turkmenistan -bain -garn -h2 -##pard -##post -##watch -##77 -##ormer -##aret -ono -forwards -alten -whitt -##iao -seym -rox -##ibald -joachim -##inde -amir -noir -##ollah -lateral -regionalliga -27th -goldberg -militant -hooker -watts -frankie -osborne -transplant -murderers -insane -citizendium -##iosis -rajas -cooler -finishes -mentor -consonant -cynthia -512 -cern -dru -euth -haf -maus -nolan -pug -rage -tide -vapor -##puts -##asio -stanton -##adena -##ayana -##ulating -ariel -rooney -bloss -append -guys -##ankton -moths -##inny -terrain -##shore -bushr -politically -resting -knowles -bowie -telenov -defendant -vaccines -weiß -acronym -mcdonnell -fertile -opportunities -eich -sina -##yder -##riz -##oram -carmel -##anson -sche -ammon -travers -modes -epstein -michele -billie -graff -alexis -aziz -perseus -newsletter -promises -commands -felipe -nicola -##utenberg -wheeler -napoleonic -tickets -stretch -implementation -innovation -abbottabad -shrews -juvenile -registration -ceb -jod -nk -rack -ruk -render -vaux -##zman -##rs -##violet -seville -##therland -exiled -afro -aden -indy -eddy -1400 -barker -module -245 -helpful -##heit -popularly -conviction -influenza -textile -predomin -1773 -pulp -cummings -constituent -##iggs -psychiatrist -yourself -loosely -doping -eus -foul -pomer -sark -##nic -##yah -209 -##owitz -atari -##ipa -exo -flown -medina -214 -225 -severely -##chev -macro -deadliest -ghaz -microbi -shootings -49ers -##backs -hannibal -gnu -chopin -guarant -nathaniel -overthrow -dictatorship -buchanan -celsius -emanuel -fy -kou -rin -tome -##oot -##frog -ane -reop -##oved -##antly -clare -grams -##assa -##qua -##icki -workplace -141 -canadians -##sses -eccles -olympique -heading -moreno -freight -cycles -ratified -saddam -axe -approaches -corresponding -leopard -repeatedly -spawn -dorset -staffordshire -coventry -launches -presbyterian -mesopotamia -aq -eust -kats -mersey -pamp -piny -vander -##cence -##itate -##thitta -##otr -alison -alvin -chili 
-asylum -##osity -##mere -##esta -conferences -sutherland -clint -ware -minsk -builder -guid -##obic -departmental -215 -valign -static -incorpor -sonata -##auri -##plus -handel -##arno -halluc -karab -melting -##luor -macau -baller -panels -citrus -crabs -beatrice -elevated -gilgit -dreamworks -domesticated -expanding -granite -streaming -elderly -auburn -bamberg -mimic -jasper -enlighten -joaquin -smithsonian -kins -mord -tire -##rats -##78 -##eson -##orr -##atio -##atism -##icist -##andi -##pha -##aceut -regina -weaker -highlight -surpr -##unga -magdal -raul -bali -nightmare -troop -chapelle -tablets -accessible -occupy -frequencies -investigate -kinder -transformation -brigadier -mackenzie -lectures -absorb -decorations -excellence -ordained -malayalam -injection -salisbury -awa -bount -baj -gull -##bold -##kl -##icity -##isto -##andar -sheldon -##cki -nex -lethal -prisons -ars -##antan -mez -foundations -rows -##intosh -harlem -##yna -solved -29th -palme -realizes -erl -handling -zepp -letting -panch -365 -##theim -dynamo -cornish -captivity -480 -sabbath -fuels -cinder -bearing -ladder -lamar -confederates -1660 -vinyl -laurel -arrows -ultraviolet -sustainable -commemor -##church -irrigation -kolkata -enlightenment -ao -gaining -haus -huss -lust -tones -vhs -##dh -##dn -##cas -##erl -##inga -##athlon -scouts -carbo -retain -governmental -salad -superstar -##ethe -distur -primetime -considerable -jericho -esk -graduation -committees -quarks -feminine -beside -ingrid -mollusc -pinyin -bump -fus -fang -pou -##70 -##atum -##aline -##stag -shank -une -neander -##site -scann -mein -joao -mona -weiss -smur -attitude -harass -loir -valve -walton -##coe -gettys -deborah -##ijah -franks -##ucci -costumes -wwii -cao -absent -cosmon -embedd -1759 -sculptors -ethnicity -lincolnshire -landscapes -orioles -cosmic -czechoslovak -jakob -ffdf -pixar -prostitution -sesame -intercourse -triumph -seymour -gettysburg -mbe -pis -##zes -##lights -##daughter -##ᅡᆼ -thall -##rem 
-##thouse -heis -prol -##abh -exter -##awar -nationals -apps -trauma -barney -##obox -malawi -erich -##afi -eleg -tenure -semen -##untu -immigrant -faithful -senses -witchcraft -analyst -drowned -toilet -jockey -euthan -cosmonaut -damp -dump -oph -xp -##uco -##pan -##alist -##icide -stella -##uria -##oden -aren -spit -##uble -cane -suv -##adeh -amus -barrett -operational -irwin -justices -mald -bodybu -technological -similarities -puff -microp -rosario -##guer -strokes -missionaries -theorist -regardless -gothenburg -demonstrations -motorsport -reunited -infinity -uniforms -recreational -niece -remarkable -endorsed -byrne -dundee -covenant -ffdf9f -cf -korn -lv -turing -xu -##hun -##yang -##iard -##got -##kowski -##words -##ouk -##agger -whip -unrel -canoe -bela -consent -interf -##ophage -assam -revived -inca -williamson -paler -devi -climates -nepali -oscars -trinita -throws -mongolian -methane -peggy -satirical -wilmington -brunei -livestock -550 -lys -mh -tyson -##lace -##pow -##wat -coma -##raj -marge -##issions -pooh -##aleigh -irv -raff -snyder -macc -combining -privately -navajo -laboratories -1765 -freddy -gorilla -interactions -lazio -beech -marquis -peasants -introduce -mitsub -mussolini -erosion -surprised -embedded -cv -ffe -ping -zimmer -##lass -##geb -##mart -##enheim -##ameral -wasps -##ivism -chill -##cke -afl -compose -gerhard -##onshire -himal -departure -slopes -simult -simulation -##ungen -arche -patton -##nais -refuses -georgetown -libyan -galactic -racist -orbiting -worshipped -gambling -adviser -flavour -occasional -reyk -ghanaian -subprefectures -turbine -gloucester -nirvana -wavelength -poseidon -airbender -##ppsala -ulysses -##aceutical -aki -coca -tatar -uppsala -##bra -##listed -##cially -##alon -##stru -##ulam -##osition -enorm -enjo -birch -herak -permit -recipe -weird -151 -lawn -elbow -strips -maternal -discs -burlington -clever -huang -galleries -gallagher -healthcare -vocabulary -##erback -caul -jesuit -pathanam -katrina -1755 
-dimit -licence -unclear -habits -baptiste -framework -dissolves -stirling -reddish -mormon -andromeda -mitsubishi -pathanamthitta -igu -lille -lena -nast -sich -saus -veto -wigg -yere -##aute -##oja -##ersch -ada -buttons -flick -communion -estates -##iero -apolog -maure -juno -internaz -##ometric -operators -godfather -##psis -rhy -##afe -governorates -putin -athena -networking -derbys -bulb -##rella -paths -1758 -tyne -questioned -patriot -michelangelo -advantages -tokugawa -anarchist -fertility -touches -cultivated -hoffman -apprentice -peppers -cakes -ego -fk -hats -jade -joker -kik -vault -##nor -##tel -##gage -##arna -##ilation -##owe -##iae -##rav -pras -supers -disag -##uala -pert -warmer -dole -brooke -##ups -myc -walters -portions -magistr -operative -luft -curric -currents -burials -enterprises -rupert -athen -punct -territoire -640 -danced -commercially -improving -sabres -##iability -sickness -amendments -sultanate -dramas -transformers -phylum -ponte -volkswagen -unstable -lighthouse -internazionale -derbyshire -fats -fuck -hated -lama -sights -yon -##yama -##zac -##iqu -##los -##fal -tosu -##ilon -##uno -maroon -clone -scler -##acular -bricks -austro -emerson -suborder -harps -angu -busiest -235 -gravel -henrik -urb -hostage -convict -##opsis -micron -chooses -collar -wilde -privacy -pasadena -##holder -daniels -consistent -quantities -shelby -mistakes -hazard -knocked -prophecy -##iciary -pursuit -fascist -emphas -zombie -executives -420 -aman -nail -pg -yoga -##bot -##cur -##leep -##idy -chah -sew -##onga -polk -compare -##awan -footnotes -schon -genev -helmet -auguste -##ophil -midland -donau -pauline -camou -handled -woodward -petition -olaf -shoe -akira -documented -billionaire -pedal -shrub -sioux -gorge -jamestown -1723 -confederacy -laurie -tyrann -feared -constructors -kitty -nerves -meteorological -spaniel -equatorial -##ablanca -yerevan -eta -riz -wir -##hes -##kick -##dc -##pian -##arity -##asim -##oule -##iras -##optery -##iak -##alling 
-##estar -plast -prok -##mental -exeter -grig -bloc -blades -precious -pools -seasonal -247 -socialists -davidson -torre -lifted -dalton -aspx -corey -doubled -wetlands -disappo -cortex -benton -negotiations -##apuram -nocturnal -##javik -aig -cah -gyeong -ij -kitt -senna -uh -yeast -##dio -##jas -##inians -##olly -##aden -##ianism -whedon -shri -plut -##abo -##abar -marijuana -abused -colts -##ateral -desper -moirang -eligible -##ophyll -##after -ruff -karak -cleop -primates -gunpow -tarant -observe -akbar -promotional -kimber -ernie -treasurer -gibral -autonomy -tetrap -chalk -apostolic -##garten -mclaren -rattles -grandchildren -asturias -carbohyd -reykjavik -gunpowder -ave -ks -wond -xona -yash -##kal -##dar -##vill -##vide -stiff -chassis -##osse -seism -ornam -##akis -##ieur -trend -##oken -permitted -minas -drift -scha -geologist -1300 -barium -interference -glow -hume -confident -henriet -believing -mustafa -disturb -osman -ulster -bolton -04° -illustrative -##worthy -nagano -exhibit -jessie -dubbed -levant -focusing -wiener -johannesburg -eduard -gelderland -memoirs -lafay -posthumously -alejandro -metacritic -euc -hm -mound -sb -tach -##88 -##yers -##oge -##sberg -##cad -##onies -##thest -##ovna -lech -##asta -##ellers -diaries -flank -amin -##olla -elsa -loch -##shaw -##boat -constance -##bris -burst -washing -traders -findings -mcph -detected -limbs -340 -1764 -katy -emotion -gwen -theologians -pamela -yamaguchi -demolished -brilliant -carthage -compromise -blizzard -rihanna -lafayette -hone -kinn -lom -mous -viper -##gins -##rians -##anos -##aris -##edias -stamps -forts -alicia -alonso -chong -##iances -shat -shun -clans -boyle -carc -reson -1714 -communaute -##onson -guill -##link -##unders -archibald -##retto -##heed -critically -gunn -josephine -synag -chaplin -##emption -corporations -1757 -tanaka -sorry -catching -subdivided -compiled -pharmaceutical -laksh -phenomena -hainaut -mauritius -thurgau -protocols -cbc -kost -kep -oj -oils -pinned -yaz 
-##hyn -##wolf -##mod -##oring -togo -##olit -fron -forrest -beir -sepsis -##oding -prose -arid -##ocial -compression -coinc -##leville -poe -themed -elastic -australians -loaded -actresses -auction -marking -sunk -horr -bayreuth -##ascus -huxley -naruto -carlisle -generator -randall -1740 -attractive -hypn -ignored -nineteen -unsuccessfully -jewelry -lagoon -assessment -uterus -equilibr -camoufl -bau -bose -cact -hire -jiang -kud -##ng -##web -##jana -thess -comets -conj -##illera -##endish -mayo -##indy -outcome -elaine -backed -##inkle -intens -composite -haley -verdi -camille -socialite -horace -nonstop -wilder -candle -warships -scratch -nicknames -addressed -calculations -chromium -mistaken -acceleration -pilgrims -crushed -phosphorus -gloucestershire -dismissed -savoie -710 -fia -liam -paz -wand -yen -##tics -##erjee -##orca -##enay -anonymous -##eles -##imeter -comun -##sev -manfred -sumer -claw -carver -##erna -warned -cobb -accus -##berty -natives -transist -downhill -academie -copp -planck -wilts -furious -dynam -nuclei -princely -deeply -essen -ayat -encyclopædia -cordillera -cousins -bubbles -mentions -jubilee -pursue -advertisements -neighbors -jedi -alessandro -configur -syntax -amusement -720 -eman -fanny -gaut -kall -kip -lager -lengths -mok -pv -pah -vascular -##oin -##fax -##ск -##anus -##leine -whal -##iege -##orted -##ikal -norr -adoles -resorts -intention -solids -superc -supervis -visitor -1820s -condens -saxons -legislatures -weakness -linguistic -pioneers -1492 -keyboardist -marxist -skeletons -digestive -eruptions -isotope -affiliated -shogunate -kofu -antiquity -ursula -nielsen -thesis -gewog -unemployment -baxter -cary -dahl -giles -henson -jain -kc -pins -woo -xl -yor -##iast -##41 -##qi -##oue -stig -stake -##aghan -##race -enab -##ebrate -newest -allmusic -mina -##iversity -genetically -euros -accum -magician -verbs -francaise -explodes -explicit -hawth -agreements -1771 -generic -axel -cavendish -ancestral -pigment -##metics -neurons 
-shining -treasures -steinburg -unfair -siberian -##girl -bissau -cunningham -gibraltar -510 -lapt -seng -tant -wig -##zai -##making -##ᅡᆫ -##esy -##omac -##elas -reck -sten -##etten -chh -plenty -conan -peps -##achal -serbs -slender -##shi -salts -avant -##otherapy -filmfare -##heads -##hampton -reformer -washed -freezing -benoit -damascus -puberty -requested -bombed -##iyama -cables -generate -bahia -##chenko -1754 -dvds -##ulsion -updates -monasteries -##ilisi -crocodiles -allegedly -aviator -ultram -antigua -acknowled -spielberg -##video -wiltshire -cid -gd -hl -kass -kris -raleigh -trit -ufc -yev -##75 -##aram -ante -##rown -##otrop -shed -##osto -complement -##ensky -##eco -evalu -depot -fein -traps -1100 -subd -instinct -consens -remind -supplement -extending -granddaughter -tribun -risks -suprem -fluids -doris -spectrosc -reflected -reinforce -bohem -nouns -myself -groningen -lausanne -rajasthan -beirut -cfb -gt -hound -yield -##ska -##cam -##cover -##79 -anand -##stit -stun -ascle -shade -sealed -leuc -mariana -marguer -trim -goats -moody -juniors -quoted -metz -##obal -##obact -modest -harb -consolid -skat -skaters -realize -harding -judiciary -stadt -kras -bronch -affecting -cruiser -fulk -steelers -quarterf -installation -prelimin -float -dyke -stratford -consumers -shifted -carnivorous -mahmud -condemned -jinnah -aneury -bounty -rattlesn -b2 -boll -ei -gug -hau -iro -rov -rans -rover -sint -tilt -##lene -##wo -hep -##tera -##rach -neh -neum -arbit -polling -meals -herd -##ansk -disqual -monty -coats -caled -specified -eurasia -elijah -landes -##ylus -keen -burundi -seaf -occult -entities -robbie -ballroom -projection -disneyland -casa -slee -seventy -bottles -gorbach -investigated -sylvester -gonna -rhein -sturm -deportivo -supplied -##ophageal -bai -ginger -gutenberg -sot -sart -##zed -##sar -##vinsky -inac -##ouard -##iona -toch -rejo -##idis -shake -##ishing -atp -##odles -##abil -##ichord -teatro -marm -##izo -twain -1730 -evelyn -stripe -divides 
-salary -sumo -31st -cease -boycott -semitic -##utsu -messier -wikimedia -dealer -1760 -1769 -omiya -nomadic -jointly -aluminum -responded -celestial -tbilisi -ammonia -karabakh -dm -gom -kes -rms -tain -zag -##uctor -##anium -enlar -aria -##usted -##erno -aggl -##ileen -livingston -depict -traces -longtime -##riot -archdio -avignon -midfield -##aneously -intra -hyogo -vanilla -reactive -dominic -inventions -660 -kohl -spray -naomi -linden -1603 -maximilian -massacres -thirteenth -mendel -kidnapping -javier -complain -brahms -hispani -intersection -zarago -adoption -nieder -haunted -occidental -thallium -palermo -lans -null -rott -rca -siam -tern -waff -yuc -##bred -##lop -##park -##journ -203 -stitch -wass -##addin -mecca -seren -nora -overcome -formations -els -retre -bandico -olympus -entries -garrison -##ochond -430 -shrimp -threatening -recipes -adaptive -montevideo -##hofen -algebraic -guerrilla -venomous -kaiser -wrapped -kollam -tiffany -nightclub -520 -cuck -dye -eury -mw -taur -##rada -##ße -##anim -##ischer -##iste -debris -compress -scal -scro -norris -##adec -warrant -madeleine -amal -elector -albion -careers -retained -##shot -animators -##isei -##coon -decomp -confuc -poster -petty -sharif -ballads -##bao -completion -concertos -preventing -matthews -fishermen -gabrielle -merchants -glaciers -withdraw -inaugural -psychiatric -metallic -helmut -pearson -aesth -dewey -annexed -metabolism -baltistan -torpedo -##luoride -zaragoza -411 -ett -hides -ike -icc -lakers -rai -##will -##на -thang -##itated -##itative -##ilies -frie -unde -byrd -trich -phel -flats -##ignac -syrup -##ysz -barre -##areth -loans -221 -##heart -frey -cambrian -exploded -cleared -torch -institutes -eighteenth -rainy -spokes -baha -catalogue -continuously -dramatist -suzanne -hammers -ampl -attendance -iodide -gavin -andersen -burrows -hulk -cottage -nectar -cinderella -cleopatra -archdiocese -caffe -cindy -gaza -hiber -tyn -##iom -##sium -##rier -##may -##cheng -##ritis -suce -clair 
-colt -##ernity -britney -arturo -rowing -bari -barons -infobox -slogan -busan -croix -takeshi -sanam -mermaid -campuses -advances -tempo -antis -khor -murcia -##uela -replaces -persist -translators -stravinsky -mahab -printer -cubic -injuring -##atiya -atlantis -##foret -picks -calendars -reigned -fulton -investigations -precision -pulling -##iferous -ichthy -challenger -grenada -crustaceans -multiplication -kronos -intervention -##jevo -impeachment -machinery -enormous -disagree -glee -oth -tm -vectors -##alk -##omial -##entina -heian -alarm -##igious -ung -##umoto -playboy -manson -tristan -##aci -phases -acclaimed -gotha -minus -syed -##indi -socks -bett -safer -steele -212 -254 -decis -duel -downloaded -extrat -sonn -palat -catches -debates -visa -macos -processed -climbed -charted -breakdown -figur -orchid -abbot -akron -accepts -reveal -seller -oblig -sorted -##rodite -##otypes -russo -horton -emigrated -warnings -iberian -evergreen -cumberland -extratropical -aero -bp -corm -dach -mister -##zzy -##prising -##ер -ingen -##amins -stats -##usz -asleep -##avers -joints -scho -northampton -guiana -1621 -octopus -steady -##ossa -royale -finalist -##otechn -failing -woodlands -323 -halfway -mountaineer -##mingway -rosal -liefering -graduates -exploring -mutiny -gangster -yamamoto -zhong -targeted -bloggers -##arthur -wizards -dorsal -borrowed -aleksandr -encounter -euler -adjective -semifinals -herakles -bony -dand -dok -hahn -kaf -ler -sage -tung -ure -##kaya -##mie -##arg -##lea -##idium -chatham -##agara -comuni -sey -##ongs -1713 -recycl -guilla -saud -edison -romney -sket -muh -##atorial -professors -grandparents -ceramic -##umba -authorized -bullying -cornelius -daim -hydr -investors -rotating -siem -4000 -industrialist -lockheed -preparing -defenceman -approached -gambia -##rophic -shocked -loyalty -kreis -miyamoto -claudio -conventional -sustained -pyramids -rainforests -binding -adriatic -robbins -andalusia -invertebrates -zeppelin -equilibrium -bok -hib 
-kahn -lour -mj -mens -siv -tends -tires -uph -woj -yel -##hock -##hoven -##uen -##pu -##itism -##amar -spends -##acey -deposed -albat -moose -lawson -##lined -harald -angus -magma -##aldo -dynamics -bordering -chairperson -audrey -mirrors -captures -rodgers -heroin -promotes -##zyg -trumpeter -andrei -dolj -bridget -vulcan -##bridg -kemp -rasp -allegations -microscope -warwickshire -galileo -uncertain -navarre -fist -kuala -lange -sall -uci -vod -##botten -##anity -##ayat -shing -neon -##aky -prud -teens -##very -coloss -##quist -paren -coined -161 -intercept -skyl -everest -##ophobia -deciding -salon -raids -tito -histories -burger -activism -cambridg -erd -handbook -equals -clem -325 -##uca -cartridge -furthermore -minorities -responses -resigns -ryu -catchment -converts -sticky -hendrix -chandra -fontaine -claudia -epidemic -derivative -boogie -dozens -artifacts -photosynthesis -preliminary -cambridgeshire -dorn -geb -kis -pich -##non -##fold -##ented -##olles -##iris -##advant -forens -aly -abig -mep -norma -prejud -charlem -##vez -skane -skinner -numbering -vald -##onyms -##zej -conflu -grands -antique -finalists -##bez -oxides -huff -##ihan -chairs -interviewed -##rosse -dealt -mechanisms -excited -polls -warmest -tombs -talented -frisian -vaughan -thunderstorms -ryukyu -thieves -brendan -eucalypt -marguerite -charlemagne -cds -dice -fok -jae -lenses -nod -rss -roster -samb -##65 -##dong -##combe -##anov -##ersion -hemingway -##emin -##utors -##rae -##mani -abram -notorious -##ibar -meade -adler -##class -disadvant -brag -##igned -guern -meditation -belmont -showa -interling -christchurch -##ometry -238 -censorship -morales -grapes -balkan -competitor -snap -ivy -otter -polyph -locke -shaun -1737 -virtue -geographer -caucus -inflation -reefs -determination -affiliate -cascade -bianca -hydraul -thermodynamics -bolshev -maldives -aneurysm -2d -bans -bae -eck -kyl -kwan -uc -yoshi -##bet -##giving -##cats -##pop -##icy -##opathy -heil -aladdin -##imus -##emar 
-##emaker -ares -##icher -sheila -carcin -joanna -blanch -periph -doyle -feeds -thrust -impressed -##aius -ska -253 -finest -voluntary -313 -315 -##beat -criter -etrus -frankenstein -administrators -ethical -##theus -370 -ambush -scrip -hiroshi -organizing -1766 -grasses -terrier -interpreted -harmonica -sarajevo -espana -fowler -devotion -dromae -magnificent -trophies -suceava -abigail -ditch -jokes -kiy -pank -pemb -ritch -u2 -wahl -yves -##held -##nav -##lund -##dog -##erre -##itat -forsy -##ianus -shelf -leiden -ensl -##eburg -##ikk -carly -indre -##irez -estuary -edouard -impair -romero -##aira -deform -statute -luci -##araja -sumatra -antilles -erie -veins -squash -screw -behave -bangal -bernhard -ulrich -paso -380 -billionaires -acidic -owls -toni -gifu -fragment -gazette -grunge -caracas -buckinghamshire -corinthians -ejac -synchron -gorbachev -511 -bum -dung -kuk -nails -tigr -vitor -##89 -##hey -##nard -##pad -##iten -##reck -reza -deux -chatt -##emont -unite -nevis -nebul -##ifers -##antz -exams -clips -adel -allerg -blown -coel -deploy -alban -profits -213 -mukh -imphal -winnie -blackburn -##ippe -berries -menstru -palau -greenwood -sniper -coasts -faction -olm -mariah -caspian -introducing -lovely -expressions -memorials -containers -dolly -1724 -printers -squadron -perfectly -anatom -imagination -prelude -anchors -bergman -tokushima -stretched -emilio -proportion -induced -mauritania -bolivar -mushroom -hutchinson -congregation -riaa -tejano -inflammation -stupid -guillaume -bike -fff -hott -kans -laps -leroy -nand -vans -wad -##bol -##uce -##pit -##oros -##aring -##aluation -##omeration -friction -alert -chis -chitt -##akar -##ichte -##oges -trends -scra -##eyron -##quil -perl -regen -britten -eurasian -exponent -##letic -222 -patty -specials -ceramics -fireworks -destructive -macle -paints -##colo -ull -##oulos -probable -ramirez -criticised -therap -mutation -1748 -1743 -exhibited -jenner -grasslands -nichols -vertebra -ventforet -zoology -##birth 
-mckin -ribbon -fungus -accuracy -fayette -ipswich -cain -fiat -rue -renn -wadi -wacker -yad -yates -##zia -##gger -##sf -##cies -##ст -##isp -207 -stairs -##igue -leima -##esses -##ogly -sued -##angi -##acia -mei -admission -twisted -##ologue -overij -crete -munch -undergo -jeop -211 -avengers -##ombeh -palaces -angelica -franconian -##pora -ruiz -mustard -survives -vener -opium -ambros -##married -taller -eliza -armenians -acceptance -kazan -ellipt -sellers -zhou -tolombeh -mitochond -tango -terminus -claudius -beaumont -thyroid -uncert -hectares -relevant -tohoku -wage -xena -##nau -##fires -##puff -##vall -theological -##arius -inequ -##amus -##sty -staged -##chool -##rots -##illation -enrico -peel -peab -exod -suarez -troll -clot -diagon -roe -perennial -monsoon -rely -1066 -appalach -weaving -guiller -invaders -demig -anyway -breton -pais -duct -olympiad -transc -goldman -vijay -tense -##osha -papa -##flow -bulk -##ihad -375 -payments -##2012 -reno -dominica -mccall -lindsey -alternating -baking -nichol -huntington -vasco -cortez -superheroes -##patrick -rigid -yukon -reverend -scandinavia -##worms -agglomeration -711 -a2 -bov -bia -gund -lh -pern -rory -rae -sos -##nius -##jong -##ен -tha -##onium -##aras -##isen -##esters -sufi -triv -trout -gras -musket -adobe -##oupe -creole -bests -corre -incom -bias -imported -transf -meridian -314 -realistic -badge -urinary -samson -##ariat -haifa -declare -renee -cosmetics -wingers -celine -1749 -numerals -soda -pitched -acquisition -kitts -reichstag -conqueror -yahoo -selenium -seizures -ningthou -enrolled -dordogne -transparent -alligator -nexus -asclep -peabody -bair -dros -fag -nib -pinn -rie -sorts -zw -##69 -##cot -##cery -##cci -##von -##inational -##arro -##oum -stair -##opia -alike -chey -shet -##ansky -thereafter -##isson -crude -##urya -munster -bravo -romano -operatic -arriving -malone -titus -campeon -burbank -astro -snack -benito -damaging -ferm -cellul -darren -mirza -magnets -##rosis -koz -dropping 
-singaporean -complexity -hartley -sheridan -aeros -pigeon -syndicated -pentagon -##kirch -msn -##nikov -consulting -simplest -alvarez -neighbours -rappers -vezina -savannah -consensus -bandicoot -overijssel -baja -dillon -ged -jog -pisa -rosh -uber -##hole -##yz -##vili -##mant -##oura -##etus -##opolis -hears -##imat -unve -##este -##odle -prodig -canvas -clad -abn -dieter -polyp -##endon -##iker -##outi -##izen -engage -brie -171 -1715 -schul -esta -willow -terence -quito -##obo -westward -surprising -corrid -contrad -buren -silly -santana -tracking -sands -groove -semin -reviewed -qualities -dominion -goalscor -quarry -banjo -sedan -metroid -mariner -byzant -sapph -apostle -ascension -fetus -tolerance -kilograms -immortal -caldwell -hispaniola -albatross -bion -gs -kato -lilly -nell -ngo -pett -puck -sip -tae -yer -##how -##hika -##kien -##das -##vard -##esth -##orp -##oug -##ingh -styr -degen -chia -whist -leaven -peac -spill -sheppard -commuter -alla -1711 -nationalists -munchen -##annes -submitted -surpass -default -218 -##ophus -massif -royce -ruined -checks -lowered -travelers -maharaj -taxon -domen -icons -elevator -linking -pleist -surroundings -mp3 -1745 -pushes -nakamura -geneticist -tortured -regulate -clifton -collaborated -ubuntu -brewster -sinking -aurel -leisure -molluscs -aort -e3 -kard -niet -rath -vos -##buch -##game -##itious -toby -##staff -waseda -##owl -##allo -##umo -sponge -##udo -##sohn -##ellan -scary -##ibles -carinth -disrupt -1720 -lara -rao -wrist -312 -##naissance -humanist -koreans -starship -damon -shoulders -mosaic -brotherhood -avery -islamabad -babergh -captive -##como -prediction -foreigners -nazism -celebrating -ruben -hayden -bananas -1741 -oaks -brightest -strengthen -alec -controversies -certification -sulaw -llc -maintaining -rumors -tipton -geometric -gentleman -aubin -gwyn -merchand -marginal -comfortable -nhc -advertisement -beginnings -landslide -aik -gok -kame -##64 -##jah -##mot -##arf -208 -stub -##otine -##oway 
-deh -comeback -unmarried -artery -##arden -##achen -hermes -##ruz -upside -weights -marching -##obu -slot -loz -batista -solvent -##hea -100th -##ushan -argues -##arnia -##yrus -madras -detach -robson -##yeh -okayama -viewing -combinations -bernstein -transported -venge -commissioners -promotions -metropolis -alcoholism -wonderland -cadmium -kangle -gastro -humidity -stabbed -styled -emphasis -bolivian -schumann -cinematic -descriptions -tekken -yucatan -pleistocene -sulawesi -bly -mow -mott -naur -nantes -rd -ropes -##names -##witz -##ان -##alyp -##stage -##chuk -##ayy -ala -chich -##emed -seych -lept -arbor -sper -trol -colbert -scare -##anski -broom -##gram -strata -considers -217 -magnes -epile -epoch -nowhere -vars -presenting -reflex -balanced -snout -horst -sentai -watan -photon -considering -saras -insulin -opus -essentially -shirts -lavigne -respiration -kinetic -comparative -chronology -carpet -essayists -gospels -coordinate -vilnius -##oplasm -lugano -bacterium -proportional -corbyn -quincy -centimeters -genealogy -semiconductor -euthanasia -skateboard -confluence -jeopardy -uncertainty -bison -maver -pence -pott -rko -sj -xxx -##hm -##gars -##para -##onal -##atri -##irs -hemp -sear -conjun -##abilities -excer -##avez -grip -##ansen -bla -offshore -basement -##aille -greco -johansson -elk -subc -stepped -redemption -##ometers -##ynski -colorful -sounded -pressed -hamid -pointing -prevents -esophageal -psychoan -##islava -mccain -emerging -controllers -sinclair -mi² -buddhists -rudd -1729 -1689 -fears -symphonies -hydrocar -vitamins -megal -breasts -barnum -smartphones -reconnaissance -rebellions -tackle -skirt -triangular -evacuated -tellurium -lumpur -consecr -cebu -bolshevik -ct -fide -kj -keb -keller -nurn -pesh -wanna -zed -##croft -##wala -##ativity -##lel -reims -##oped -##oplan -shand -##odz -tres -##ocar -##acus -##ptic -##erney -goa -evaluation -trait -##ographical -elab -landform -244 -unionist -actively -explaining -competes -ceph -ceased 
-multimedia -40th -garry -curtain -anthems -venetian -capturing -linz -iconic -bruck -spreads -diplomacy -graphical -##ranos -solidarity -precise -salvia -gibbs -anatolia -pavel -reflection -parasitic -minimal -##childnames -strategies -resembles -stallone -vomiting -fierce -compatible -a4 -buster -cey -faz -jared -kite -mito -zab -##fol -##wari -##jam -##omatic -##thel -##chl -##anders -unrest -##gean -trash -polly -repos -bluff -dray -gregg -swamps -beloved -mata -matsu -angular -natasha -protested -mered -courtney -ceiling -lesson -screenplays -browne -cellist -mahler -welch -cookie -diploma -lorenz -1762 -1751 -kuz -nikki -dickinson -fitted -ventura -zoos -gustaf -nasal -muddy -chopra -gcmg -aggression -unpop -induction -embryo -metaphor -crayola -marsupials -antioch -firefighters -ritchie -##alypse -dental -fals -jaff -kits -kaj -lined -lamps -sse -tender -ump -wanda -##xton -##inished -##arb -##alent -##omen -##olta -##adish -##oti -choral -##agana -shades -##rack -plankton -tec -aditya -securities -airplay -brains -eastwood -epist -avril -finite -berber -palma -millionaire -labs -erwin -madness -denise -wilbur -persians -coldplay -##uchs -commanding -innoc -hanja -molina -bagh -1685 -virtually -clarkson -lanes -kidneys -supposedly -demonstrated -ritter -ardija -rodrigo -valentin -varying -quartz -pistols -thanksgiving -dalmatian -pfaff -kendall -reversed -hitchcock -caterpillar -wonders -kepler -quarterfinals -tochigi -phelps -hicks -mla -tiles -##85 -##games -##upp -##sley -##kim -##kill -##wah -##arak -##enton -##olus -##adio -ito -deuter -asex -conde -leb -lear -trams -boise -brant -##ahua -guan -universidad -sai -##ismo -remade -303 -brewer -243 -duc -biod -transcription -menu -hardly -##garh -southernmost -tomato -coasters -protectorate -libretto -haem -cannons -digging -##comed -999 -777 -540 -splits -oxfordshire -manufacture -splitting -saharan -shelters -watershed -svens -scarlet -behaviors -feedback -##street -sponsorship -instructor -pupil 
-lacrosse -cameroonian -##garden -rectangular -unnamed -irvine -excerpt -820 -cau -pino -##busters -##tas -##grove -##whe -##천 -reiss -reilly -fram -##unya -##iaceae -byte -##odian -spat -spac -##ellen -##ococ -boiled -compton -##endron -blanco -drif -16v -traum -personally -stead -##ophile -232 -saldiv -transcript -##bride -275 -openings -rhet -jackets -connie -arguing -spacex -cirsium -convince -curie -potomac -##bonne -baum -kramer -facial -##uxe -welcomed -taft -odessa -deepest -630 -##zzle -bags -rhinoc -sensation -sensory -ligature -reflects -vinci -disciplines -psychiatry -batsman -unfinished -oliveira -resemble -atheist -docklands -dessert -nottinghamshire -meadows -caliphate -maneu -sclerosis -nicholson -saldivar -412 -bains -gama -lening -m1 -pus -ponds -vaster -vida -yuki -##hay -##houn -##ор -##ᅵᆫ -##inomiya -##algo -##icides -##omir -frames -fraz -##ulo -arj -##ocy -colombo -##acao -muscular -##activity -popeye -buena -preacher -charm -desmond -outdo -smugg -saunders -distress -provider -internationale -##ylene -textil -bertha -tempt -civilizations -giro -khmer -athel -punt -torah -multinational -holders -mosques -carlton -recognizes -lineback -bonif -mastering -bowler -1746 -1609 -tripoli -ipc -conditioning -dhabi -cooperative -biographies -kobayashi -coding -nurses -explanations -commodore -template -schumacher -peanut -proceedings -midfielders -incomple -712 -bere -bint -fid -ie -ries -wain -##nl -##yk -##kat -##jid -##inium -##olese -##etia -alchem -chok -sek -spouse -##ellular -abyss -##eys -monop -##ologic -desired -1640 -albrecht -ape -marche -elton -barely -infants -recorder -slug -##unde -humbold -solst -radhika -mercer -lineage -jacket -wrestled -equestrian -museo -url -galap -nobi -audi -symbolic -cabr -blockade -##canal -accessories -jurgen -1767 -unlik -suffers -liberalism -dissolution -cavity -contribute -exhibits -installment -clockwise -anthropologists -liszt -marxism -aviators -guadalcanal -alderman -jaguar -konstantin -ballerina 
-seafood -asclepias -meredith -aix -dare -gina -js -ksh -neder -pile -ried -sided -tides -taek -uv -vuel -##bled -##ythe -##rush -##pal -##93 -##erick -##erton -thee -##urgy -itanium -chihu -asi -asch -##agong -##osuke -##illin -exits -colder -boz -knives -repet -popp -prequel -##alsa -airliner -capita -rowan -sura -interpre -busch -inverse -honore -martini -sato -mcgu -karim -macon -olsen -lowe -##asca -hui -timb -travelcard -darfur -broncos -ashton -ramsey -therese -bhatt -suffix -optics -patriotic -linguists -comparing -hyperb -igne -tapes -descendant -##keeping -barbera -evacuation -dansband -prestigious -jellyfish -sgt -substantial -bangalore -mckinley -leningrad -911 -cinn -ery -eind -hach -kau -nw -rana -##yi -##kell -##press -##esville -##arc -##atre -##iton -##urd -##robe -##usions -altered -seap -##ifiers -##iche -trun -clues -grays -##iked -blas -acclaim -prescott -##ugi -mono -evap -bucc -ella -submit -invading -romanized -magenta -luisa -blackpool -duff -louie -##loads -deluxe -montre -technically -readings -plantations -viewer -selective -texture -stephens -shotgun -pandora -occupies -727 -waist -abdicated -mortimer -commented -behavioral -occurrence -migrated -##asmus -absolut -bentley -prosecutor -disagreed -nudity -clergyman -brug -dn -hing -lact -mates -nana -vint -zay -zod -##kawa -##mass -##anj -##isaurus -##icion -##ento -chords -chavez -shame -leah -##oreg -scan -agust -brenda -blink -upright -janata -overhead -helene -rower -mainz -longitude -slaughter -legitimate -romana -builders -transact -colleagues -freeze -##oyama -fielding -crimea -garde -colonization -psyched -darius -differs -barbuda -carrion -haydn -exciting -nagorno -rudy -discussions -hidalgo -rhythmic -tonga -clermont -spaceship -hoffmann -denomination -rehears -pharaohs -nihon -aegean -ehime -revelation -logarithm -mausole -guidance -sichuan -unpopular -unlikely -bourn -dit -houghton -jy -kosh -kauf -lumber -mould -oed -sine -##het -##rimin -##mic -##eran -##enthal -##aty -##amu 
-##ersen -bye -canals -scala -alloy -butch -flaming -outfit -lauder -endow -societ -backl -##undai -valois -##inka -246 -249 -paterson -##owska -castillo -290 -realism -khuman -verona -catast -handheld -cheet -borg -illusion -veneto -taxation -crying -charlton -##viol -reigns -1756 -##ranes -djib -yamato -tonnes -columnists -counterpart -migraine -barnard -coordination -ardern -lazy -rebuild -advocacy -dixie -rashid -hilary -tavern -prosperity -madhya -tiberius -umbrella -bowser -camouflage -buss -boca -hj -jok -kicks -mere -sly -sake -ting -yum -##hoff -##blad -##long -##dings -##rio -##pet -##ар -thier -##atile -chet -chiang -univ -uninh -seized -arun -##ifs -##ichiro -boomer -knot -regensburg -sob -strath -sledge -skins -251 -humming -lucius -revision -irc -arches -transylvania -farthest -refuse -hyundai -martins -coptic -circa -##otho -karel -doubs -believers -limb -testicles -premium -garbage -panth -lowland -paperback -pencil -victorious -burnett -darm -destroyer -brandy -fairfield -ringo -460 -mythical -sevilla -cosby -otis -definite -grasshop -scenic -jaime -hobby -miniature -##american -complained -harmonic -mascul -holloway -jpg -ciud -monitoring -synthesizer -bantu -muppet -incredible -friesland -sinai -exodus -apt -bale -daph -umar -vh -vita -##dl -##ος -##inated -##arov -inmates -##reich -##sticks -frs -chances -asym -##emouth -##ulmonary -##osia -spores -shepard -grub -##endent -megan -##awat -aguil -blond -minnie -amo -amid -amos -applying -geisha -subcom -##ophila -biosphere -virgil -reporters -olig -lowell -crimean -##gli -fairb -tallinn -policeman -1761 -monarchies -gardening -landschaft -brightness -yaman -vegetarian -1736 -dickens -calculation -awake -kicked -unusually -cristobal -mysteries -amphibia -beauv -browsers -prohibition -introduces -moisture -sectors -gypsy -mahabhar -sketches -watanabe -humboldt -eindhoven -bites -c1 -hatt -jov -jem -jard -kale -mls -naugh -rye -rios -##electric -##name -##lach -##ии -##anese -##isy -##itra -tosc 
-##oton -delet -##agin -##avis -comprises -##endi -indoch -##ruk -1718 -baseman -upgrade -amour -preference -secured -edna -lao -armored -housew -villars -litoria -227 -richie -refr -cardiop -##ijo -browning -curious -babe -dogg -bangla -paste -akita -exceptional -aligned -mccorm -mutations -cooled -nominal -cartoonists -##tiary -1688 -trustees -##hawk -journeys -marathi -redskins -epidem -armoured -kannada -nawab -trombone -persecution -weasel -andrzej -##film -diaspora -reopened -unrelated -maureen -harpsichord -raspberry -textiles -cheetah -ayr -c2 -nal -tess -umber -##01 -##mach -##icates -##ream -##olm -stero -fresco -##idia -##opithec -alias -chicks -platt -ariege -spess -exotic -##clan -blunt -blonde -guido -universite -novak -traced -medley -tertiary -##aean -decree -patches -farn -historia -##eshoe -burgh -verbal -explode -32nd -memes -bluegrass -reactors -auditor -carla -automot -winged -hatred -sidd -crimson -1725 -1683 -scorers -employer -##ogenic -regulated -pamph -ingredient -submarines -federico -daytona -desserts -brewery -enclosed -rousse -overturn -dmitry -archduke -takahashi -dissipated -arteries -tribunal -vitoria -guillermo -solstice -igneous -cardiopulmonary -ease -fais -gail -ia -kib -lm -pelle -sver -tatsu -##gos -##rays -##cans -##moor -##71 -##inette -##oran -##arra -##ags -##ithe -##ogram -##ellier -tragic -##ocytes -grin -boer -##ibes -mek -brest -gerais -cale -traction -quill -milo -slate -defects -locals -batting -sala -camping -donnell -donovan -gunther -approve -liberals -affir -ashik -##hoe -kalmar -paddy -bathroom -employers -1644 -##emiah -chancellors -switching -requirement -mateo -subscrib -hutton -deposit -barrels -cinematography -timberlake -ezra -penalties -bratislava -fresno -mclean -varsity -incomplete -galapagos -feng -fiona -g8 -jma -lur -lash -rishi -yur -##61 -##nick -##gles -##gara -##fan -##wulf -##alke -##statt -shaf -##itha -##arted -prisc -spont -spurs -marjor -sukh -bres -brighter -flamm -##irel -willard -weil 
-1623 -##grad -marched -infamous -deceased -##etsu -valpara -##ccio -litre -london2012 -intensive -##lof -titular -mening -daven -markings -##psy -lightweight -honest -handic -debian -debbie -socially -visions -equity -##othe -watkins -##ajima -hospice -collide -translate -##zoa -##verse -surrounds -splash -fatty -categor -packages -celtics -picasso -duration -bounded -mesozoic -illegally -##skaya -concentr -zheng -zoologist -coco -poisoned -maintains -reiner -##asmine -sponsors -accurately -gaston -pistons -cayman -stabil -dionys -schubert -nakhch -tectonic -iqbal -mourning -scrooge -caffeine -pembro -carinthia -outdoors -valparaiso -020 -gf -nug -ramb -##haven -##houses -##bas -##tc -##try -##uber -##ᅥᆼ -##ın -##ᅩᆼ -thalia -theo -##atos -##isans -stellar -##ayo -##akawa -spine -roan -caret -##asser -parv -maynard -relate -schwart -genital -northam -switches -moe -maize -underwent -killings -glarus -valerie -307 -corros -luca -irina -blacksmith -patr -patents -merely -palazzo -famously -astros -judah -hohen -##udeau -sharia -wilkes -34th -translates -kidd -mandate -mandatory -sinatra -glasses -herschel -dietrich -aspir -1732 -levine -databases -thickness -cadet -remixes -answered -macedon -portrayal -glacial -disciples -gottfried -implemented -puzzles -miscar -corsica -sachs -syllable -posthumous -impressive -parsons -cigarettes -dmitri -dengue -frauen -himalayan -carbohydrates -mausoleum -refriger -automotive -arose -cess -coss -cove -eil -hairs -lulu -lrt -mest -mendo -oste -woll -zulu -##hir -##hai -##bay -##jer -tott -stade -##adur -##aday -##opoulos -chloe -shib -ork -neoc -arv -##icht -diarr -roast -##ibur -borneo -foley -presum -relocated -albu -##enny -starch -octave -surreal -matte -gladi -##aev -litter -auf -##minton -northernmost -successes -horus -##ijn -chew -macarthur -gunter -condorc -requiem -housed -1744 -rossi -miser -kamakura -alphabetical -ravi -sulz -sauber -paired -mariners -mallorca -discovering -ogden -##gregor -kristen -evangelist 
-bulletin -antoinette -stimulus -##umberland -sacked -cobra -strawberry -lepid -reelection -compensation -simultaneously -synagogue -hydraulic -biodiversity -condorcet -diac -maw -pesc -rift -sammy -tash -##zza -##law -##mother -##53 -##54 -##alm -##omys -##elis -##avar -##akin -pee -trivia -newcom -##acha -##duct -blum -1710 -regier -depress -songwriting -australasia -241 -seneca -226 -constable -231 -paternal -merrill -topology -copd -benf -alexei -hostile -panj -microw -requests -closure -encount -documentaries -##sexual -sadie -1680 -salvatore -pinoc -pulls -salesman -revolver -carpent -##fsr -bilbao -coden -stefani -yankee -advocates -reyes -metallica -fluorine -dormouse -hiking -impressionist -excluded -muppets -remnants -huston -balkans -csi -dian -dots -eileen -fina -gud -iee -nate -nelly -pills -rer -sæ -uch -yomi -zac -£1 -##87 -##range -##parts -##vale -##inius -##aray -##stetten -reut -stard -bele -##igu -##isho -##estra -##akan -peers -pry -spines -worry -##uban -##ibus -##ansa -repairs -prescription -##weiler -regis -amrit -cradle -romagna -exports -subgen -banda -valence -301 -saline -##brough -malaga -attacker -davy -mora -stiller -##meister -badminton -woodstock -torino -climber -##gaard -ambrose -liner -tuna -barbie -flagship -guardians -packed -sundays -coolest -djs -succeeding -attracts -knighted -cassette -ronaldo -1676 -scriptures -instrumentation -weighing -gotham -fibres -cockato -amphibian -crowded -sixteenth -matsumoto -cultivation -marianne -inferior -exploitation -ecosystems -scandinavian -peanuts -cholera -deliberately -rosemary -magistrate -configuration -ultraman -hepburn -peripheral -campeonato -seychelles -uninhabited -ciudad -encounters -aul -fx -fak -nub -pors -rend -zeb -##67 -##aedia -##bull -##oids -##fell -##ол -thoibi -anus -##oland -ston -##eters -seated -##aper -enact -pri -##orte -countdown -firms -footh -knut -1717 -bask -schro -swear -gubern -medusa -providers -##inker -corv -luz -comedienne -aveyron -keel -delph -hyun 
-africans -fallout -nonviol -arachn -panor -violations -scotts -requiring -principe -investor -sawyer -heroic -picking -selfish -sabah -namur -siena -kalam -hollyhock -soloist -suspects -mercen -hindenburg -1605 -konrad -spectacular -cassini -striped -1675 -dresses -echid -sopranos -laguna -decorative -mentally -riviere -fifteenth -instrumentalist -perpetr -ankara -fugitive -1700s -triangles -fahren -kagoshima -extensively -giselle -nilsson -curriculum -karakoram -magnesium -spessart -davenport -pinocchio -gide -hov -oul -rites -tug -wight -##ll -##log -##ев -##anya -##esque -##inate -##ischen -##isław -##omans -stru -##irds -##imid -##ulse -##arten -orson -spite -##ogene -trillion -##eya -flug -reluct -schaff -nationally -swat -1630 -##grin -slater -skate -discrimin -farrell -merge -carolyn -##lux -meme -1810s -violation -vanu -passport -ramos -neglect -##199 -defining -fernand -precurs -baptized -mazur -bosses -grossed -dustin -cortes -grenade -phyllis -chestnut -##proof -supernova -leaked -encrypt -apocalypse -accomplishments -ethanol -wartime -appreci -spectroscopy -bohemian -psychedelic -aichi -ging -kitz -koln -pals -ucla -##into -##orak -##lements -##irl -shag -shiny -##rax -bytes -##estro -##icans -lez -env -usc -canaan -compost -serena -admit -##quette -agra -##lesford -doha -outc -overland -1521 -fees -moines -emu -implies -##annis -infancy -##oble -interst -glover -##boa -assume -discrete -##anished -superint -passive -marker -billed -freem -hostages -galv -wallis -bernardo -##atoire -microorgan -390 -yellowstone -motorcycl -coupe -surveyor -migrate -##organic -puppete -refugee -##itarianism -consequence -aphrodite -syllables -enhanced -persuaded -##0e6 -supermarket -barracks -##igraphy -guarantee -rattlesnake -cuckoo -prejudice -vengeance -ashikaga -mendoza -coke -ew -fencer -jensen -publ -pension -riga -wines -##eav -##litz -##rou -##jima -##vation -##ич -##ᅧᆼ -##inov -##lein -##opo -##owicz -##aggio -##berries -proclamation -worcestershire -cans 
-excommun -trier -megh -knees -wario -1712 -schemes -northumberland -weis -##azawa -1550 -gamespot -bravery -252 -blackwell -finch -goldsmith -freak -cardi -trades -postage -zeit -preview -wilkinson -execute -libre -galician -##iffs -##office -mahal -dominique -marketed -bullfrog -marshes -hiragana -hartman -vacant -mortar -detection -baroness -kathy -marrying -thorne -forgiveness -disappearance -##esperson -confession -diagrams -aztecs -giacomo -coefficient -dauphin -whalers -vertebrae -darmstadt -bauer -gaw -gauss -lough -meng -nodes -pests -rite -zoro -zyg -##base -##eze -##zs -##tu -##gence -##rud -##vol -##vings -##enia -##enco -inert -##asaurus -##elier -dehyd -chaud -sele -shek -##acies -##acaf -musee -parap -butte -hadith -perd -monted -contested -greta -calabria -##azia -quote -imposed -strom -##obyl -childbirth -filipp -##undra -systematic -baton -wally -##ophiles -villagers -magu -248 -223 -263 -radcliffe -vols -password -rhymes -cyril -soundgarden -similarity -cheval -woodrow -frankish -penet -shipw -faul -baud -##ievre -doctoral -celebrates -selma -removes -reasonable -spokesperson -lynne -miocene -definitely -##walt -yeh -hazara -flavors -fukui -exercises -bromine -cristina -trenton -multiply -lesbians -beaufort -feudal -phosphate -##querque -gimn -admired -premiership -lillian -yusuf -nomencl -montpellier -polytechnic -hussain -##stitutional -##otechnology -hottest -chihuahua -benfica -ieee -gubernatorial -b0e -cue -ea -hary -hana -jang -m2 -wiley -##bah -##oft -##zie -##tl -##ик -##anilla -##leaf -##imental -asper -unfor -ench -abkh -grizz -##sports -##ppo -musa -parrot -resin -gertr -1719 -wills -depressed -moreover -guj -gupta -##inta -##upe -surah -##letta -redes -animations -anno -decid -fargo -donat -jewels -motown -snails -executions -jimenez -38th -messiah -leonese -blocking -##ridges -1753 -1735 -1666 -1661 -approaching -mongols -pavilion -reinforced -mackay -dwarfs -lombard -exposition -heathrow -sikkim -reprinted -prohibited -pompey 
-recruited -quotation -dharma -amnesty -cullen -genevieve -frieza -ceylon -aguilera -vanuatu -b0e0e6 -etern -gue -nid -rude -subl -soto -uae -wey -zeta -##tus -thel -##stat -##urian -##opath -hee -chuk -##imited -ors -spare -##ograd -##endar -serra -##enskan -##econom -co2 -schult -chargers -guth -emit -thresh -starter -submission -loads -glendale -archived -bios -273 -railroads -karma -watford -robber -photons -institutional -santander -drav -##flight -inscription -##iyan -caes -concacaf -arthropods -odin -odor -440 -winslow -##walker -jordanian -toyama -sahib -hydroelectric -pelop -sulph -cereal -sergey -aubrey -miyazaki -anarchism -blowing -altogether -blessing -delegate -fascism -melodies -keeper -leverkus -shabbat -crystalline -convenience -doraemon -shrewsbury -enslaved -corridor -trolley -maneuver -transactions -fahrenheit -montedio -dfb -dangers -kali -node -nanny -sia -##buster -##yne -##gam -##path -##jit -##400 -##qing -##icio -toast -frustr -prome -trudeau -herring -phou -##ensch -flush -monoxide -genu -belts -terre -emmer -australopithec -glove -staten -midwestern -vargas -palo -palms -morley -morrow -cathy -suns -sunrise -cantor -melvin -macqu -ballard -36th -routine -42nd -aquino -scroll -mythological -generating -sweep -koji -documentation -luciano -analys -embra -sails -apartments -sherwood -1742 -hangul -1709 -1643 -dressing -imaging -allegh -bernardino -mckenzie -prophets -tendency -ernesto -examined -stretching -tasmanian -reptile -bedfordshire -hajj -hesper -plaque -herbivores -assyrian -blossom -desperate -decisive -elliptical -djibouti -panthoibi -naughty -leverkusen -cfa -dane -gory -lg -nf -piers -tis -tard -ural -uri -##hat -##eous -##note -##fest -##farl -rebo -heck -##otan -##owell -shipped -##raz -##isham -##ishops -atoll -pluck -spans -##ipak -##gev -caring -thereby -blows -blanche -televised -defenses -##rij -pao -powerpuff -louvre -##hammer -louisa -handles -naturalised -##ajara -rounding -garda -fortified -inscriptions -straits 
-##azzi -##rels -messaging -investments -theropod -springer -lydia -freedoms -wolfe -clearing -##ews -1752 -gangs -##iabad -karls -1703 -bartlett -tyrone -thicker -rhythms -mozilla -joanne -parasite -wicked -initiated -marquess -tightly -prostitutes -meadow -hymns -##svenskan -oyster -avoiding -wavelengths -farewell -unconstitutional -incarnation -enjoys -thessalon -appalachian -styria -roanoke -albuquerque -lenz -nino -##finger -thesp -##lek -##thi -##elman -##unton -shay -seah -atal -##ipes -displ -paran -##week -##tened -pooja -bij -summon -petra -##iscard -gardiner -ulm -branded -mandy -embarr -observers -bedroom -dispers -1747 -liquor -1701 -licenses -repeating -1670 -archbishops -baptism -pronouns -storing -checked -emirate -hammerhead -meteorology -coordinator -abandon -circumc -gersh -casualty -erected -terrace -precedent -overthrown -ljubl -extensions -kilogram -cheek -receptors -matthias -hathaway -perceived -mixtape -pepsi -forensic -umberto -faisal -nakhchivan -##farlane -80s -bays -gaius -lola -nause -taw -vogue -zig -##iards -##sum -toon -##enti -rector -##opal -##otus -shack -seag -ats -##abel -spanning -##ubo -marquez -manners -clut -comprising -##aching -carole -butcher -monophy -amman -statehood -outlets -outfielder -airfield -laut -marches -impl -elgar -##ietta -augusto -wouldn -obes -voicing -runaway -228 -duk -kei -pune -postp -replied -poetic -balloons -citations -35th -barbar -aqua -emerald -470 -napoli -717 -responsibilities -hinter -unlimited -ansbach -coolidge -1606 -pinto -encyclopaedia -horseshoe -tolkien -viceroy -coffin -narrowly -eisen -sponges -inductees -francoise -orebro -britannia -supervill -reconstruct -alsace -intervals -permian -avoided -myeloma -paddington -predominantly -rhyme -complaints -merchandise -vuelta -mahabharata -marjorie -70s -a5 -aeth -jap -joss -kast -oise -oven -oleg -tull -vign -##yamba -##kovic -##fight -##cers -##jm -##leased -##ilus -##amt -##step -##chach -##etri -chag -chub -##emus -lef -leib -ariana 
-spells -##ichy -##ellin -trous -##endicular -knicks -alloys -twitch -disse -resolved -drill -geophys -##ysema -##ancourt -subord -valer -annot -addicted -seeks -catarina -pressing -detention -chefs -zeph -damn -cantonese -kilm -rogue -wikis -kongyamba -lyons -promoter -expressway -##prints -hornets -lynx -laden -mesa -cocoa -sensor -niagara -pipeline -subtract -hobart -##ilingual -mysterio -taxonomic -quintanilla -slipper -##ungsbez -struggles -saarland -gentlemen -##horse -disguise -nuevo -bukit -interrupted -punctuation -mcpherson -guernsey -inequality -absolutely -zodiac -cud -dang -fife -hays -hanks -hagen -mard -sash -wb -##kr -##rone -##omal -##elids -##stani -##ayas -heel -chab -##iez -pritz -##ogg -mania -manchu -clav -newbery -##ighton -boch -##acs -##acional -cara -##onduct -adver -briggs -contests -charities -chariot -##azquez -overwh -europ -socrates -steg -2500 -##burne -234 -salman -holby -faroe -malware -297 -##mination -bala -youngstown -restless -stationed -karam -wallach -papyrus -derives -sarat -mirage -miriam -cabbage -practition -practicing -antonia -eyre -lori -bonding -fredrik -nikita -1728 -scarlett -cypriot -honoured -kaye -schwyz -confessions -emphysema -mummy -antibiotic -monterey -chernobyl -evolve -merseyside -mimicry -conjug -etruscan -monopoly -regierungsbez -nomenclature -overwhel -a6 -caj -fence -nz -sime -soria -##bil -##zko -##pcion -##waffe -##juk -##vere -reinc -##etano -##oting -becker -##arty -proxy -ender -arjun -spann -##athe -sheen -grimes -boar -##iburg -##accio -meuse -parano -afterward -conti -contamin -basins -amher -1612 -teamed -crank -##cessors -starvation -expo -surve -looney -maturity -##enga -219 -305 -anybody -afrika -italiana -paulista -hygi -francais -judd -324 -lesotho -sarc -##gencies -ghar -foxes -psycho -textbook -395 -bowen -mcclel -590 -mandel -05° -##hoek -contributing -ayers -fixing -crowley -##anteed -jumped -calculator -hydrochlor -ovarian -katharine -1536 -faculties -guerra -tortoise -jinja 
-withdrawn -summerslam -gyor -guaranteed -werder -seventeenth -triggered -ustase -inclination -luftwaffe -pritzker -hygiene -010 -bary -dx -daly -vt -##bill -##tors -##sdorf -##wu -##alone -##ouz -stacy -wasp -##illan -##illand -##oglu -trif -roz -##ibor -##perms -##ideae -brum -ministr -guilt -1519 -mund -##upo -correl -croy -262 -courthouse -malmo -confront -billiards -argus -ruhr -satire -collects -inserted -rosie -miracles -akk -740 -570 -gilmore -weakening -suspicious -kanye -illegitimate -indicating -brunn -srin -nitro -gibbons -rushed -reconcil -reservation -sikhs -rhea -andersson -perpendicular -salamanders -razor -heterosexual -multiplied -bharatiya -enhance -gondwana -staple -seizure -mcgill -polymers -supermarkets -eureka -uncommon -dignity -fyodor -shankar -plastics -ffffff -kangleipak -tottori -sæson -superintendent -ljubljana -agh -bung -e5 -gps -ith -jays -mapping -nel -narnia -rb -vance -wigan -zan -zwe -##bug -##yas -##tan -##moke -##inz -##isers -##ascript -##olor -stork -##irr -onion -deacon -chie -seating -bypass -encom -pegas -prc -sph -##ogues -playback -playhouse -##elland -clover -##phen -collo -phob -blame -minamoto -buys -amen -1650 -##oller -##ysh -quir -divis -philologist -romani -portfol -luk -236 -salty -midi -confron -##oises -grape -murad -hoax -sidekick -privat -predictions -automated -##richt -dolores -##iatus -laval -1648 -zoological -acquitted -ozone -tempore -porcel -terminator -ardennes -fibrosis -javascript -fujita -peasant -unicode -guadalajara -tentacles -concepcion -gogh -scorpion -##stroke -hawthorne -caretaker -beleza -foothills -embarrass -reincarn -amherst -beryl -dull -jens -jima -mies -nave -silt -wakes -yester -zer -##yre -##fact -##comb -##with -##jord -##mins -thong -##anum -##onite -##oub -##adas -##roth -chased -##unas -##iale -tramp -trough -##lander -mezzo -hermit -##acea -##balls -bresc -##tee -minaj -ames -1610 -1540 -helper -##tley -harrow -stew -waterford -valery -##ontas -##inkel -##elsen -comedies -keaton 
-holm -##written -burden -grac -##apple -khat -erm -authored -robust -tener -collectively -322 -oppose -prefectural -climbers -trois -panthe -walloon -gunnar -355 -potentially -rosso -cabaret -caption -escort -cutter -pickup -forestry -596 -unitary -##knot -715 -horned -conquist -hangs -1727 -nutt -1649 -ferrer -limitations -##clusive -ribe -correspondence -duchesses -ocasio -paraguayan -reprise -innovative -abolitionist -shivaji -thoroughbred -micropro -nebulae -abnormal -searches -masculine -fairbanks -gideon -gertrude -aachen -cate -dich -taz -tnt -uday -yol -##riv -##wash -##aney -toed -stained -frig -##idas -alta -asb -##illance -##ipation -##ichael -marrow -mani -manny -##orean -##ictive -bloch -##tech -##ahr -regiments -##azon -emissions -elle -wherever -octavian -instagram -socio -interrog -brawl -##atsuki -assim -decent -addams -239 -265 -solv -louder -richer -advised -lineup -burgl -morse -anticip -talents -termed -judas -mcne -peterborough -distract -garlic -##geny -timing -stanisl -laying -routledge -##market -acceptable -hirsch -embolism -saddle -strengthening -ferns -1738 -favorable -dirk -1632 -sensors -attachment -paleolithic -heavenly -ichinomiya -igles -1572 -dusty -decreases -paradig -sociologists -magnetism -lichten -satisfied -knoxville -trenches -tastes -astrophysics -vauxhall -leavenworth -asexual -buccane -surveillance -dina -fum -fielder -fonda -gaku -v8 -yew -##aq -##city -##jad -##map -##anid -##enb -##ouac -tog -##amir -##iese -sein -enforced -arous -##essy -afi -##acz -##acca -cooke -schism -noodles -quiz -impr -##linary -interfer -invades -welles -voor -magna -classroom -264 -solitary -membranes -conform -realms -donn -fremont -rhod -##usha -wentworth -entre -equipped -samana -melted -321 -middleton -33rd -osage -jerzy -grocery -identifies -platyp -fairfax -piero -kimura -packs -packaging -crews -1739 -concentrate -bourne -environmentalist -environmentalists -1598 -toxins -dodd -gustave -rouen -1533 -fisheries -vulgar -rejects 
-photojourn -grenad -sibling -possessed -yankovic -##olyte -##espan -swansea -pixel -pompe -slipknot -polynomial -recruit -squirrels -bartholome -remnant -vipers -pygmy -privileges -pueblo -hallucinations -heisenberg -ransom -chittagong -cheyenne -boomerang -ministries -tenerife -durr -epp -eton -fission -lop -pess -sana -v6 -zork -##eck -##four -##72 -##ᅮᆫ -toss -##cey -alber -chasing -shov -##arton -plc -##odact -##iek -ari -mango -webber -##angen -##izi -##izable -disob -flair -schn -regained -##osei -guidel -calhoun -saito -maas -##oodoo -stargate -internacional -slo -champs -interl -remakes -242 -patel -intact -281 -extr -ordering -donations -economies -catcher -coloring -monta -benign -jude -factions -almond -##woods -voter -globally -##mitt -hoping -ratchet -51° -reasoning -suggestion -bahadur -##ships -axes -hanson -##aroos -trusted -vertebrate -1642 -mattel -ridges -determines -hurting -thrower -interpol -aborigines -carpath -cambodian -cosmology -##uhle -ninety -sulfuric -repaired -tensions -slovakian -carnivores -incorrectly -wolverhampton -mhz -##opteryx -retreated -forsyth -drosophila -unveiled -domenico -taekw -hyperbolic -haryana -cim -dann -eocene -faint -fuchs -hald -jai -lure -nach -ode -uy -vold -wages -##hus -##fle -##fels -##erk -thun -thinner -##inis -##lew -##reb -##thra -##uram -##irth -ita -unse -oranges -levin -spiny -suited -haskell -firth -adige -afterlife -##tea -soils -swelling -poisons -moj -larsen -maast -capcom -endoc -milestone -jeong -invinc -##etsk -humour -237 -raion -testimony -middleweight -multilingual -335 -gunshot -candles -choi -separating -flightless -384 -tuned -870 -stocks -odisha -emergencies -787 -ottomans -pediat -560 -fashioned -##points -1722 -1721 -sediments -cassidy -spyro -advancement -chatter -diablo -sapiens -##ebook -adolfo -cheney -navigator -antibodies -reprint -solely -erupted -##straße -tropics -mahmoud -manifesto -tucson -yvonne -narciss -caledonia -hibern -rhinoceros -subscribers -schwartz -hydrochloric 
-croydon -guidelines -011 -aeg -aaa -bf -bop -bival -cine -ether -fenn -gins -goku -m6 -upt -vc -vive -voodoo -xian -zell -zanz -##eche -##yat -##oya -##kis -##fors -##pack -##vig -##xia -##ин -##itos -##otto -chico -##aguan -shal -withers -##ckets -##illard -enlisted -arenas -spal -##sef -clones -hass -sciss -##izia -joze -aging -bliss -1716 -relates -depths -noth -guantan -poz -##ollen -governance -emission -landau -glaz -##inky -224 -salle -sumner -farr -berk -bering -berwick -vein -horatio -cantal -cirrh -informed -informally -nonpro -melts -##ajar -501 -azores -lowlands -hua -costello -##adores -fortuna -uluru -eaters -hawke -pacif -##comput -rolled -captains -lifts -illustration -fulham -dispos -##holders -kerouac -hershey -phoebe -orbitals -##enhausen -ignor -zoolog -milkweed -maze -hurd -punishments -subscription -expeditions -chandr -gymnas -skulls -pereira -margot -pillars -udp -moldovan -cavaliers -bloomberg -nemo -turbines -phonetic -instantly -calderon -accelerator -##anguard -disagreement -gladys -struggled -disliked -herzog -seamount -##oglob -##thorpe -eccentricity -jewellery -disturbed -harassment -motorsports -prasad -kimberly -##obacteria -landforms -indochina -microorganisms -unforg -nausea -dwell -ee -fancy -gems -gills -hens -kling -mtr -oun -ocl -oder -pid -pats -ragn -vell -yin -zub -##zio -##vara -##qq -##eren -##anova -##enet -##arre -inorganic -##isely -##omers -##lefeld -##entes -reind -##ayi -##opro -beams -##imos -##allah -spelt -##ritt -suits -##oute -knob -indigo -##quim -parrots -##ahontas -schalke -regener -geologists -teammate -yorker -barred -##airn -stripped -steak -christi -incub -raises -takashi -soluble -delg -295 -311 -academia -rhon -streak -astrid -freeway -rufus -333 -mixes -bangor -islamist -opio -conservatism -##icultural -participant -##zyk -bullock -frederik -tagore -sherlock -misunder -friedman -sickle -remembering -normans -1634 -barrymore -battleship -colonialism -bogota -sicilian -##govern -##jarvi -prominence 
-piquet -cosmos -vampires -nominees -gp2 -##missions -polynesia -guangzhou -verdict -encountered -brackets -residences -intestine -pocahontas -vittorio -parachute -micronesia -aortic -nauru -traumatic -lauderdale -asymmet -porcelain -guantanamo -boles -gaud -jian -pies -rall -sween -tz -tint -vet -yvel -##num -##cini -##was -##vist -##mies -##anze -##atical -##elos -##chichte -##irol -##etism -##agra -unanim -lease -arama -##ebol -clank -caric -##0000 -perug -moncheng -goethe -estelle -guaya -steering -bracket -cori -irr -midd -261 -282 -283 -##ippa -299 -morality -florent -snork -putnam -prevail -israelites -panzer -gunman -##fla -##ladbach -distribute -cannibal -705 -insert -tsub -praet -lasker -slowed -functioning -bede -douglass -gregor -piccad -ladakh -1726 -##breaker -##iphers -1638 -##friends -sauces -yugoslavian -absorbs -kiwi -fibre -anchorage -methyl -nicaraguan -hermione -leakey -soyuz -congolese -##atomic -catalyst -aaliyah -invasive -ammonium -##gotland -resonance -enlarged -parenth -criterion -impaired -conjunction -macquarie -taekwondo -gymnasium -monchengladbach -950 -bland -bals -caz -ciphers -dose -eject -fiance -faye -hates -hae -jiri -jafar -jasmine -kiel -monde -rpm -suez -tf -verne -wai -x86 -##nin -##sworth -##ра -##edi -##itr -##ouf -##thys -onions -##cea -decca -##agle -whl -##terozoic -unle -sewing -spoon -tep -##rito -marley -manu -reply -##ignano -resumed -moors -artic -##wayne -jehov -steals -humorous -valves -##ophytes -villar -229 -headache -moran -astor -hardin -deton -promising -sargent -nicky -sirius -##evsky -opel -rodent -originates -drying -settling -observing -illustrators -slavs -weaken -##egna -jamal -venezia -1667 -gravey -1672 -scarborough -chromat -smartphone -integrity -masked -cleric -substitution -attributes -dormant -bharat -zombies -aristocrat -rochelle -yogi -surgeons -pentecostal -remembers -elegant -akih -deciduous -bartholomew -piccadilly -baek -cured -fits -mime -mace -padu -rok -sanger -vanguard -##kie -##vil 
-##ır -thi -##omas -##ionage -ales -##imura -sears -conm -pea -newt -roque -meur -allsvenskan -##awks -goof -swind -weir -##neur -highlights -##ographies -quark -##amax -##ismith -underlying -slade -##kei -filters -simba -backward -philips -righteous -corr -superf -delight -extras -passau -##ombat -economically -##plan -continuation -snail -screaming -hooks -samoan -bored -gallo -haber -drains -curling -jeremiah -37th -cartridges -propriet -730 -casablanca -closes -tsuk -stephan -dunes -pyth -snowy -naismith -pleased -christophe -##escence -ryder -discusses -hurts -1692 -toxin -##rophe -kickbox -protestantism -stefano -absorption -gnome -smokey -kelley -roofs -caucasian -eritre -centimetres -cremated -fiorentina -fulfill -awakening -cactus -laptop -iroqu -spokesman -interpreter -encompass -jehovah -cumb -fender -fibr -gough -jtw -mesh -nant -uma -zd -##bok -##nom -##olev -##zadeh -##kor -##74 -thapa -##onics -##arum -##eln -reper -cheryl -##ainen -##esson -##udy -abrams -rohan -commut -##ectives -ads -phar -upload -noailles -electorate -overc -capuch -elves -barcl -armand -womb -##ophilia -operetta -##anek -269 -wrath -sanford -thiruv -downloads -sonatas -needles -debussy -devonian -fewest -sexes -woodpe -stronghold -penelope -illumin -commitment -fortun -cartil -assembled -facade -moonlight -raman -corporal -crucial -carboniferous -rugrats -alternatively -lockdown -nikola -coward -viacom -sardar -luxembour -##ogenesis -crisp -revolutions -comparable -blogger -demonstrate -puppets -##amanian -rebuilding -pillar -vivian -komodo -abdominal -##stedt -rubinstein -indirectly -brennan -sausage -tetrapods -##oglyph -peacock -elaborate -maastricht -zanzibar -guayaquil -jtwc -90s -bingham -cj -cotes -dq -fencing -gaps -huck -hiatus -ipa -jad -oax -pasha -qi -sill -samm -tid -##yards -##oese -##wiki -##mount -##alur -##ingual -##chief -##adze -aldo -##igon -##imm -orr -cones -aromatic -##abella -mariano -sule -cared -##tois -##ahed -monoc -##ffen -amd -depicts -##neg 
-overnight -##intra -strangers -##obile -islander -##ropy -glue -##elsdorf -##elssohn -luf -luzon -addison -rais -286 -##ukas -##ipps -loser -cyst -storyt -judo -334 -espionage -bully -esch -704 -insurg -faq -rosenberg -adventurer -lining -destroyers -logos -escal -drey -barbadian -gabor -numeral -transitional -precisely -wedge -tripura -attorneys -soder -ignacio -pronounce -coronary -sophistic -mansell -jarvis -kathryn -eliminate -cypress -challenging -escaping -rearrang -amplif -mendes -mendelssohn -cska -stylized -bhutto -dysfun -melodic -microscopic -satisfaction -mushrooms -flamengo -euclid -bodybuilder -sewage -supervisor -colossus -innocence -obesity -brescia -reindeer -dass -e4 -gow -jat -kach -kup -kami -lsd -mural -pyeong -ritz -tipp -vesp -##82 -##hari -##bos -##bru -##lau -##uis -##pas -##95 -##onists -##arab -inex -andaman -##lez -##entz -reapp -##adar -##ivores -alve -deeds -##emort -##raw -secession -##api -##ifolia -##ameen -afonso -scent -##aterina -##enshire -##ecrow -amel -craters -quay -impulse -eliot -endeav -underwear -armando -##shut -angolan -listings -redisc -romanesque -epith -luiz -irons -natur -suppress -campo -campos -markus -##inoff -greenville -rails -karan -cleans -raptors -accordance -##olester -curves -distributor -timur -bernad -cartographer -recognise -beatty -facil -synaps -rods -scranton -sugars -##trak -sundance -kermit -sedg -grassland -1682 -symptom -scarecrow -pretend -1553 -havel -blooms -apparatus -kelvin -stretches -anarchists -accompaniment -##odonts -nursery -sparks -alkaline -bjork -ankle -tacoma -vigil -squidward -nudum -##runner -cigarette -fujiwara -bieber -waterfalls -eukaryotes -pomerania -highlighted -neanderthal -flickr -enabled -acknowledged -eucalyptus -cellulose -vintage -schultz -displaced -cirrhosis -yvelines -katsu -riy -rsa -sbs -xd -yous -##81 -##83 -##kon -##kah -##mose -##erty -##edience -##icesters -anwar -##lehem -isid -##elih -recess -stav -hebe -##ulations -##oski -lei -leicesters -pele -marlon 
-abus -abby -##ryl -knots -lively -tweed -displace -monot -mined -drone -appet -1618 -##news -smells -1520 -##asek -attitudes -rowland -modena -watergate -millimet -306 -##ontroll -annette -walden -285 -publicity -##omba -cathode -##mei -ruf -sounding -broadly -hamburger -fishes -prima -matching -##iscus -breakup -##steen -separatist -burnside -qualifier -textbooks -orchard -safari -##iasis -escobar -akadem -programmers -830 -commandments -mccoy -##bitt -kyo -680 -580 -pedestrian -##151 -alek -1687 -theorists -1734 -##warts -nyct -perfection -1697 -terminals -conversations -choirs -cockpit -bethlehem -pharmacy -gemini -wakefield -patented -symmetric -determining -bikini -optimization -excluding -overlap -dislike -sympath -bielefeld -positively -assumption -archduch -stamford -ortiz -accusations -subdistr -subcommit -portfolio -leicestershire -bakh -caine -eber -ewing -kors -rn -tundra -wemb -##yuan -##fre -##pok -##willer -##96 -##73 -##amis -##amatsu -reel -stakes -##oten -alar -##unaga -##utter -proxim -spor -##orna -shea -serene -blend -coen -##ireann -relics -genn -britons -guzman -outlook -electors -1530 -geologic -airstr -provisions -elmer -landings -brace -valued -walther -archimed -currencies -contrary -272 -attackers -merida -capitalist -##hev -##queen -talbot -computational -copeland -judged -postcode -wilma -climax -##olfo -##genland -bridgeport -385 -45th -brandt -synod -motorways -teller -accepting -alternatives -##iotto -gaol -katz -fillmore -elders -preparations -crows -violinists -margarita -paramil -experiencing -kisses -sakai -narrated -soundtracks -grenoble -sudanese -guatemalan -alvaro -nasser -collapses -corpse -tramway -1600s -obviously -mysore -herrera -kusatsu -unreleased -##mandu -rpg -weightlifter -consecrated -asiatic -daphne -yesterday -luxembourgian -auss -banc -mika -npr -pico -pahl -zir -##eers -##oos -##zeb -##gru -##kha -##xes -##ла -thebes -##arana -##atia -##icourt -##lewood -toto -##ilov -stowe -##chison -chish -##ovia -##ulative 
-shams -##aval -neub -spani -##igham -mansl -adhes -##quest -flock -montal -##weg -drome -chartered -1629 -apulia -##aleh -waldo -trey -villers -decry -keane -268 -##istano -volga -presently -civile -##working -fallacy -denotes -convoy -326 -accordion -garth -administratively -ballarat -penic -curios -chartres -sarg -insight -treehouse -savanna -platoon -logs -naver -backgrounds -scrub -favre -scholastic -spokane -cosworth -excit -bradshaw -1708 -crowds -oberliga -ppv -ravenna -1694 -pioneering -rahul -mckay -spirituality -heirs -choreography -miyagi -nuns -dumont -tutorial -crusaders -nikolay -delegation -hieroglyph -brasil -1500s -prosecution -spartan -higgs -kendrick -struggling -##cutta -potsdam -remembrance -energetic -welding -telenovela -graffiti -ornamental -backlash -##elihood -manslaughter -ait -aura -bd -cgi -ebers -nv -nis -pann -rau -wta -xeon -zebra -zhao -##zar -##zier -##tag -##gene -##rip -##pres -##wid -##oru -##itel -##itim -ander -##lei -##etamine -hebr -shuk -plasm -##ckers -mara -manx -##ostak -dijon -##ght -boley -##toire -relies -gret -bucks -weed -calcutta -broker -##nev -grouping -##ussen -shower -barg -barley -underwood -conscience -defences -##ophon -legions -severn -##veda -2600 -merging -erasmus -hardest -ruin -spaceflight -etudes -dani -401 -citadel -drained -planting -pianos -objectives -##ihiro -dinamo -coldest -767 -515 -alfa -goddard -ibar -pleaded -530 -suffrage -dalai -arnhem -weddings -virtues -cleaner -ventric -scholarly -echl -priority -havilland -kazakhstani -tirol -forgive -##locked -trigon -wendell -seaside -mouths -rancho -perpet -rehabil -daisuke -boulogne -piazza -tupolev -tesla -accommodation -exchanges -adulthood -nairobi -hodgkin -carcass -hammersmith -correze -draymond -priscilla -reboot -mcclellan -nonprofit -aramaic -pyeongchang -dop -fidd -jib -kog -kano -ketch -m3 -mub -sama -silla -wic -zuko -##henge -##lab -##sville -##pel -##atma -##itating -##aski -##elton -##elines -rechar -##rost -##opsy -chik -##odi 
-conwy -proms -spins -spagh -exagger -scuba -##orde -##econds -##ahara -basalt -pops -buchen -##ologie -1616 -pointe -johanna -yorktown -emo -endos -substr -deficit -##riere -corona -majors -raoul -##anao -godfrey -271 -##ukawa -markt -specialised -##ushin -marta -martian -tracked -borat -osbourne -haar -bulbs -##efin -medicinal -hereford -insom -liberated -60th -identities -cabot -linus -massey -dominance -antonov -programmed -gianni -concurr -availability -##ropolis -culturally -fatima -dvorak -specifications -1704 -whitman -buckley -symphonic -mazda -mitter -vasily -infanta -huron -1658 -1659 -kathmandu -sakura -hendrik -dwarves -sindhi -phyla -nadia -moulin -obstacles -threads -pterosaurs -iberia -vickers -vaughn -marsupial -vibration -unwanted -deutschland -phylogeny -nautical -prospect -hellenistic -athenian -seismic -diarrhea -karlsru -oclc -oaxaca -bt -bs -dimin -fies -giscard -lyle -laryn -mith -nike -o2 -rund -rani -wim -woven -##hag -##nae -##tland -##tical -##like -##rama -thigh -##oron -##orius -reass -staling -algar -##aglia -comoros -##aller -orang -##ifix -##ellation -grange -scors -twe -parlement -acanth -recapt -soo -smythe -apr -apache -crouch -##intest -elise -barrie -schooling -jeju -reddy -avent -keio -##zewski -pala -##tales -khar -trader -deviation -beni -mcgra -melville -zeeland -wilkins -pasteur -insign -eaton -dario -completes -conservatoire -baal -dining -tamar -bowman -mccull -46th -suicidal -670 -polyg -contributor -brightly -spectr -siegel -1731 -meso -trieste -baths -contacts -lacked -scenario -echin -vaish -anarchy -mimi -proposals -montene -turbochar -rabbis -vibrations -annexation -brutus -cyanide -cicero -antenna -disguised -crucifix -goblin -dimitri -honekawa -emanc -dromaeosaur -estaing -porsche -thelma -guthrie -sweeney -hebrides -012 -cite -dared -dosto -feral -jia -kuch -loll -wished -xiang -zucker -##bling -##kur -##wol -##mis -inuit -##isal -##iska -##itans -##icos -##asp -##adium -##idze -heidi -##owed -demp -whistle 
-oron -neel -enn -##udge -##astrian -suites -noting -##assi -##assen -fools -germs -swims -smiley -larson -##inney -edd -##ancies -quer -consumed -listened -brachi -invitation -rockies -valera -brewers -keating -summar -sanctions -versa -empir -billions -cerro -verac -erika -connell -convey -ledger -procession -effectiveness -considerably -ivoire -violins -fortifications -maharaja -originating -525 -photographed -mechanic -altman -581 -lifelong -sadness -##hui -alphabets -maximus -discussing -baptists -pretending -1596 -nasir -1547 -hobbit -delivering -sigma -witnessed -surfing -cedric -eclipt -perspectives -preceded -kubrick -##ierrez -apocalypt -aeronautics -chowdh -softer -hewitt -fraternity -swabia -underside -poitiers -palmeiras -scanner -lviv -krasn -rsssf -gastrointest -rhetoric -wainwright -evaporation -flammable -echidna -galveston -thessaloniki -regierungsbezirk -grenadines -voldemort -jozef -graveyard -stalingrad -gastrointestinal -damm -fn -fisch -liang -niv -tinker -vain -vind -##84 -##hac -##bag -##good -##rino -##cane -##mak -##xit -##oras -##itre -##itte -##itives -toho -isla -##illet -##iran -onward -deemed -chas -orte -##ieure -arson -spun -spence -##iffer -compressed -##iken -##acp -##idean -parc -parole -centennial -##ultures -charism -noaa -calcio -overture -traun -##anken -albans -moy -capra -sloan -jeans -interd -##sheet -lander -humber -hummel -302 -cores -perfume -mala -malibu -274 -popularized -confusing -specialty -freiburg -cym -townsend -##arski -astra -copern -announcing -posters -mustang -macintosh -torque -402 -panamanian -galle -parliaments -raccoon -groves -##cao -##vira -murdering -insol -39th -bait -savings -stonewall -960 -felton -affection -napier -shrink -searched -dalarna -miura -quarterly -labeled -##enzell -oscill -1665 -calculating -ventures -ghostbusters -wikipedias -##blue -worshipful -anatoly -seemingly -hypert -checking -fisherman -hannover -interpretations -sigmund -ugandan -szcz -reunification -jorg -quartets 
-digestion -middlesbrough -##enovac -adrienne -conceived -stranded -aeronautical -blazers -chrysler -falkland -touchdowns -ecclesiast -warehouse -transistor -mukherjee -prodigy -nurnberg -arjuna -prometheus -allegheny -monophyletic -scorsese -aard -bane -db -dorf -fand -heng -jhel -lice -miley -pell -rast -tammy -vidal -wow -ying -##tell -##kova -##rium -##fran -##wati -##erting -anesth -##ouin -##ilagus -##unov -orwell -prent -prun -##ublish -##ogi -##aston -marius -manchur -succ -norbert -hertz -brno -flinders -warlord -2024 -schuster -1627 -janis -overtime -##inschaft -moff -##inna -creuse -establishes -showcase -himachal -speculation -##ashvili -##resh -modification -harley -mato -deficiency -retina -gluck -##ccini -mucus -304 -courty -##loff -merlin -palais -##meer -activision -freel -halves -hymen -visually -techno -sexy -mcbride -hamad -danes -huber -343 -pasta -cracks -##agonists -translit -microphone -##iyo -tuning -432 -heroine -pathogens -protesting -dissident -investigating -ryo -bathing -fabian -necklace -disappears -contempor -francesca -thorpe -chatel -regulatory -isabelle -complaint -toulon -a300 -listeners -consortium -gutierrez -palaeont -poured -lichen -nihil -cruelty -tropic -grosso -dartmouth -toilets -sylvilagus -excessive -vortis -bulldogs -compulsory -##texts -peshawar -vh1 -trousers -extrater -iroquois -sophisticated -paramilitary -810 -811 -b1 -dary -eid -fue -gaj -jut -jena -jahan -lbs -pai -##lus -##rists -##ит -##inder -##arist -anac -tokaido -##throp -##stairs -##irt -##rott -##ounced -seh -pli -lek -ary -abru -abba -dire -bohr -##enden -##ptonshire -phyt -blaine -charging -charitable -swans -wehr -1619 -1628 -##azy -geese -crewe -junta -matric -glorious -muc -addis -salah -suppose -266 -284 -294 -donation -refusing -lightly -verge -quezon -debts -latina -vegan -entom -##beau -mcgr -karol -macfarlane -primate -garment -363 -##genstein -namesake -columbian -chaired -carlson -kenji -425 -43rd -grammatical -blockbuster -545 -wikitable 
-44th -sudbury -occupying -632 -swept -conducts -bushes -marcia -sleepy -siddi -investigative -cavern -extraction -spectators -##conn -vertically -bryn -imagery -detector -saturdays -compiler -wolff -vallee -lobe -smoked -pseudos -lionsgate -tumors -eclips -bloomington -intersect -jasenovac -quarterbacks -sauropod -allegro -senegalese -judgement -martyrs -sassari -nsw -affordable -zachary -pasht -snowboarding -xinjiang -cagli -exhaust -cephalopods -##gorod -invasions -cursed -gossip -appendix -microbiology -kindergarten -dynamite -supremacy -interstellar -colloqu -aich -aida -giotto -hila -iata -jenna -lakh -m4 -mers -taf -zhe -łodz -##hya -##fen -##cm -##wijk -##mc -##maine -##51 -thu -##lech -##ivir -ons -frans -alo -##igley -asphal -##aguchi -sei -##alls -congl -leit -arne -##abs -spree -mancha -newborn -aba -partisan -carriages -herds -indefin -phan -parental -cents -##icka -charley -betsy -##ierz -1526 -subgroup -consume -loading -##letal -##atorium -romanov -breach -additions -supercontinent -signatures -vij -campania -realised -burd -morb -posse -debated -hooper -mcint -dutchess -failures -postc -limiting -galatas -oskar -osiris -okay -modeled -thinker -jimi -happily -##iyang -393 -facel -achilles -bowls -theropods -launching -840 -mccon -essence -guarded -##imento -harvested -gaia -flattened -tyre -1607 -neurone -questioning -nitric -impacts -ridley -heaviest -blindness -bogdan -portraying -meteorite -kiw -krem -proposition -piston -intellectuals -dalmatians -stampede -exported -ostergotland -inverted -hijacked -examinations -spurgeon -redevelop -indycar -ijssel -epilepsy -gimnasia -pantheon -boleyn -mcgraw -asphalt -galatasaray -ather -aper -bons -cress -fg -kank -koy -oden -piv -pga -rour -tuck -ume -xix -yarn -yakov -##oidea -##lal -##ucher -##paw -##erz -##erte -##enham -##educt -inactive -ansel -##stall -##irts -##irable -hepha -shale -##estation -atkins -plumb -orne -##iegel -leimar -suf -newsc -##acchar -##izio -adren -somehow -monograph -warp 
-evapor -northward -willi -novo -##duced -maul -##ielle -barriers -surfer -treats -villiers -decatur -comedic -##enegger -solic -bien -superfamily -liar -##eshow -refin -balc -##arnock -madrig -karna -##arios -curly -ivorian -qualifications -warship -642 -electrol -890 -masterpiece -stephane -shootout -optional -1706 -kangaroos -habib -dictionaries -compilations -cnid -lazarus -eisenstein -jailed -forums -entrepreneurs -ripley -anglicans -##cracker -pursued -vaginal -expectations -injected -vedic -plummer -mammalian -preceding -counselor -persuade -guangdong -vogel -stuntman -jaguars -caterpillars -gillespie -chimpanze -linebacker -grayson -overturned -northamptonshire -microwave -reruns -schaffhausen -precursor -maguire -phouoibi -chieft -buccaneers -spaghetti -ecliptic -jhelum -chimpanzees -aung -bile -couch -fas -gears -hilly -kgb -loun -nemat -nune -sond -vulture -wul -##tur -##pw -##wings -##wark -##waters -##anan -##enues -incheon -##icello -##adir -##adine -shig -atten -polka -##perm -parodies -##acek -offense -1625 -calib -airborne -edg -capric -rommel -endless -oldham -harlan -jeann -259 -muir -annoy -voic -malle -merkel -dona -burgenland -antelope -activated -khanna -verified -franca -punta -premise -##apeake -huns -toole -mayoral -394 -645 -aqueduct -tamara -favored -nordiques -naacp -lindbergh -slovene -abdur -mutants -hearst -pigments -1733 -guinean -sorrow -svet -1653 -lanar -ivanov -saturated -taboo -1542 -puppy -randomly -mannheim -botanists -gustavo -furnace -pieter -shawnee -battalions -prestige -kohler -brutal -herzl -esteem -squeez -herbivorous -remarks -chesapeake -peyton -euclidean -repertoire -bk -ddr -ere -eel -hk -hiro -jt -nga -vere -vett -vaj -wired -waver -##zlo -##tit -##tze -##laut -##vii -##он -##eru -thut -rewards -##ayn -asa -asimov -arezzo -conject -##illi -pept -useless -##ogie -newell -abi -##spe -allgau -coated -coimb -2023 -##issy -##ffe -docks -scholl -televisa -famer -outline -janice -calig -crumb -maury -artworks -edema 
-##epolis -##shahi -angios -landown -philatel -258 -257 -assumes -irma -ecumen -archiv -paige -233 -incomp -transsexual -291 -markers -rhys -khrush -catfish -receivers -montag -friar -chinch -secretariat -connector -bankers -citing -haas -curator -earls -bulg -lowercase -reacted -darth -451 -##ochem -caen -bono -##agnac -416 -768 -quarantine -516 -splend -fluores -polyhed -kilometre -cuyah -spector -spectral -kamal -1662 -1664 -sorcer -1673 -necked -scenery -pelvis -vicar -fleetwood -ensign -surveys -hendrick -hubbard -decreasing -kreuz -podium -priesthood -gonzalo -reeve -rhodesia -yemeni -kochi -toothed -tactical -salamander -mehta -supervision -lokomot -griffiths -outlaws -mauve -kaisers -##alpindi -tanner -luminous -renovated -stainless -telenovelas -panchayat -disappoint -gdansk -palatine -concentrations -abkhazia -archduchess -ibaraki -buchenwald -hilaire -khrushchev -cuyahoga -bani -darn -domes -dopp -fs -git -kral -menn -pains -sore -##bam -##nuts -##zep -##lak -##last -##jiro -##moon -##macht -##ᅡᆷ -##omori -heap -wholly -##emire -##iav -##osure -uno -##estus -plots -##avas -##aines -peuge -abort -birken -sermon -disin -##tsky -##icker -doi -amc -noor -1615 -1613 -belo -##ographed -quang -quake -armist -defens -landshut -void -epit -irre -arrests -natalia -##linger -biom -delays -296 -empower -specialists -needing -erick -autistic -judoka -saty -robbed -banknotes -ferre -403 -336 -persepolis -consideration -boyz -violated -committing -frontiers -1790s -hawking -masah -rollins -hopper -welker -standardized -kidman -cookies -442 -465 -cruze -sabrina -puls -algiers -duran -financially -rawalpindi -bengals -revealing -kamen -chloropl -nyon -banning -bucket -1590 -1592 -biographer -blew -kaplan -dependency -gifted -bromwich -kaspar -gottingen -wicket -trioxide -brasile -marcelo -erupts -chakwal -battista -taoism -nicolae -jurisdictions -recalled -zionist -reelected -fermentation -bushranger -bushrangers -shetland -byzantium -zedong -palsy -motorcycles 
-chevalier -genuine -kilmarnock -##oglobin -conglomer -peugeot -cach -cuth -daf -gout -jans -kombat -nau -pum -penda -tumb -yek -zara -##63 -##kner -##kka -##wave -##wali -##mos -##°f -thayer -##inan -##edal -##atars -##alid -isere -reis -reese -recession -##champ -##oter -alman -shines -primes -prism -spikes -##ource -grat -herault -musik -parry -coil -cocker -minerv -uploaded -##olde -sway -smol -##ollern -##urred -junctions -endurance -sliding -accents -illy -308 -asset -arrange -massa -##bergen -successors -modernist -funer -sportsman -denial -cev -macrop -ohl -screening -405 -342 -matched -##osauria -ghent -lowering -stanisław -reviewers -librarian -homicide -mahatma -campaigned -odo -saba -waited -laszlo -pedest -bosch -donaldson -goran -investigators -conception -conceptual -appliances -fills -ellie -1686 -handsome -bryce -geographically -travellers -wetland -reflecting -kicking -ferrara -lenny -psv -bicameral -dystop -pups -possessions -pineapple -ricci -javelin -analytical -truce -brahman -geraldine -tutor -alkali -gadd -superson -vinegar -sustainability -licensing -niels -kellogg -schwarzenegger -wittgenstein -exchanged -bombardier -swabian -pollination -altenburg -smurf -smurfs -lakshmi -ceasefire -persistent -transfers -kaufman -stardust -postponed -asbest -invincible -unleash -displacement -chowdhury -lanarkshire -bors -dwayne -hench -kool -kraft -kota -lana -nons -nank -nfc -pouch -sith -sushi -tama -wac -##nab -##nica -##vie -##´s -##erun -##atto -##atron -inline -##alas -##isburg -##ingian -##ilin -sturt -##adz -##usch -heid -alden -shiger -atkinson -##theria -plo -##avut -leach -enfor -prad -##geois -##ourne -##orns -abuses -##okie -serf -twa -agenda -warns -upwards -amis -noz -guo -1622 -##azir -##azov -api -crested -instances -ifk -skies -obi -rockwell -##ontaine -magellan -##burton -auge -267 -##ukh -##rette -berth -318 -advancing -grandpa -morg -erg -firefly -stationary -eventual -##eki -franken -bulge -learnt -revolutionaries -##ensee 
-arthritis -##forth -##akhand -780 -policemen -suggesting -##iotic -packet -klan -shaan -bartolome -fitting -cleaned -worthy -fabulous -detectors -continuum -1637 -demanding -appealed -allegian -ovid -1565 -bilingual -manned -petrov -truj -dwelling -deposited -a320 -guarani -khalid -hossein -prostitute -firefox -grimm -unicameral -kakadu -sikhism -spartak -recovering -accomplished -adjectives -compassion -##felden -kruger -resolve -huntsville -enthusi -oratorio -magdalena -consolidated -regeneration -diagonal -sapphire -poppins -timbaland -thierry -mccormick -##machine -shipwreck -threshold -parentheses -sedgwick -baut -cil -dread -eels -fade -goli -glyn -lili -p2 -pits -sere -sog -ws -##hame -##bish -##bock -##bilt -##yx -##yun -##oia -##ioms -##tree -##rens -##enka -inputs -##alai -##lema -##elion -reag -forge -heine -beb -ores -arles -arias -##aban -actin -canis -##oreau -sui -notion -notices -##okawa -boon -carre -pheas -brides -##tees -buffer -novgorod -capped -thrones -metis -##sham -locust -##ccan -##iseach -raft -faraday -279 -castor -delivers -298 -billings -frei -equino -satoshi -sharps -garib -##affenburg -frontal -susanna -managerial -strains -caball -ashland -giann -scrolls -stephenson -kingsley -03° -analogy -peaking -lifespan -tuval -daley -1602 -kerber -dedu -1678 -lacking -parameters -1656 -1690 -1691 -1588 -1580 -reflections -tatiana -shellfish -ribs -##uffle -dsm -tetra -##families -kristin -nunavut -folding -trentino -uttarakhand -nemesis -cracked -pearce -shifts -taoiseach -rwandan -babylonian -jalal -greensboro -motivated -memorable -adultery -tlc -sacrifices -coeffic -rejoined -##mantle -aschaffenburg -seleuc -wembley -montenegrin -##ublished -hephaestus -eireann -fences -fuse -giza -jure -koc -kott -mx -mure -mitz -ogle -olof -pn -rink -tutt -wie -woz -yos -zem -##hound -##buck -##eering -##tis -##kiewicz -##pc -##52 -##arks -inge -##alin -##amble -##olle -stony -##iru -chum -shaman -pled -##icant -##ainville -enable -acton -canopy -dik 
-commits -norwood -adhd -foil -foam -livorno -presiding -offenbach -##tsch -popper -calc -calories -brok -medall -capabilities -quakers -##upi -elvira -differentiation -landis -##riya -remy -##cco -eastenders -##coin -litt -runoff -avian -289 -malaya -signalling -passages -vienn -316 -autor -argonaut -badger -hitachi -macl -multic -candice -productive -362 -deadline -thinkers -sommer -##evich -forehead -antoni -lawsuits -ratification -falun -alphonse -mandalay -fatigue -haired -hayward -wingspan -globalization -catalunya -archaea -##enzollern -warden -sardegna -immigrated -1647 -yoko -advisors -petersen -achieving -1585 -1560 -benedictine -wickets -cougars -shakur -##assee -cobain -concorde -commodity -abolitionists -odysse -pendleton -convenient -gaulle -confirmation -hohenzollern -interfere -kylie -demigod -hydrocarbons -boniface -platypus -apocalyptic -wehrmacht -thutmose -shigeru -coefficients -812 -bongo -eater -gens -hager -kata -kappa -mors -namb -nievre -ots -pizz -sist -sade -sasha -wett -yacht -##02 -##eons -##dad -##fiction -##muhle -##qa -##orhyn -##reys -##ilan -##elen -stoud -##chal -##oteric -beasts -cholester -##oske -unhe -seo -plesi -enig -pears -##abon -playground -trag -trance -granger -##ibati -meiosis -paratro -agost -whoever -goya -coo -gert -contender -amit -swap -weeds -1614 -thromb -meta -loh -jeep -redwall -romanization -archaic -276 -277 -sangh -##ukov -representations -rhiz -queer -##ushka -montes -robins -ferenc -nobita -##haan -derry -somal -psychic -##royd -##lessness -##glass -drummond -##holz -domination -##ageddon -00pm -orgasm -sweets -##anzee -elevators -dealers -baham -fundra -lifting -bundesrat -gorillas -kilometer -storey -hindust -peninsular -##zhen -ignat -vincenzo -shelton -lacks -##ropsophus -bleed -##organized -encourages -stalk -kidnap -beverages -deposition -villefran -blessings -shakti -denton -mogadish -relegation -inferno -intentions -workshops -motivation -beaked -mocking -scorpions -shorth -obsessed -schwein 
-dendropsophus -vassal -bulldog -tyrannosaurus -depicting -confucianism -kansai -transcend -surpassed -endowment -zephyr -meurthe -ortega -60s -610 -bending -bouch -dax -ghe -gma -iwa -ked -kamm -oe -oates -sow -tams -vf -vines -yal -zal -zug -##bey -##tok -##pole -##wit -##qvist -##enthe -stump -forks -beers -chao -shink -sherb -shostak -unne -##rai -##illus -arise -##ebb -marlow -abbe -##angan -collin -boana -##oughts -disg -soups -drank -ami -howell -musicologist -likewise -elong -barlow -langston -expon -expend -slap -intercommun -divisible -landlocked -steppe -bandit -easton -severus -castes -supercomput -superieure -##retion -palin -donetsk -bursts -##cheid -squads -purity -mcqueen -petals -##osho -hitt -sharpe -furry -359 -354 -sandler -projected -##waite -insisted -##evil -377 -faber -krust -bello -acha -closet -##former -husbands -slept -sabot -490 -kiribati -otters -loiret -innings -imperfect -polygon -signifies -durga -marsden -1608 -##feldt -##enhof -buyers -geneticists -##aphyl -anatol -misty -alleles -purchasing -iwo -toxicity -yangtze -choreographers -circumference -laughing -ramsay -fateh -snoop -tattoos -transmitter -sacrificed -entrances -chimpanzee -frightened -betrayed -h2o -daimyo -fokker -trunks -miscarriage -pegasus -perugia -suleiman -veracruz -cagliari -trujillo -mogadishu -shostakovich -bower -bisch -ehr -epping -goph -hover -jind -jihad -kia -kanto -lute -pd -tamm -tars -ux -wn -wiel -xer -yarm -zi -##bun -##yam -##zell -##lang -##kom -##rically -##ет -##inac -##iguous -chard -whig -shiga -##rail -##mern -sejm -##avo -lec -##rites -##ocop -dibi -##orsky -rother -##ikos -borne -souven -bray -prescribed -perch -basics -##olding -swaz -overlook -1515 -1513 -medellin -##ieta -undercover -strick -haram -##undo -russes -muhl -walnut -##ylon -breads -epiph -archery -discus -288 -##bright -downing -richland -sonia -##headed -mores -successive -purcell -deny -urged -hamlin -arauc -clements -407 -##yevich -promo -gunma -hunch -shoemaker -nicki 
-##bands -peacefully -bonne -bonnet -aquila -odds -544 -jonah -accession -pyotr -mandir -wolfsburg -loretta -inspection -finned -offerings -1503 -arnulf -1604 -cassius -nutcracker -repeats -troubled -1633 -baronet -1651 -pavl -1555 -dramatically -hazards -chickens -nestor -corresponds -meteorologist -ponce -unterallgau -quietly -haines -atheism -mangrove -disagreements -demolition -carmine -untouch -antennae -orphanage -metamorphic -physiologist -tcp -huey -vanderbilt -reinforcement -disqualified -##ourable -spontaneous -puppeteer -goryeo -contaminated -afrikaans -eritrean -courtyard -rourke -stoudemire -cholesterol -bier -d4 -dard -hush -hano -hyla -kess -lore -qut -rames -tore -vivid -vichy -zy -##bron -##lana -##uu -##ucle -##frid -##wire -##mind -##98 -##ła -thak -##enst -ineff -anz -##omic -##then -##amph -##stal -ashe -##estre -ata -##avid -proc -##aina -enables -aram -##abies -spang -##ostok -##ocl -scanning -kne -##ruv -twente -brigham -blitz -footed -upstream -schre -histoire -1024 -applic -guiding -brod -novella -##inska -tractor -teammates -munro -baran -personification -undergoing -##rils -slides -modelling -hari -##engen -invested -muzzle -didier -corals -lukas -##anez -patti -malley -volk -emporis -##oiselle -grapp -giraff -muriel -netany -maddie -scotty -takeover -robbers -collectors -olson -latex -constitute -##media -bloodstream -timp -381 -existential -392 -##hilev -logging -synopsis -860 -welter -charlott -026 -lucie -suggestions -poorer -bitten -celts -tailor -cavan -oakley -##enhofen -continuity -1695 -battleships -1582 -ribos -rejection -iwate -##impy -declaring -praying -pumpkin -vallad -rosenheim -ankyl -capsule -erectus -zimbabwean -sensitivity -xiaomi -shifting -forecasts -##omorphic -pendulum -impressions -tracey -metabolic -astrophysicist -bounce -vortex -couture -cylind -cylinders -henriette -allergic -spacetime -compostela -gershwin -reconciliation -ithaca -iglesias -archimedes -caligula -swaziland -netanyahu -boud -dov -diph -jaya -kt 
-kana -kumb -ld -nol -nus -pads -v2 -##aar -##bolt -##guns -##ulet -##ded -##dell -##rc -##rans -##fighter -##jia -##vep -thad -##orb -##atown -##atists -##icum -##ingdon -##thro -##ilah -##amen -##ctv -##chia -##iraptor -framed -fringe -beit -decker -wholes -whirl -##estead -conifers -prada -##ogh -exche -clippers -sheik -roo -roach -firuz -##acho -scars -cares -carav -carab -carchar -##001 -agatha -monke -coe -##wei -charon -outlet -1611 -populist -capill -capability -attic -laund -proving -##ashima -rockford -maki -annapolis -legged -natch -croats -intake -biophys -militants -queue -veil -entropy -##yrd -##anooga -replication -fermat -garter -gallant -345 -347 -boyer -federalist -##adora -craven -positioned -rainer -##oula -miroslav -##101 -negatively -41st -generators -576 -loren -boson -hatfield -accountant -padding -kao -youtuber -liberties -1669 -harrisburg -vikram -converting -1652 -umayy -paved -kissing -##lieb -experimented -golfers -folds -cpus -eriksen -intersex -diaghilev -orientales -electromagnetism -ossetia -dowager -tahiti -mcclane -##abyte -pardon -consulship -relaxed -##cephalus -luton -melanoma -##gemeinschaft -rukus -disappointed -guggen -arbitr -baird -mcguire -paranormal -ragnar -subcommittee -pahlavi -##ucherry -armistice -exchequer -css -dk -dames -eintra -gass -hnl -kiv -nang -semb -sven -sonder -vard -wied -zieg -на -##bang -##tre -##lins -##uw -##sim -##komm -##pac -##wani -##mbi -##esus -##onnes -##orov -##orax -##ilio -##olon -##etting -bech -alton -chond -##ianna -shaking -pliny -necks -suzy -clancy -adher -indicted -##reep -##ugal -regn -amtrak -appenzell -caliber -1566 -maude -helix -##ancon -quil -quetz -rowling -specification -chrome -infra -infante -departed -slime -placement -brexit -coro -assad -ecoreg -duarte -goda -malian -sanjay -delano -doncaster -chemotherapy -pakhang -hotter -limoges -damian -406 -##oyuki -357 -groom -renato -399 -##iasm -001 -domains -431 -435 -595 -575 -fuß -crowns -rabin -kazim -paddle -botanic 
-imperialism -wardrobe -##osterone -bourgeois -gamble -vasari -vaude -gradual -lemur -ricc -1483 -pseudo -gastropods -hogwarts -##america -dumas -aldrich -rheum -morphology -##igarh -cinematographers -swinging -supervised -chitral -stuffed -##fuhrer -cervantes -gyps -metamorphosis -devastated -mcdowell -densely -mosquitoes -linnaeus -interfaces -disturbance -horrible -interlingue -kardash -gwynedd -brantford -deleted -rousseau -pembroke -newcomer -cockatoo -##waltungs -thespa -lufth -karlsruhe -villefranche -exponential -dibiase -monkees -eintracht -aum -aime -aire -aene -baza -esh -gdr -kanch -lul -pung -papp -tdi -##top -##pike -##mag -##errat -##esda -##orff -##atari -##aler -anemia -##amination -##oler -##irge -chast -##oung -##fera -##aked -##akarta -leary -##athlete -teut -canons -trist -grady -scrap -##acci -mech -caramel -##spl -##awal -peric -gerd -minn -##aziz -saigon -geh -mohan -apoll -creativity -elam -elgin -demi -christensen -##onti -raider -radios -couldn -filmmakers -confined -khark -argon -halland -chinatown -bentheim -##ijan -presses -begum -##pleas -premature -nominate -climbs -337 -puya -352 -charting -##iscip -pastoral -transports -traveler -transliter -383 -identifying -magnum -krill -beatrix -brando -fairs -stonehenge -messina -yellowish -tallah -dunkirk -letterman -fantasia -mobility -514 -rotate -farmland -innate -cdc -telecom -ganges -mortality -bakr -lyrical -dedication -buyer -phoenician -rescues -1645 -franchises -kitaky -1558 -remixed -blogs -sorting -lobsters -##xxx -grandmasters -uttlesford -heiress -loaned -psychiatrists -volunteered -mildred -flourished -quadru -consequently -karaikal -embassies -honduran -federated -rusty -eugenio -vesta -tacitus -guadalupe -carmichael -cesare -paleontologist -passeriformes -triton -serenity -dachau -chattanooga -orkney -seahawks -aethel -nyctim -carcassonne -ecumenical -odysseus -valladolid -lufthansa -kitakyushu -aun -apert -dt -dime -ect -fot -fau -fium -gareth -lign -loki -mute -mazz -nengo 
-sio -##hala -##eong -##yb -##ioc -##uks -##rif -##erel -##esch -topp -isk -##elot -##elberg -ond -heist -##otrophic -##iges -##imation -##rault -seward -##odrome -arct -argy -aruba -spade -usgs -##geons -##ubers -roar -meer -heron -##ruit -##aways -centro -monmouth -coating -schwer -##alski -brittle -charing -outward -bette -electrod -apat -mundo -showtime -himm -rowe -secondly -arminia -simons -stealth -banded -locus -rematch -##ophagus -portage -breck -discour -saloon -salazar -crock -incon -patil -##othermal -antrim -debra -##afogo -pressures -lottery -walled -papacy -penang -##iscal -issuing -weekends -wildcats -oprah -##iyya -##ohn -assemblies -propose -propulsion -ashford -427 -757 -##ufu -lovel -koe -contracted -settler -bonded -hayat -hornet -doroth -heller -landsberg -rajput -yamada -##skine -##uzu -pullman -1679 -1646 -1655 -acquire -1693 -6000 -partnerships -isaiah -admirals -gatherings -1569 -horizons -brigades -autobiographical -tanker -phyll -gwang -mulligan -examine -paragraph -crusades -hippoc -shearer -reorganized -rented -kulm -clauses -miscell -puducherry -whisper -paganism -excavations -##ystes -gruber -recommendation -silvio -spawned -##otrans -henrietta -coppola -norrbotten -yelts -deployed -menstrual -##wheel -seaport -envoy -eternity -chatterjee -delgado -kickboxing -proximity -dystopian -somaliland -bmi -ciel -fonse -gage -ganz -jop -kie -kier -leng -nann -oo -pith -pumps -tbs -yao -##nov -##oel -##lik -##fits -##jos -##ßen -##이 -##onov -##orro -inclusion -##leon -toa -tos -stoker -##opers -##ovy -##ovolta -##ands -unpl -##allic -atchison -##avu -##odar -neural -##iei -##eby -##ateurs -##ritz -abduct -firestone -##ryne -carous -##awari -phi -brink -##tep -monog -goias -relied -upward -televisions -amalia -broth -maf -capp -expose -armageddon -harrington -backpack -##she -##atsuka -revere -senso -saliva -kew -classify -patience -287 -palacio -moravia -##ledore -martina -erskine -madden -ideals -copying -eleon -researched -328 -329 
-racehorse -wilfred -shipbu -violently -##iente -esqu -pastry -inscribed -jeanette -cartwright -rebirth -1780s -caud -predatory -ourselves -barbarian -##uchus -770 -633 -634 -skywalker -clearwater -gabe -gaon -625 -gorg -dorado -padres -botafogo -bearer -1601 -hexadec -konig -konigs -dietary -tanu -ignore -syndicate -harmless -scripture -1635 -cadiz -1599 -1556 -1552 -1535 -lucknow -encouraging -beckett -lilo -1453 -javanese -bethesda -choreographed -culp -accompanying -##ozoa -fractions -preserving -resembling -horizontally -negotiate -shonen -ljung -overlord -heredity -navarro -kellner -cytoplasm -poultry -luminosity -remarried -adolphe -##ouleme -confidential -mavericks -spatial -fiddle -bournemouth -epidemiology -auditorium -filippo -gyorgy -reincarnation -padua -krusty -nyctimystes -blet -duma -ives -m19 -nano -salsa -tn -vigo -##68 -##nil -##nach -##zies -##lac -##kot -##94 -##ᅮᆨ -thq -thap -##icons -##asar -andro -##lec -##stow -##store -foreman -heyd -##igation -chino -chola -##utin -unmann -unchang -plaut -##akon -congen -prag -##abal -##ipar -##iala -##occup -bother -rooster -meyers -oneplus -heresy -parque -flax -preschool -##iedo -doe -poppy -regain -reggie -charming -1626 -##velope -starfish -internment -elba -##veau -diva -retract -angouleme -makoto -legitim -organelles -missy -decap -epistle -irvin -discourse -goodwin -volatile -shortage -morgen -couns -bodyguard -handful -roberta -radiohead -critique -broader -clef -photovolta -ferris -olimp -treatise -espos -367 -insulation -cartel -802 -##anyan -453 -kram -##mares -fairies -423 -415 -pathogen -shapiro -dispatch -minded -pollut -bakers -piracy -consistently -neurological -virtu -ipcc -herbaceous -fabrics -pretended -sulp -uncovered -denny -skeletal -1494 -1541 -abolish -amput -##999 -cockro -culver -stimulation -fujim -##restrial -ishikawa -chuo -helmets -anastas -caldera -pudding -pesticides -fountains -commemorate -metaphys -fleece -##opolitan -wolverine -resolutions -shikoku -montserrat 
-greeting -extermination -schoenberg -disadvantages -repository -steadily -##economics -rehabilitation -emancipation -unmanned -bitch -cune -gans -goll -jago -lite -nen -pina -rish -rano -vfl -xml -##nagar -##yte -##oil -##jian -##vah -##mau -##92 -thay -##icelli -##thur -##ilated -##elm -##adendron -heg -chry -##agiri -##emu -shub -seuss -sext -seeded -##umu -envelope -##arded -manz -gris -hertha diff --git a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/train.py b/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/train.py deleted file mode 100644 index cabc25fdca..0000000000 --- a/cli/jobs/nebulaml/bert-pretrain-deepspeed/src/train.py +++ /dev/null @@ -1,124 +0,0 @@ -from datasets import * -from transformers import * -from tokenizers import * -import os -import json - - -if __name__ == "__main__": - - parser = HfArgumentParser(TrainingArguments) - training_args, args = parser.parse_args_into_dataclasses( - return_remaining_strings=True - ) - - wikiit = load_dataset("wikipedia", "20220301.it", split="train") - dataset = wikiit - - d = dataset.train_test_split(test_size=0.1) - d["train"], d["test"] - - special_tokens = ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]", "", ""] - # 30,522 vocab is BERT's default vocab size, feel free to tweak - vocab_size = 30_522 - # maximum sequence length, lowering will result to faster training (when increasing batch size) - max_length = 512 - - model_path = "pretrained-bert" - - # make the directory if not already there - if not os.path.isdir(model_path): - os.mkdir(model_path) - - # dumping some of the tokenizer config to config file, - # including special tokens, whether to lower case and the maximum sequence length - with open(os.path.join(model_path, "config.json"), "w") as f: - tokenizer_cfg = { - "do_lower_case": True, - "unk_token": "[UNK]", - "sep_token": "[SEP]", - "pad_token": "[PAD]", - "cls_token": "[CLS]", - "mask_token": "[MASK]", - "model_max_length": max_length, - "max_len": max_length, - } - json.dump(tokenizer_cfg, f) 
- - tokenizer = BertTokenizerFast.from_pretrained(model_path) - truncate_longer_samples = True - - def encode_with_truncation(examples): - """Mapping function to tokenize the sentences passed with truncation""" - return tokenizer( - examples["text"], - truncation=True, - padding="max_length", - max_length=max_length, - return_special_tokens_mask=True, - ) - - def encode_without_truncation(examples): - """Mapping function to tokenize the sentences passed without truncation""" - return tokenizer(examples["text"], return_special_tokens_mask=True) - - # the encode function will depend on the truncate_longer_samples variable - encode = ( - encode_with_truncation if truncate_longer_samples else encode_without_truncation - ) - # tokenizing the train dataset - train_dataset = d["train"].map(encode, batched=True) - # tokenizing the testing dataset - test_dataset = d["test"].map(encode, batched=True) - - model_config = BertConfig(vocab_size=vocab_size, max_position_embeddings=max_length) - model = BertForMaskedLM(config=model_config) - data_collator = DataCollatorForLanguageModeling( - tokenizer=tokenizer, mlm=True, mlm_probability=0.15 - ) - - trainer = Trainer( - model=model, - args=training_args, - data_collator=data_collator, - train_dataset=train_dataset, - eval_dataset=test_dataset, - ) - - # Perfrom pre-training and save the model - result = trainer.train() - print(f"Time: {result.metrics['train_runtime']:.2f}") - print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}") - - # compute the number of floating-point operations per forward pass - # calculate FLOPs for embeddings layer - embedding_size = 768 - sequence_length = 512 - embedding_flops = ( - embedding_size * sequence_length * 2 - ) # 2 FLOPs for each multiplication and addition operation - - # calculate FLOPs for transformer layers - hidden_size = 768 - num_layers = 12 - transformer_flops = ( - hidden_size * sequence_length * 3 * 2 * num_layers - ) # 3 matrix multiplications, 2 layer norm ops 
- - # calculate total FLOPs - flops_per_pass = embedding_flops + transformer_flops - - # compute the number of forward passes per second - compute_training_samples_per_second = result.metrics["train_samples_per_second"] - forward_passes_per_second = ( - compute_training_samples_per_second / training_args.per_device_train_batch_size - ) - - # compute the number of floating-point operations per second - flops_per_second = flops_per_pass * forward_passes_per_second - - # compute the number of teraflops - tflops = flops_per_second / 1e12 - - # print the number of teraflops - print(f"Estimated teraflops: {tflops:.2f} TFLOPS") diff --git a/cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components/pipeline.yml b/cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components/pipeline.yml index 6720354dd9..a5539d6a4d 100644 --- a/cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components/pipeline.yml +++ b/cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components/pipeline.yml @@ -34,14 +34,14 @@ jobs: model_output: ${{parent.outputs.pipeline_job_trained_model}} services: my_vscode: - job_service_type: vs_code + type: vs_code my_jupyter_lab: - job_service_type: jupyter_lab + type: jupyter_lab my_tensorboard: - job_service_type: tensor_board + type: tensor_board log_dir: "outputs/tblogs" # my_ssh: - # job_service_type: tensor_board + # type: tensor_board # ssh_public_keys: # nodes: all # Use the `nodes` property to pick which node you want to enable interactive services on. If `nodes` are not selected, by default, interactive applications are only enabled on the head node. 
diff --git a/cli/jobs/pipelines-with-components/image_classification_with_densenet/image_cnn_train/requirements.txt b/cli/jobs/pipelines-with-components/image_classification_with_densenet/image_cnn_train/requirements.txt deleted file mode 100644 index d605e3bcc0..0000000000 --- a/cli/jobs/pipelines-with-components/image_classification_with_densenet/image_cnn_train/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -git+git://github.com/NVIDIA/dllogger.git@26a0f8f1958de2c0c460925ff6102a4d2486d6cc#egg=dllogger diff --git a/cli/jobs/pipelines-with-components/nyc_taxi_data_regression/README.md b/cli/jobs/pipelines-with-components/nyc_taxi_data_regression/README.md index 36b25ae9cc..43faa3bf30 100644 --- a/cli/jobs/pipelines-with-components/nyc_taxi_data_regression/README.md +++ b/cli/jobs/pipelines-with-components/nyc_taxi_data_regression/README.md @@ -170,11 +170,11 @@ Asset labels are still in preview and may resolve to an incorrect asset version. "services": { "Studio": { "endpoint": "https://ml.azure.com/runs/6cef8ff4-2bd3-4101-adf2-11e0b62e6f6d?wsid=/subscriptions/ee85ed72-2b26-48f6-a0e8-cb5bcf98fbd9/resourcegroups/pipeline-pm/workspaces/pm-dev&tid=72f988bf-86f1-41af-91ab-2d7cd011db47", - "job_service_type": "Studio" + "type": "Studio" }, "Tracking": { "endpoint": "azureml://eastus.api.azureml.ms/mlflow/v1.0/subscriptions/ee85ed72-2b26-48f6-a0e8-cb5bcf98fbd9/resourceGroups/pipeline-pm/providers/Microsoft.MachineLearningServices/workspaces/pm-dev?", - "job_service_type": "Tracking" + "type": "Tracking" } }, "settings": { diff --git a/cli/resources/connections/s3-access-key.yml b/cli/resources/connections/s3-access-key.yml index 1bb6eca399..e159a797e9 100644 --- a/cli/resources/connections/s3-access-key.yml +++ b/cli/resources/connections/s3-access-key.yml @@ -3,7 +3,7 @@ $schema: http://azureml/sdk-2-0/Connection.json type: s3 name: my_s3_connection -target: https://.amazonaws.com # add the s3 bucket details +target: # add the s3 bucket details credentials: type: 
access_key access_key_id: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX # add access key id diff --git a/sdk/python/assets/assets-in-registry/share-models-components-environments.ipynb b/sdk/python/assets/assets-in-registry/share-models-components-environments.ipynb index 9b53250c36..8c0e3adb08 100644 --- a/sdk/python/assets/assets-in-registry/share-models-components-environments.ipynb +++ b/sdk/python/assets/assets-in-registry/share-models-components-environments.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -54,6 +55,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -82,6 +84,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -116,6 +119,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -147,6 +151,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -173,6 +178,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -196,6 +202,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -232,6 +239,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -255,6 +263,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -288,6 +297,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -314,6 +324,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -327,6 +338,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -355,6 +367,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -380,6 +393,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -410,6 +424,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -432,6 +447,7 @@ ] 
}, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -460,6 +476,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -469,6 +486,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -490,6 +508,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -515,6 +534,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -542,6 +562,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -565,6 +586,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -580,7 +602,7 @@ "outputs": [], "source": [ "print(f\"online_endpoint_name: {online_endpoint_name}\")\n", - "ml_client_workspace.online_endpoints.begin_delete(name=online_endpoint_name).wait()" + "ml_client_workspace.online_endpoints.begin_delete(name=online_endpoint_name).result()" ] } ], diff --git a/sdk/python/assets/data/data.ipynb b/sdk/python/assets/data/data.ipynb index dad411b68f..8dd4db0fb1 100644 --- a/sdk/python/assets/data/data.ipynb +++ b/sdk/python/assets/data/data.ipynb @@ -365,7 +365,7 @@ "from azure.ai.ml.entities import AccessKeyConfiguration\n", "\n", "name = \"my_s3_connection\"\n", - "target = \"https://.amazonaws.com\" # add the s3 bucket details\n", + "target = \"\" # add the s3 bucket details\n", "wps_connection = WorkspaceConnection(\n", " name=name,\n", " type=\"s3\",\n", diff --git a/sdk/python/foundation-models/system/evaluation/fill-mask/README.md b/sdk/python/foundation-models/system/evaluation/fill-mask/README.md deleted file mode 100644 index 5609ee1a3b..0000000000 --- a/sdk/python/foundation-models/system/evaluation/fill-mask/README.md +++ /dev/null @@ -1,15 +0,0 @@ -## Fill Mask - -### List of supported keyword arguments: - -| Keyword Argument | Description | Type | Sample | 
-|:-----------------:|:-----------------------------------------------------------------------------------------------------------------|-----------|-----------------------------| -| metrics | List for subset of metrics to be computed. All supported metrics listed below. | list | ["perplexities"] | -| model_id | Model used for calculating Perplexity. Perplexity can only be calculated for causal language models. | str | "gpt2", "bert-base-uncased" | -| batch_size | The batch size to run texts through the model | int | 16 | -| add_start_token | Boolean flag to add the start token to the texts so the perplexity can include the probability of the first word | boolean | true, false | -| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing) | dict | | - -### List of supported metrics: - -* perplexities \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/fill-mask/eval-config.json b/sdk/python/foundation-models/system/evaluation/fill-mask/eval-config.json deleted file mode 100644 index 81c4c0061b..0000000000 --- a/sdk/python/foundation-models/system/evaluation/fill-mask/eval-config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "metrics": ["perplexities"], - "model_id": "gpt2", - "add_start_token": true -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask-eval-dashboard.png deleted file mode 100644 index 3eeb20923f..0000000000 Binary files a/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask-eval-dashboard.png and /dev/null differ diff --git a/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask.ipynb b/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask.ipynb deleted file mode 100644 index 221ee60f9d..0000000000 --- a/sdk/python/foundation-models/system/evaluation/fill-mask/fill-mask.ipynb +++ /dev/null @@ -1,468 
+0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Fill Mask Evaluation\n", - "\n", - "This sample shows how use the evaluate a group of models against a given set of metrics for the `fill-mask` task. \n", - "\n", - "### Evaluation dataset\n", - "Contains ~70k pages from wikipedia, each describing a person. For each page, the person described in the text is masked with a mask token. The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified Good and Featured articles on Wikipedia. The dataset is available under the Creative Commons Attribution-ShareAlike License. Compared to the preprocessed version of Penn Treebank (PTB), WikiText-2 is over 2 times larger and WikiText-103 is over 110 times larger. The WikiText dataset also features a far larger vocabulary and retains the original case, punctuation and numbers - all of which are removed in PTB. As it is composed of full articles, the dataset is well suited for models that can take advantage of long term dependencies. A copy of the [rcds/wikipedia-for-mask-filling](https://huggingface.co/datasets/rcds/wikipedia-for-mask-filling/viewer/original_512/train) dataset is available in the [fill-mask](./fill-mask) folder.\n", - "\n", - "### Model\n", - "The goal of evaluating models is to compare their performance on a variety of metrics. `fill-mask` is generic task type that can be used for predicting which words should replace some of the words that were masked in a sentence based on context provided. As such, the models you pick to compare must be finetuned for same scenario. Given that we have the rcds/wikipedia-for-mask-filling dataset, we would like to look for models finetuned for this specific scenario. 
We will compare `bert-base-uncased`, `distilbert-base-uncased` and `microsoft-deberta-large` in this sample, which are available in the `azureml` system registry.\n", - "\n", - "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick the models to evaluate.\n", - "* Pick and explore evaluate data.\n", - "* Configure the evaluation jobs.\n", - "* Run the evaluation jobs.\n", - "* Review the evaluation metrics. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. 
This is not an optional step if running in a new environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "%pip install --upgrade azure-ai-ml\n", - "%pip install --upgrade azure-identity\n", - "%pip install --upgrade datasets==2.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319346668 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = None\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential)\n", - " subscription_id = workspace_ml_client.subscription_id\n", - " workspace = workspace_ml_client.workspace_name\n", - " resource_group = workspace_ml_client.resource_group_name\n", - "except Exception as ex:\n", - " print(ex)\n", - " # Enter details of your AML workspace\n", - " subscription_id = \"\"\n", - " resource_group = \"\"\n", - " workspace = \"\"\n", - " workspace_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, workspace\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "preview_registry = \"azureml-staging\"\n", - "registry = \"azureml\"\n", - "\n", - "preview_registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, 
registry_name=preview_registry\n", - ")\n", - "print(preview_registry_ml_client)\n", - "\n", - "registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=registry\n", - ")\n", - "registry_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - " print(f\"GPU compute '{compute_cluster}' found.\")\n", - "except Exception as ex:\n", - " print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below snippet will allow us to query number of GPU's present on the compute. We can use it to set `gpu_per_node` to ensure utilization of all GPUs in the node." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpus_per_node = 1 # default value\n", - "gpu_count_found = False\n", - "ws_computes = workspace_ml_client.compute.list_sizes()\n", - "for ws_compute in ws_computes:\n", - " if ws_compute.name.lower() == compute.size.lower():\n", - " gpus_per_node = ws_compute.gpus\n", - " print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n", - "# if gpu_count_found not found, then print an error\n", - "if gpus_per_node > 0:\n", - " gpu_count_found = True\n", - "else:\n", - " gpu_count_found = False\n", - " print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. 
Pick the models to evaluate\n", - "\n", - "Verify that the models selected for evaluation are available in system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319354708 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n", - "models = [\n", - " {\"name\": \"bert-base-cased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"bert-base-uncased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"bert-large-cased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"bert-large-uncased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"camembert-base\", \"version\": \"1\", \"mask\": \"\"},\n", - " {\"name\": \"distilbert-base-cased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"distilbert-base-uncased\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"distilroberta-base\", \"version\": \"1\", \"mask\": \"\"},\n", - " {\"name\": \"microsoft-deberta-base\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"microsoft-deberta-large\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"microsoft-deberta-xlarge\", \"version\": \"1\", \"mask\": \"[MASK]\"},\n", - " {\"name\": \"roberta-base\", \"version\": \"1\", \"mask\": \"\"},\n", - " {\"name\": \"roberta-large\", \"version\": \"1\", \"mask\": \"\"},\n", - "]\n", - "for model in models:\n", - " model = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " print(model.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. 
Pick the test dataset for evaluation\n", - "A copy of the Wikipedia For Mask Filling is available in the [fill-mask](./fill-mask/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n", - "* To use the entire dataset, uncomment the cells below and run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# !pip install datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from datasets import load_dataset\n", - "\n", - "# hf_test_data = load_dataset('rcds/wikipedia-for-mask-filling', 'original_512')\n", - "\n", - "# hf_test_data['train'].to_pandas().head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_data_mask_1 = \"./fill-mask/small-test-[MASK].jsonl\" # [MASK]\n", - "test_data_mask_2 = \"./fill-mask/small-test-mask.jsonl\" # " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "pd.read_json(test_data_mask_1, lines=True).head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "pd.read_json(test_data_mask_2, lines=True).head()" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the evaluation jobs using the model and data as inputs\n", - "\n", - "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n", - "\n", - "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval_config.json](./eval_config.json) file. 
We calculate `perplexities` in this sample.\n", - "\n", - "All supported evaluation configurations for `fill-mask` can be found in [README](./README.md)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml import Input\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"model_evaluation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def evaluation_pipeline(test_data, mlflow_model):\n", - " evaluation_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry or a model from the workspace\n", - " # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n", - " mlflow_model=mlflow_model,\n", - " # test data\n", - " test_data=test_data,\n", - " # The following parameters map to the dataset fields\n", - " input_column_names=\"input_string\",\n", - " label_column_name=\"title\",\n", - " # Evaluation settings\n", - " task=\"fill-mask\",\n", - " # config file containing the details of evaluation metrics to calculate\n", - " evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n", - " # config cluster/device job is running on\n", - " # set device to GPU/CPU on basis if GPU count was found\n", - " device=\"gpu\" if gpu_count_found else \"cpu\",\n", - " )\n", - " return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the jobs, passing the model as a parameter to the pipeline created in the above step." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# submit the pipeline job for each model that we want to evaluate\n", - "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n", - "pipeline_jobs = []\n", - "\n", - "experiment_name = \"fill-mask-evaluation\"\n", - "\n", - "for model in models:\n", - " model_object = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " if model[\"mask\"] == \"[MASK]\":\n", - " test_data = Input(type=AssetTypes.URI_FILE, path=test_data_mask_1)\n", - " else:\n", - " test_data = Input(type=AssetTypes.URI_FILE, path=test_data_mask_2)\n", - " pipeline_object = evaluation_pipeline(\n", - " test_data=test_data,\n", - " mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n", - " )\n", - " # don't reuse cached results from previous jobs\n", - " pipeline_object.settings.force_rerun = True\n", - " pipeline_object.settings.default_compute = compute_cluster\n", - " pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - " )\n", - " # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n", - " pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n", - " # wait for the pipeline job to complete\n", - " workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. 
See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n", - "\n", - "![Model evaluation dashboard in AzureML studio](./fill-mask-eval-dashboard.png)\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "\n", - "metrics_df = pd.DataFrame()\n", - "for job in pipeline_jobs:\n", - " # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - " filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n", - " runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - " )\n", - " # get the compute_metrics runs.\n", - " # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - " for run in runs:\n", - " # else, check if run.data.metrics.accuracy exists\n", - " if \"exact_match\" in run.data.metrics:\n", - " # get the metrics from the mlflow run\n", - " run_metric = run.data.metrics\n", - " # add the model name to the run_metric dictionary\n", - " run_metric[\"model_name\"] = job[\"model_name\"]\n", - " # convert the run_metric dictionary to a pandas dataframe\n", - " temp_df = pd.DataFrame(run_metric, index=[0])\n", - " # concat the temp_df to the metrics_df\n", - " metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n", - "\n", - "# move the model_name columns to the first column\n", - "cols = 
metrics_df.columns.tolist()\n", - "cols = cols[-1:] + cols[:-1]\n", - "metrics_df = metrics_df[cols]\n", - "metrics_df.head()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10 - SDK V2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/sdk/python/foundation-models/system/evaluation/question-answering/README.md b/sdk/python/foundation-models/system/evaluation/question-answering/README.md deleted file mode 100644 index e6020e1d50..0000000000 --- a/sdk/python/foundation-models/system/evaluation/question-answering/README.md +++ /dev/null @@ -1,20 +0,0 @@ -## Question Answering - -### List of supported keyword arguments: - -| Keyword Argument | Description | Type | Sample | -|:------------------:|:-------------------------------------------------------------------------------|-----------|-----------------------------| -| metrics | List for subset of metrics to be computed. All supported metrics listed below. 
| list | ["exact_match", "f1_score"] | -| tokenizer | Tokenizer object to perform tokenization on provided input text | boolean | false, true | -| regexes_to_ignore | List of regex to ignore in our input data points | list | ["$[A-Z]+"] | -| ignore_case | Boolean flag to indicate whether we need to ignore case | boolean | false, true | -| ignore_punctuation | Boolean flag to indicate whether we need to ignore punctuation | boolean | false, true | -| ignore_numbers | Boolean flag to indicate whether we need to ignore numbers | boolean | false, true | -| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing) | dict | | - -### List of supported metrics: - -* rouge1 -* rouge2 -* rougeLsum -* rougeL \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/question-answering/eval-config.json b/sdk/python/foundation-models/system/evaluation/question-answering/eval-config.json deleted file mode 100644 index 15165acfe5..0000000000 --- a/sdk/python/foundation-models/system/evaluation/question-answering/eval-config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "metrics": ["exact_match", "f1_score"], - "regexes_to_ignore": ["$[A-Z]+"], - "ignore_case": false, - "ignore_numbers": false, - "ignore_punctuations": true -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/question-answering/question-answering-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/question-answering/question-answering-eval-dashboard.png deleted file mode 100644 index dc76733da1..0000000000 Binary files a/sdk/python/foundation-models/system/evaluation/question-answering/question-answering-eval-dashboard.png and /dev/null differ diff --git a/sdk/python/foundation-models/system/evaluation/question-answering/question-answering.ipynb b/sdk/python/foundation-models/system/evaluation/question-answering/question-answering.ipynb deleted file mode 100644 index 24fb314bc0..0000000000 --- 
a/sdk/python/foundation-models/system/evaluation/question-answering/question-answering.ipynb +++ /dev/null @@ -1,462 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Question Answering Evaluation\n", - "\n", - "This sample shows how use the evaluate a group of models against a given set of metrics for the `question-answering` task. \n", - "\n", - "### Evaluation dataset\n", - "The version 2 of Stanford Question Answering Dataset (SQuAD), SQuAD 2.0, combines the 100,000 questions in SQuAD 1.1 with over 50,000 unanswerable questions written adversarially by crowdworkers to look similar to answerable ones. To do well on SQuAD2.0, systems must not only answer questions when possible, but also determine when no answer is supported by the paragraph and abstain from answering. A copy of the [SQuAD_v2](https://huggingface.co/datasets/squad_v2) dataset is available in the [squad-v2](./squad-v2) folder.\n", - "\n", - "### Model\n", - "The goal of evaluating models is to compare their performance on a variety of metrics. `question-answering` is generic task type that can be used for scenarios to answer questions based on context provided. As such, the models you pick to compare must be finetuned for same scenario. Given that we have the SQuAD_v2 dataset, we would like to look for models finetuned for this specific scenario. We will compare `distilbert-base-uncased-distilled-squad`, `deepset-roberta-base-squad2` and `deepset-minilm-uncased-squad2` in this sample, which are available in the `azureml` system registry.\n", - "\n", - "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). 
\n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick the models to evaluate.\n", - "* Pick and explore evaluate data.\n", - "* Configure the evaluation jobs.\n", - "* Run the evaluation jobs.\n", - "* Review the evaluation metrics. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "%pip install --upgrade azure-ai-ml\n", - "%pip install --upgrade azure-identity\n", - "%pip install --upgrade datasets==2.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319346668 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = None\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential)\n", - " subscription_id = workspace_ml_client.subscription_id\n", - " workspace = workspace_ml_client.workspace_name\n", - " resource_group = workspace_ml_client.resource_group_name\n", - "except Exception as ex:\n", - " print(ex)\n", - " # Enter details of your AML workspace\n", - " subscription_id = \"\"\n", - " resource_group = \"\"\n", - " workspace = \"\"\n", - " workspace_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, workspace\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "preview_registry = \"azureml-staging\"\n", - "registry = \"azureml\"\n", - "\n", - "preview_registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=preview_registry\n", - ")\n", - "print(preview_registry_ml_client)\n", 
- "\n", - "registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=registry\n", - ")\n", - "registry_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - " print(f\"GPU compute '{compute_cluster}' found.\")\n", - "except Exception as ex:\n", - " print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below snippet will allow us to query number of GPU's present on the compute. We can use it to set `gpu_per_node` to ensure utilization of all GPUs in the node." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpus_per_node = 1 # default value\n", - "gpu_count_found = False\n", - "ws_computes = workspace_ml_client.compute.list_sizes()\n", - "for ws_compute in ws_computes:\n", - " if ws_compute.name.lower() == compute.size.lower():\n", - " gpus_per_node = ws_compute.gpus\n", - " print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n", - "# if gpu_count_found not found, then print an error\n", - "if gpus_per_node > 0:\n", - " gpu_count_found = True\n", - "else:\n", - " gpu_count_found = False\n", - " print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. 
Pick the models to evaluate\n", - "\n", - "Verify that the models selected for evaluation are available in system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319354708 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - }, - "pycharm": { - "is_executing": true - } - }, - "outputs": [], - "source": [ - "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n", - "models = [\n", - " {\"name\": \"deepset-minilm-uncased-squad2\", \"version\": \"1\"},\n", - " {\"name\": \"deepset-roberta-base-squad2\", \"version\": \"1\"},\n", - " {\"name\": \"distilbert-base-cased-distilled-squad\", \"version\": \"1\"},\n", - " {\"name\": \"distilbert-base-uncased-distilled-squad\", \"version\": \"1\"},\n", - "]\n", - "for model in models:\n", - " model = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " print(model.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the test dataset for evaluation\n", - "A copy of the Squad v2 is available in the [squad-v2](./squad-v2/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n", - "* To use the entire dataset, uncomment the cells below and run." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# !pip install datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from datasets import load_dataset\n", - "\n", - "# hf_test_data = load_dataset('squad_v2')\n", - "\n", - "# hf_test_data['train'].to_pandas().head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_data = \"./squad-v2/small-test.jsonl\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "pd.read_json(test_data, lines=True).head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the evaluation jobs using the model and data as inputs\n", - "\n", - "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n", - "\n", - "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval_config.json](./eval_config.json) file. We calculate `exact_match` and `f1_score` in this sample.\n", - "\n", - "All supported evaluation configurations for `question-answering` can be found in [README](./README.md)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml import Input\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"model_evaluation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def evaluation_pipeline(mlflow_model):\n", - " evaluation_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry or a model from the workspace\n", - " # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n", - " mlflow_model=mlflow_model,\n", - " # test data\n", - " test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n", - " # The following parameters map to the dataset fields\n", - " input_column_names=\"context,question\",\n", - " label_column_name=\"answer_text\",\n", - " # Evaluation settings\n", - " task=\"question-answering\",\n", - " # config file containing the details of evaluation metrics to calculate\n", - " evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n", - " # config cluster/device job is running on\n", - " # set device to GPU/CPU on basis if GPU count was found\n", - " device=\"gpu\" if gpu_count_found else \"cpu\",\n", - " )\n", - " return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the jobs, passing the model as a parameter to the pipeline created in the above step." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# submit the pipeline job for each model that we want to evaluate\n", - "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n", - "pipeline_jobs = []\n", - "\n", - "experiment_name = \"question-answering-evaluation\"\n", - "\n", - "for model in models:\n", - " model_object = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " pipeline_object = evaluation_pipeline(\n", - " mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n", - " )\n", - " # don't reuse cached results from previous jobs\n", - " pipeline_object.settings.force_rerun = True\n", - " pipeline_object.settings.default_compute = compute_cluster\n", - " pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n", - " pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - " )\n", - " # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n", - " pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n", - " # wait for the pipeline job to complete\n", - " workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. 
See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n", - "\n", - "![Model evaluation dashboard in AzureML studio](./question-answering-eval-dashboard.png)\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "\n", - "metrics_df = pd.DataFrame()\n", - "for job in pipeline_jobs:\n", - " # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - " filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n", - " runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - " )\n", - " # get the compute_metrics runs.\n", - " # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - " for run in runs:\n", - " # else, check if run.data.metrics.accuracy exists\n", - " if \"exact_match\" in run.data.metrics:\n", - " # get the metrics from the mlflow run\n", - " run_metric = run.data.metrics\n", - " # add the model name to the run_metric dictionary\n", - " run_metric[\"model_name\"] = job[\"model_name\"]\n", - " # convert the run_metric dictionary to a pandas dataframe\n", - " temp_df = pd.DataFrame(run_metric, index=[0])\n", - " # concat the temp_df to the metrics_df\n", - " metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n", - "\n", - "# move the model_name columns to the first column\n", - "cols = 
metrics_df.columns.tolist()\n", - "cols = cols[-1:] + cols[:-1]\n", - "metrics_df = metrics_df[cols]\n", - "metrics_df.head()" - ] - } - ], - "metadata": { - "kernel_info": { - "name": "python310-sdkv2" - }, - "kernelspec": { - "display_name": "Python 3.10 - SDK V2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - }, - "microsoft": { - "host": { - "AzureML": { - "notebookHasBeenCompleted": true - } - }, - "ms_spell_check": { - "ms_spell_check_language": "en" - } - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/evaluation/summarization/README.md b/sdk/python/foundation-models/system/evaluation/summarization/README.md deleted file mode 100644 index 4973b2a713..0000000000 --- a/sdk/python/foundation-models/system/evaluation/summarization/README.md +++ /dev/null @@ -1,17 +0,0 @@ -## Summarization - -### List of supported keyword arguments: - -| Keyword Argument | Description | Type | Sample | -|:-----------------:|:--------------------------------------------------------------------------------------|-----------|---------------------------------------------| -| metrics | List for subset of metrics to be computed. All supported metrics listed below. 
| list | ["rouge1", "rouge2", "rougeL", "rougeLsum"] | -| aggregator | Boolean flag to indicate if need to aggregate rouge scores for individual data points | boolean | true, false | -| stemmer | Boolean flag to indicate whether to use Porter Stemmer for suffixes | boolean | true, false | -| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing) | dict | | - -### List of supported metrics: - -* rouge1 -* rouge2 -* rougeLsum -* rougeL \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb b/sdk/python/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb deleted file mode 100644 index 81cfaa38b2..0000000000 --- a/sdk/python/foundation-models/system/evaluation/summarization/abstractive-and-extractive-summarization.ipynb +++ /dev/null @@ -1,451 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Summarization Evaluation - Abstractive and Extractive Summarization\n", - "\n", - "This sample shows how use the evaluate a group of models against a given set of metrics for the `text-summarization` task. \n", - "\n", - "### Evaluation dataset\n", - "The CNN / DailyMail Dataset is an English-language dataset containing just over 300k unique news articles as written by journalists at CNN and the Daily Mail. The current version supports both extractive and abstractive summarization, though the original version was created for machine reading and comprehension and abstractive question answering. A copy of the [cnn_dailymail](https://huggingface.co/datasets/cnn_dailymail) dataset is available in the [cnn_dailymail](./cnn_dailymail) folder.\n", - "\n", - "### Model\n", - "The goal of evaluating models is to compare their performance on a variety of metrics. 
`text-summarization` is generic task type that can be used for scenarios such as abstractive and extractive summarization. As such, the models you pick to compare must be finetuned for same scenario. Given that we have the CNN_Dailymail dataset, we would like to look for models finetuned for this specific scenario. We will compare `sshleifer-distilbart-cnn-12-6`, and `facebook-bart-large-cnn` in this sample, which are available in the `azureml` system registry.\n", - "\n", - "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick the models to evaluate.\n", - "* Pick and explore evaluate data.\n", - "* Configure the evaluation jobs.\n", - "* Run the evaluation jobs.\n", - "* Review the evaluation metrics. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. 
The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "%pip install --upgrade azure-ai-ml\n", - "%pip install --upgrade azure-identity\n", - "%pip install --upgrade datasets==2.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319346668 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = None\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential)\n", - " subscription_id = workspace_ml_client.subscription_id\n", - " workspace = workspace_ml_client.workspace_name\n", - " resource_group = workspace_ml_client.resource_group_name\n", - "except Exception as ex:\n", - " print(ex)\n", - " # Enter details of your AML workspace\n", - " subscription_id = \"\"\n", - " resource_group = \"\"\n", - " workspace = \"\"\n", - " workspace_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, workspace\n", - " )\n", - "\n", - "# the models, fine 
tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "preview_registry = \"azureml-staging\"\n", - "registry = \"azureml\"\n", - "\n", - "preview_registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=preview_registry\n", - ")\n", - "print(preview_registry_ml_client)\n", - "\n", - "registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=registry\n", - ")\n", - "registry_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - " print(f\"GPU compute '{compute_cluster}' found.\")\n", - "except Exception as ex:\n", - " print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below snippet will allow us to query number of GPU's present on the compute. We can use it to set `gpu_per_node` to ensure utilization of all GPUs in the node." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpus_per_node = 1 # default value\n", - "gpu_count_found = False\n", - "ws_computes = workspace_ml_client.compute.list_sizes()\n", - "for ws_compute in ws_computes:\n", - " if ws_compute.name.lower() == compute.size.lower():\n", - " gpus_per_node = ws_compute.gpus\n", - " print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n", - "# if gpu_count_found not found, then print an error\n", - "if gpus_per_node > 0:\n", - " gpu_count_found = True\n", - "else:\n", - " gpu_count_found = False\n", - " print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. 
Pick the models to evaluate\n", - "\n", - "Verify that the models selected for evaluation are available in system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319354708 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n", - "models = [\n", - " {\"name\": \"facebook-bart-large-cnn\", \"version\": \"1\"},\n", - " {\"name\": \"sshleifer-distilbart-cnn-12-6\", \"version\": \"1\"},\n", - "]\n", - "for model in models:\n", - " model = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " print(model.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the test dataset for evaluation\n", - "A copy of the cnn_dailymail is available in the [cnn_dailymail](./cnn_dailymail/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n", - "* To use the entire dataset, uncomment the cells below and run." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from datasets import load_dataset\n", - "\n", - "# hf_test_data = load_dataset('cnn_dailymail')\n", - "\n", - "# hf_test_data['train'].to_pandas().head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_data = \"./cnn_dailymail/small-test.jsonl\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "pd.read_json(test_data, lines=True).head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the evaluation jobs using the model and data as inputs\n", - "\n", - "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n", - "\n", - "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval_config.json](./eval_config.json) file. We calculate `rouge1`, `rouge2`, `rougeL` and `rougeLsum` in this sample.\n", - "\n", - "All supported evaluation configurations for `text-summarization` can be found in [README](./README.md)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml import Input\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"model_evaluation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def evaluation_pipeline(mlflow_model):\n", - " evaluation_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry or a model from the workspace\n", - " # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n", - " mlflow_model=mlflow_model,\n", - " # test data\n", - " test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n", - " # The following parameters map to the dataset fields\n", - " input_column_names=\"input_string\",\n", - " label_column_name=\"summary\",\n", - " # Evaluation settings\n", - " task=\"text-summarization\",\n", - " # config file containing the details of evaluation metrics to calculate\n", - " evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n", - " # config cluster/device job is running on\n", - " # set device to GPU/CPU on basis if GPU count was found\n", - " device=\"gpu\" if gpu_count_found else \"cpu\",\n", - " )\n", - " return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the jobs, passing the model as a parameter to the pipeline created in the above step." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# submit the pipeline job for each model that we want to evaluate\n", - "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n", - "pipeline_jobs = []\n", - "\n", - "experiment_name = \"summarization-evaluation\"\n", - "\n", - "for model in models:\n", - " model_object = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " pipeline_object = evaluation_pipeline(\n", - " mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n", - " )\n", - " # don't reuse cached results from previous jobs\n", - " pipeline_object.settings.force_rerun = True\n", - " pipeline_object.settings.default_compute = compute_cluster\n", - " pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n", - " pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - " )\n", - " # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n", - " pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n", - " # wait for the pipeline job to complete\n", - " workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. 
See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n", - "\n", - "![Model evaluation dashboard in AzureML studio](./text-summarization-eval-dashboard.png)\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "\n", - "metrics_df = pd.DataFrame()\n", - "for job in pipeline_jobs:\n", - " # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - " filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n", - " runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - " )\n", - " # get the compute_metrics runs.\n", - " # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - " for run in runs:\n", - " # else, check if run.data.metrics.accuracy exists\n", - " if \"rouge1\" in run.data.metrics:\n", - " # get the metrics from the mlflow run\n", - " run_metric = run.data.metrics\n", - " # add the model name to the run_metric dictionary\n", - " run_metric[\"model_name\"] = job[\"model_name\"]\n", - " # convert the run_metric dictionary to a pandas dataframe\n", - " temp_df = pd.DataFrame(run_metric, index=[0])\n", - " # concat the temp_df to the metrics_df\n", - " metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n", - "\n", - "# move the model_name columns to the first column\n", - "cols = 
metrics_df.columns.tolist()\n", - "cols = cols[-1:] + cols[:-1]\n", - "metrics_df = metrics_df[cols]\n", - "metrics_df.head()" - ] - } - ], - "metadata": { - "kernel_info": { - "name": "python310-sdkv2" - }, - "kernelspec": { - "display_name": "Python 3.10 - SDK V2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - }, - "microsoft": { - "host": { - "AzureML": { - "notebookHasBeenCompleted": true - } - }, - "ms_spell_check": { - "ms_spell_check_language": "en" - } - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/evaluation/summarization/eval-config.json b/sdk/python/foundation-models/system/evaluation/summarization/eval-config.json deleted file mode 100644 index 899d0c33b8..0000000000 --- a/sdk/python/foundation-models/system/evaluation/summarization/eval-config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "metrics": ["rouge1", "rouge2", "rougeL", "rougeLsum"], - "aggregator": true, - "stemmer": true -} diff --git a/sdk/python/foundation-models/system/evaluation/summarization/text-summarization-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/summarization/text-summarization-eval-dashboard.png deleted file mode 100644 index ff381d293d..0000000000 Binary files a/sdk/python/foundation-models/system/evaluation/summarization/text-summarization-eval-dashboard.png and /dev/null differ diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/README.md b/sdk/python/foundation-models/system/evaluation/text-classification/README.md deleted file mode 100644 index c444319df0..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/README.md +++ 
/dev/null @@ -1,48 +0,0 @@ -## Single Label Classification - -### List of supported keyword arguments: - -| Keyword Argument | Description | Type | Sample | -|:------------------------:|:-------------------------------------------------------------------------------|------------------|-----------------------------------------------------------------| -| metrics | List for subset of metrics to be computed. All supported metrics listed below. | list | ["accuracy", "f1_score_micro", "average_precision_score_macro"] | -| class_labels | List for superset of all existing labels in our dataset | list, np.ndarray | [0, 1, 2, 3], ["CONTRADICTION", "NEUTRAL", "ENTAILMENT"] | -| train_labels | List for labels on which model is trained | list, np.ndarray | [0, 1, 2, 3], ["CONTRADICTION", "NEUTRAL", "ENTAILMENT"] | -| sample_weights | List containing the weight associated with each data sample | list, np.ndarray | [1, 2, 3, 4, 5, 6] | -| y_transformer | Transformer object to be applied on y_pred | | | -| use_binary | Compute metrics only on the true class for binary classification | boolean | true, false | -| enable_metric_confidence | Computes confidence interval for supported metrics | boolean | true, false | -| multilabel | Boolean variable that computes multilabel metrics when set to True | boolean | false (Should be false for single label classification) | -| positive_label | Label to be treated as positive label | int/str | 0, "CONTRADICTION" | -| confidence_metrics | List of metrics to compute confidence intervals | list | ["accuracy", "f1_score_micro"] | -| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing) | dict | | - -### List of supported metrics: - -* log_loss -* average_precision_score_binary -* weighted_accuracy -* AUC_weighted -* f1_score_micro -* f1_score_binary -* precision_score_micro -* precision_score_binary -* recall_score_weighted -* f1_score_weighted -* confusion_matrix -* average_precision_score_micro -* 
recall_score_binary -* recall_score_macro -* average_precision_score_weighted -* AUC_binary -* matthews_correlation -* precision_score_macro -* accuracy -* average_precision_score_macro -* AUC_macro -* recall_score_micro -* balanced_accuracy -* f1_score_macro -* precision_score_weighted -* accuracy_table -* AUC_micro -* norm_macro_recall \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb b/sdk/python/foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb deleted file mode 100644 index efacf9fa46..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/entailment-contradiction.ipynb +++ /dev/null @@ -1,484 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Classification Evaluation - Entailment v/s Contradiction\n", - "\n", - "This sample shows how use the evaluate a group of models against a given set of metrics for the `text-classification` task. \n", - "\n", - "### Evaluation dataset\n", - "The Multi-Genre Natural Language Inference Corpus, or MNLI is a crowd sourced collection of sentence pairs with textual entailment annotations. Given a premise sentence and a hypothesis sentence, the task is to predict whether the premise entails the hypothesis (entailment), contradicts the hypothesis (contradiction), or neither (neutral). The [MNLI](https://huggingface.co/datasets/glue) dataset is a subset of the larger [General Language Understanding Evaluation](https://gluebenchmark.com/) dataset. A copy of this dataset is available in the [glue-mnli-dataset](./glue-mnli-dataset/) folder. \n", - "\n", - "### Model\n", - "The goal of evaluating models is to compare their performance on a variety of metrics. `text-classification` is generic task type that can be used for scenarios such as sentiment analysis, emotion detection, grammar checking, spam filtering, etc. 
As such, the models you pick to compare must be finetuned for same scenario. Given that we have the entailment v/s contradiction dataset, we would like to look for models finetuned for this specific scenario. We will compare `roberta-large-mnli`, `microsoft-deberta-large-mnli` and `microsoft-deberta-base-mnli` in this sample, which are available in the `azureml` system registry.\n", - "\n", - "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick the models to evaluate.\n", - "* Pick and explore evaluate data.\n", - "* Configure the evaluation jobs.\n", - "* Run the evaluation jobs.\n", - "* Review the evaluation metrics. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. 
The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install azure-ai-ml\n", - "%pip install azure-identity\n", - "%pip install datasets==2.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319346668 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = None\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential)\n", - " subscription_id = workspace_ml_client.subscription_id\n", - " workspace = workspace_ml_client.workspace_name\n", - " resource_group = workspace_ml_client.resource_group_name\n", - "except Exception as ex:\n", - " print(ex)\n", - " # Enter details of your AML workspace\n", - " subscription_id = \"\"\n", - " resource_group = \"\"\n", - " workspace = \"\"\n", - " workspace_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, workspace\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML 
system registry, \"azureml-preview\"\n", - "preview_registry = \"azureml-staging\"\n", - "registry = \"azureml\"\n", - "\n", - "preview_registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=preview_registry\n", - ")\n", - "print(preview_registry_ml_client)\n", - "\n", - "registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=registry\n", - ")\n", - "registry_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - " print(f\"GPU compute '{compute_cluster}' found.\")\n", - "except Exception as ex:\n", - " print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below snippet will allow us to query number of GPU's present on the compute. We can use it to set `gpu_per_node` to ensure utilization of all GPUs in the node." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpus_per_node = 1 # default value\n", - "gpu_count_found = False\n", - "ws_computes = workspace_ml_client.compute.list_sizes()\n", - "for ws_compute in ws_computes:\n", - " if ws_compute.name.lower() == compute.size.lower():\n", - " gpus_per_node = ws_compute.gpus\n", - " print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n", - "# if gpu_count_found not found, then print an error\n", - "if gpus_per_node > 0:\n", - " gpu_count_found = True\n", - "else:\n", - " gpu_count_found = False\n", - " print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. 
Pick the models to evaluate\n", - "\n", - "Verify that the models selected for evaluation are available in system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n", - "models = [\n", - " {\"name\": \"microsoft-deberta-base-mnli\", \"version\": \"1\"},\n", - " {\"name\": \"microsoft-deberta-large-mnli\", \"version\": \"1\"},\n", - " {\"name\": \"roberta-large-mnli\", \"version\": \"1\"},\n", - " # please prepare appropriate dataset and config in similar way to run evaluation on this dataset\n", - " # {\"name\": \"roberta-large-openai-detector\", \"version\": \"1\"},\n", - " # {\"name\": \"roberta-base-openai-detector\", \"version\": \"1\"},\n", - " # {\"name\": \"distilbert-base-uncased-finetuned-sst-2-english\", \"version\": \"1\"},\n", - " # {\"name\": \"finiteautomata-bertweet-base-sentiment-analysis\", \"version\": \"1\"},\n", - "]\n", - "for model in models:\n", - " model = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " print(model.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the test dataset for evaluation\n", - "A copy of the MNLI is available in the [glue-mnli-dataset](./glue-mnli-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./glue-mnli-dataset/label.json](./glue-mnli-dataset/label.json). This step is needed because the selected models will return labels such `CONTRADICTION`, `CONTRADICTION`, etc. when running prediction. 
If the labels in your ground truth data are left as `0`, `1`, `2`, etc., then they would not match with prediction labels returned by the models.\n", - "* The dataset contains `premise` and `hypothesis` as two different columns. However, the models expect a single string for prediction in the format `[CLS] [SEP] [SEP]`. Hence we merge the columns and drop the original columns.\n", - "* We want this sample to run quickly, so save smaller dataset containing 10% of the original. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "dataset_dir = \"./glue-mnli-dataset\"\n", - "data_file = \"train.jsonl\"\n", - "\n", - "# load the train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "df = pd.read_json(os.path.join(dataset_dir, data_file), lines=True)\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the id2label json element of the label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n", - "import json\n", - "\n", - "label_file = \"label.json\"\n", - "with open(os.path.join(dataset_dir, label_file)) as f:\n", - " id2label = json.load(f)\n", - " id2label = id2label[\"id2label\"]\n", - " label_df = pd.DataFrame.from_dict(\n", - " id2label, orient=\"index\", columns=[\"label_string\"]\n", - " )\n", - " label_df[\"label\"] = label_df.index.astype(\"int64\")\n", - " label_df = label_df[[\"label\", \"label_string\"]]\n", - "label_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# join the train, validation and test dataframes with the id2label dataframe to get the label_string 
column\n", - "df = df.merge(label_df, on=\"label\", how=\"left\")\n", - "# concat the premise and hypothesis columns to with \"[CLS]\" in the beginning and \"[SEP]\" in the middle and end to get the text column\n", - "df[\"input_string\"] = \"[CLS] \" + df[\"premise\"] + \" [SEP] \" + df[\"hypothesis\"] + \" [SEP]\"\n", - "# drop the idx, premise and hypothesis columns as they are not needed\n", - "df = df.drop(columns=[\"idx\", \"premise\", \"hypothesis\"])\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save 10% of the rows from the train, validation and test dataframes into files with small_ prefix in the ./dataset_dir folder\n", - "small_data_file = \"small_train.jsonl\"\n", - "df.sample(frac=0.1).to_json(\n", - " os.path.join(dataset_dir, small_data_file), orient=\"records\", lines=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the evaluation jobs using the model and data as inputs\n", - " \n", - "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model. \n", - "\n", - "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval-config.json](./eval-config.json) file.\n", - "\n", - "All supported evaluation configurations for `text-classification` can be found in [README](./README.md)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n", - "from azure.ai.ml import PyTorchDistribution, Input\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"model_evaluation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def evaluation_pipeline(mlflow_model):\n", - " evaluation_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry or a model from the workspace\n", - " # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n", - " mlflow_model=mlflow_model,\n", - " # test data\n", - " test_data=Input(\n", - " type=AssetTypes.URI_FILE, path=os.path.join(dataset_dir, small_data_file)\n", - " ),\n", - " # The following parameters map to the dataset fields\n", - " input_column_names=\"input_string\",\n", - " label_column_name=\"label_string\",\n", - " # Evaluation settings\n", - " task=\"text-classification\",\n", - " # config file containing the details of evaluation metrics to calculate\n", - " evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n", - " # config cluster/device job is running on\n", - " # set device to GPU/CPU on basis if GPU count was found\n", - " device=\"gpu\" if gpu_count_found else \"cpu\",\n", - " )\n", - " return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the jobs, passing the model as a parameter to the pipeline created in the above step." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# submit the pipeline job for each model that we want to evaluate\n", - "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n", - "pipeline_jobs = []\n", - "\n", - "experiment_name = \"text-classification-mnli-evaluation\"\n", - "\n", - "for model in models:\n", - " model_object = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " pipeline_object = evaluation_pipeline(\n", - " mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n", - " )\n", - " # don't reuse cached results from previous jobs\n", - " pipeline_object.settings.force_rerun = True\n", - " pipeline_object.settings.default_compute = compute_cluster\n", - " pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n", - " pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - " )\n", - " # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n", - " pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n", - " display(pipeline_jobs)\n", - " # wait for the pipeline job to complete\n", - "# workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_job" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. 
\n", - "\n", - "![Model evaluation dashboard in AzureML studio](./mnli-eval-dashboard.png)\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "\n", - "metrics_df = pd.DataFrame()\n", - "for job in pipeline_jobs:\n", - " # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - " filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n", - " runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - " )\n", - " # get the compute_metrics runs.\n", - " # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - " for run in runs:\n", - " # else, check if run.data.metrics.accuracy exists\n", - " if \"accuracy\" in run.data.metrics:\n", - " # get the metrics from the mlflow run\n", - " run_metric = run.data.metrics\n", - " # add the model name to the run_metric dictionary\n", - " run_metric[\"model_name\"] = job[\"model_name\"]\n", - " # convert the run_metric dictionary to a pandas dataframe\n", - " temp_df = pd.DataFrame(run_metric, index=[0])\n", - " # concat the temp_df to the metrics_df\n", - " metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n", - "\n", - "# move the model_name columns to the first column\n", - "cols = metrics_df.columns.tolist()\n", - "cols = cols[-1:] + cols[:-1]\n", - "metrics_df = metrics_df[cols]\n", - "metrics_df.head()" - ] - } - ], - "metadata": { - "kernelspec": { - 
"display_name": "Python 3.10 - SDK V2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/eval-config.json b/sdk/python/foundation-models/system/evaluation/text-classification/eval-config.json deleted file mode 100644 index 4f852bdcb1..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/eval-config.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "metrics": ["average_precision_score_macro", "AUC_macro", "recall_score_macro", "average_precision_score_binary", "average_precision_score_micro", "AUC_binary", "recall_score_micro", "AUC_micro", "norm_macro_recall", "average_precision_score_weighted", "weighted_accuracy", "precision_score_micro", "f1_score_binary", "accuracy_table", "precision_score_macro", "f1_score_micro", "precision_score_weighted", "f1_score_weighted", "confusion_matrix", "recall_score_binary", "matthews_correlation", "log_loss", "accuracy", "precision_score_binary", "balanced_accuracy", "AUC_weighted", "f1_score_macro", "recall_score_weighted"], - "class_labels": ["CONTRADICTION", "NEUTRAL", "ENTAILMENT"], - "train_labels": ["CONTRADICTION", "NEUTRAL", "ENTAILMENT"], - "multilabel": false, - "enable_metric_confidence": true, - "confidence_metrics": ["accuracy", "f1_score_micro"], - "use_binary": false -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/download-dataset.py 
b/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/download-dataset.py deleted file mode 100644 index b6794c4b4f..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/download-dataset.py +++ /dev/null @@ -1,43 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="glue", help="dataset name") -# add an argument to specify the config name of the dataset -parser.add_argument( - "--config_name", type=str, default="mnli", help="config name of the dataset" -) -# argument to save a fraction of the dataset -parser.add_argument( - "--fraction", type=float, default=0.1, help="fraction of the dataset to save" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="./", - help="directory to download the dataset to", -) -# add an argument to specify the split of the dataset to download -parser.add_argument( - "--split", type=str, default="train", help="split of the dataset to download" -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset, config_name=args.config_name): - if split == args.split: - print(f"Loading {split} split of {args.dataset} dataset...") - # load the split of the dataset - dataset = load_dataset(args.dataset, args.config_name, split=split) - # save the split of the dataset to the download directory as json lines file - dataset.select(range(int(dataset.num_rows * args.fraction))).to_json( - os.path.join(args.download_dir, f"{split}.jsonl") - ) diff --git 
a/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/label.json b/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/label.json deleted file mode 100644 index b836faff17..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/glue-mnli-dataset/label.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "id2label": { - "0": "ENTAILMENT", - "1": "NEUTRAL", - "2": "CONTRADICTION" - }, - "label2id": { - "ENTAILMENT": 0, - "CONTRADICTION": 2, - "NEUTRAL": 1 - } -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/download-dataset.py b/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/download-dataset.py deleted file mode 100644 index b6794c4b4f..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/download-dataset.py +++ /dev/null @@ -1,43 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="glue", help="dataset name") -# add an argument to specify the config name of the dataset -parser.add_argument( - "--config_name", type=str, default="mnli", help="config name of the dataset" -) -# argument to save a fraction of the dataset -parser.add_argument( - "--fraction", type=float, default=0.1, help="fraction of the dataset to save" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="./", - help="directory to download the dataset to", -) -# add an argument to specify the split of the dataset to download -parser.add_argument( - "--split", type=str, default="train", help="split of the dataset to download" -) -args = parser.parse_args() - -# create the download directory if it does 
not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset, config_name=args.config_name): - if split == args.split: - print(f"Loading {split} split of {args.dataset} dataset...") - # load the split of the dataset - dataset = load_dataset(args.dataset, args.config_name, split=split) - # save the split of the dataset to the download directory as json lines file - dataset.select(range(int(dataset.num_rows * args.fraction))).to_json( - os.path.join(args.download_dir, f"{split}.jsonl") - ) diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/label.json b/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/label.json deleted file mode 100644 index 67b701dbfd..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/glue-sst2-dataset/label.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "id2label": { - "0": "NEGATIVE", - "1": "POSITIVE" - }, - "label2id": { - "NEGATIVE": 0, - "POSITIVE": 1 - } -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/mnli-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/text-classification/mnli-eval-dashboard.png deleted file mode 100644 index 5bb523f46d..0000000000 Binary files a/sdk/python/foundation-models/system/evaluation/text-classification/mnli-eval-dashboard.png and /dev/null differ diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb b/sdk/python/foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb deleted file mode 100644 index e185690d05..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/sentiment-analysis.ipynb +++ /dev/null @@ -1,477 +0,0 @@ -{ - "cells": [ - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Classification Evaluation - Sentiment Analysis\n", - "\n", - "This sample shows how use the evaluate a group of models against a given set of metrics for the `text-classification` task. \n", - "\n", - "### Evaluation dataset\n", - "The Stanford Sentiment Treebank consists of sentences from movie reviews and human annotations of their sentiment. The task is to predict the sentiment of a given sentence. It uses the two-way (positive/negative) class split, with only sentence-level labels. The [SST2](https://huggingface.co/datasets/glue/viewer/sst2/validation) dataset is a subset of the larger [General Language Understanding Evaluation](https://gluebenchmark.com/) dataset. A copy of this dataset is available in the [glue-sst2-dataset](./glue-sst2-dataset/) folder.\n", - "\n", - "### Model\n", - "The goal of evaluating models is to compare their performance on a variety of metrics. `text-classification` is generic task type that can be used for scenarios such as sentiment analysis, emotion detection, grammar checking, spam filtering, etc. As such, the models you pick to compare must be finetuned for same scenario. Given that we have the sentiment analysis dataset, we would like to look for models finetuned for this specific scenario. We will compare `distilbert-base-uncased-finetuned-sst-2-english` and `finiteautomata-bertweet-base-sentiment-analysis` in this sample, which are available in the `azureml` system registry.\n", - "\n", - "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). 
\n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick the models to evaluate.\n", - "* Pick and explore evaluate data.\n", - "* Configure the evaluation jobs.\n", - "* Run the evaluation jobs.\n", - "* Review the evaluation metrics. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install azure-ai-ml\n", - "%pip install azure-identity\n", - "%pip install datasets==2.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319346668 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = None\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential)\n", - " subscription_id = workspace_ml_client.subscription_id\n", - " workspace = workspace_ml_client.workspace_name\n", - " resource_group = workspace_ml_client.resource_group_name\n", - "except Exception as ex:\n", - " print(ex)\n", - " # Enter details of your AML workspace\n", - " subscription_id = \"\"\n", - " resource_group = \"\"\n", - " workspace = \"\"\n", - " workspace_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, workspace\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "preview_registry = \"azureml-staging\"\n", - "registry = \"azureml\"\n", - "\n", - "preview_registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=preview_registry\n", - ")\n", - "print(preview_registry_ml_client)\n", - "\n", - "registry_ml_client = MLClient(\n", - " credential, subscription_id, 
resource_group, registry_name=registry\n", - ")\n", - "registry_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - " print(f\"GPU compute '{compute_cluster}' found.\")\n", - "except Exception as ex:\n", - " print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below snippet will allow us to query number of GPU's present on the compute. We can use it to set `gpu_per_node` to ensure utilization of all GPUs in the node." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpus_per_node = 1 # default value\n", - "gpu_count_found = False\n", - "ws_computes = workspace_ml_client.compute.list_sizes()\n", - "for ws_compute in ws_computes:\n", - " if ws_compute.name.lower() == compute.size.lower():\n", - " gpus_per_node = ws_compute.gpus\n", - " print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n", - "# if gpu_count_found not found, then print an error\n", - "if gpus_per_node > 0:\n", - " gpu_count_found = True\n", - "else:\n", - " gpu_count_found = False\n", - " print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. 
Pick the models to evaluate\n", - "\n", - "Verify that the models selected for evaluation are available in system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n", - "models = [\n", - " {\"name\": \"distilbert-base-uncased-finetuned-sst-2-english\", \"version\": \"1\"},\n", - " # please prepare appropriate dataset and config in similar way to run evaluation on this dataset\n", - " # {\"name\": \"finiteautomata-bertweet-base-sentiment-analysis\", \"version\": \"1\"},\n", - "]\n", - "for model in models:\n", - " model = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " print(model.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the test dataset for evaluation\n", - "A copy of the SST2 is available in the [glue-sst2-dataset](./glue-sst2-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./glue-sst2-dataset/label.json](./glue-sst2-dataset/label.json). This step is needed because the selected models will return labels such `POSITVE`, `NEGATIVE`, etc. when running prediction. If the labels in your ground truth data are left as `0`, `1`, etc., then they would not match with prediction labels returned by the models.\n", - "* The dataset contains `sentence` and `label` as two different columns. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "dataset_dir = \"./glue-sst2-dataset\"\n", - "data_file = \"validation.jsonl\"\n", - "\n", - "# load the train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "df = pd.read_json(os.path.join(dataset_dir, data_file), lines=True)\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the id2label json element of the label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n", - "import json\n", - "\n", - "label_file = \"label.json\"\n", - "with open(os.path.join(dataset_dir, label_file)) as f:\n", - " id2label = json.load(f)\n", - " id2label = id2label[\"id2label\"]\n", - " label_df = pd.DataFrame.from_dict(\n", - " id2label, orient=\"index\", columns=[\"label_string\"]\n", - " )\n", - " label_df[\"label\"] = label_df.index.astype(\"int64\")\n", - " label_df = label_df[[\"label\", \"label_string\"]]\n", - "label_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# join the train, validation and test dataframes with the id2label dataframe to get the label_string column\n", - "df = df.merge(label_df, on=\"label\", how=\"left\")\n", - "# creating a new column to match the signature of mlflow base model\n", - "df[\"input_string\"] = df[\"sentence\"]\n", - "# drop the idx, sentence columns as they are not needed\n", - "df = df.drop(columns=[\"idx\", \"sentence\"])\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save 10% of the rows from the train, 
validation and test dataframes into files with small_ prefix in the ./dataset_dir folder\n", - "small_data_file = \"small_validation.jsonl\"\n", - "df.sample(frac=0.1).to_json(\n", - " os.path.join(dataset_dir, small_data_file), orient=\"records\", lines=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the evaluation jobs using the model and data as inputs\n", - " \n", - "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model. \n", - "\n", - "Note that the metrics that the evaluation jobs need to calculate are specified in the [sst2-eval-config.json](./sst2-eval-config.json) file.\n", - "\n", - "All supported evaluation configurations for `text-classification` can be found in [README](./README.md)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n", - "from azure.ai.ml import PyTorchDistribution, Input\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"model_evaluation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def evaluation_pipeline(mlflow_model):\n", - " evaluation_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry or a model from the workspace\n", - " # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n", - " mlflow_model=mlflow_model,\n", - " # test data\n", - " test_data=Input(\n", - " type=AssetTypes.URI_FILE, path=os.path.join(dataset_dir, small_data_file)\n", - " ),\n", - " # The following parameters map to the dataset fields\n", - " input_column_names=\"input_string\",\n", - 
" label_column_name=\"label_string\",\n", - " # Evaluation settings\n", - " task=\"text-classification\",\n", - " # config file containing the details of evaluation metrics to calculate\n", - " evaluation_config=Input(\n", - " type=AssetTypes.URI_FILE, path=\"./sst2-eval-config.json\"\n", - " ),\n", - " # config cluster/device job is running on\n", - " # set device to GPU/CPU on basis if GPU count was found\n", - " device=\"gpu\" if gpu_count_found else \"cpu\",\n", - " )\n", - " return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the jobs, passing the model as a parameter to the pipeline created in the above step." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# submit the pipeline job for each model that we want to evaluate\n", - "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n", - "pipeline_jobs = []\n", - "\n", - "experiment_name = \"text-classification-sentiment-analysis\"\n", - "\n", - "for model in models:\n", - " model_object = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " pipeline_object = evaluation_pipeline(\n", - " mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n", - " )\n", - " # don't reuse cached results from previous jobs\n", - " pipeline_object.settings.force_rerun = True\n", - " pipeline_object.settings.default_compute = compute_cluster\n", - " pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n", - " pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - " )\n", - " # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n", - " pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": 
pipeline_job.name})\n", - " # wait for the pipeline job to complete\n", - " workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n", - "\n", - "![Model evaluation dashboard in AzureML studio](./sst2-eval-dashboard.png)\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "\n", - "metrics_df = pd.DataFrame()\n", - "for job in pipeline_jobs:\n", - " # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - " filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n", - " runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - " )\n", - " # get the compute_metrics runs.\n", - " # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - " for run in runs:\n", - " # else, check if run.data.metrics.accuracy exists\n", - " if \"accuracy\" in run.data.metrics:\n", - " # get the metrics from the mlflow run\n", - " run_metric = run.data.metrics\n", - " # add the model name to the run_metric 
dictionary\n", - " run_metric[\"model_name\"] = job[\"model_name\"]\n", - " # convert the run_metric dictionary to a pandas dataframe\n", - " temp_df = pd.DataFrame(run_metric, index=[0])\n", - " # concat the temp_df to the metrics_df\n", - " metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n", - "\n", - "# move the model_name columns to the first column\n", - "cols = metrics_df.columns.tolist()\n", - "cols = cols[-1:] + cols[:-1]\n", - "metrics_df = metrics_df[cols]\n", - "metrics_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10 - SDK V2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-config.json b/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-config.json deleted file mode 100644 index 869f0860f1..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-config.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "metrics": ["average_precision_score_macro", "AUC_macro", "recall_score_macro", "average_precision_score_binary", "average_precision_score_micro", "AUC_binary", "recall_score_micro", "AUC_micro", "norm_macro_recall", "average_precision_score_weighted", "weighted_accuracy", "precision_score_micro", "f1_score_binary", "accuracy_table", "precision_score_macro", "f1_score_micro", "precision_score_weighted", "f1_score_weighted", 
"confusion_matrix", "recall_score_binary", "matthews_correlation", "log_loss", "accuracy", "precision_score_binary", "balanced_accuracy", "AUC_weighted", "f1_score_macro", "recall_score_weighted"], - "class_labels": ["NEGATIVE", "POSITIVE"], - "train_labels": ["NEGATIVE", "POSITIVE"], - "multilabel": false, - "enable_metric_confidence": true, - "confidence_metrics": ["accuracy", "f1_score_micro"], - "use_binary": false -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-dashboard.png deleted file mode 100644 index fc389b52b2..0000000000 Binary files a/sdk/python/foundation-models/system/evaluation/text-classification/sst2-eval-dashboard.png and /dev/null differ diff --git a/sdk/python/foundation-models/system/evaluation/text-generation/README.md b/sdk/python/foundation-models/system/evaluation/text-generation/README.md deleted file mode 100644 index 2f67947277..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-generation/README.md +++ /dev/null @@ -1,23 +0,0 @@ -## Text Generation - -### List of supported keyword arguments: - -| Keyword Argument | Description | Type | Sample | -|:-----------------:|:--------------------------------------------------------------------------------------|-----------|------------------------------------------| -| metrics | List for subset of metrics to be computed. All supported metrics listed below. 
| list | ["bleu_1", "bleu_2", "rouge1", "rouge2"] | -| tokenizer | Tokenizer object to perform tokenization on provided input text | | | -| smoothing | Boolean flag to indicate if bleu score needs to be smoothened | boolean | false, true | -| aggregator | Boolean flag to indicate if need to aggregate rouge scores for individual data points | boolean | true, false | -| stemmer | Boolean flag to indicate whether to use Porter Stemmer for suffixes | boolean | true, false | -| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing) | dict | | - -### List of supported metrics: - -* rouge1 -* rouge2 -* rougeLsum -* rougeL -* bleu_1 -* bleu_2 -* bleu_3 -* bleu_4 \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/text-generation/eval-config.json b/sdk/python/foundation-models/system/evaluation/text-generation/eval-config.json deleted file mode 100644 index 8dd4358113..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-generation/eval-config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "metrics": ["rouge1", "rouge2", "bleu_3", "bleu_4"], - "aggregator": true, - "stemmer": true, - "smoothing": false -} diff --git a/sdk/python/foundation-models/system/evaluation/text-generation/text-generation-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/text-generation/text-generation-eval-dashboard.png deleted file mode 100644 index e9f076781f..0000000000 Binary files a/sdk/python/foundation-models/system/evaluation/text-generation/text-generation-eval-dashboard.png and /dev/null differ diff --git a/sdk/python/foundation-models/system/evaluation/text-generation/text-generation.ipynb b/sdk/python/foundation-models/system/evaluation/text-generation/text-generation.ipynb deleted file mode 100644 index 466762c3f4..0000000000 --- a/sdk/python/foundation-models/system/evaluation/text-generation/text-generation.ipynb +++ /dev/null @@ -1,442 +0,0 @@ -{ - "cells": [ - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "## Text Generation Evaluation\n", - "\n", - "This sample shows how use the evaluate a group of models against a given set of metrics for the `text-generation` task.\n", - "\n", - "### Evaluation dataset\n", - "The CNN / DailyMail Dataset is an English-language dataset containing just over 300k unique news articles as written by journalists at CNN and the Daily Mail. The current version supports both extractive and abstractive summarization, though the original version was created for machine reading and comprehension and abstractive question answering. A copy of the [cnn_dailymail](https://huggingface.co/datasets/cnn_dailymail) dataset is available in the [text-generation](./text-generation) folder.\n", - "\n", - "### Model\n", - "The goal of evaluating models is to compare their performance on a variety of metrics. `text-generation` is generic task type that can be used for scenarios to generate text based on context provided. As such, the models you pick to compare must be finetuned for same scenario. Given that we have the cnn_dailymail dataset, we would like to look for models finetuned for this specific scenario. We will compare `gpt2`, `gpt2-medium` and `distilgpt2` in this sample, which are available in the `azureml` system registry.\n", - "\n", - "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb).\n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick the models to evaluate.\n", - "* Pick and explore evaluate data.\n", - "* Configure the evaluation jobs.\n", - "* Run the evaluation jobs.\n", - "* Review the evaluation metrics." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. 
Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "%pip install --upgrade azure-ai-ml\n", - "%pip install --upgrade azure-identity\n", - "%pip install --upgrade datasets==2.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319346668 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = None\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential)\n", - " subscription_id = workspace_ml_client.subscription_id\n", - " workspace = workspace_ml_client.workspace_name\n", - " resource_group = workspace_ml_client.resource_group_name\n", - "except Exception as ex:\n", - " print(ex)\n", - " # Enter details of your AML workspace\n", - " subscription_id = \"\"\n", - " resource_group = \"\"\n", - " workspace = \"\"\n", - " workspace_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, workspace\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "preview_registry = \"azureml-staging\"\n", - "registry = \"azureml\"\n", - "\n", - "preview_registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=preview_registry\n", - ")\n", - "print(preview_registry_ml_client)\n", 
- "\n", - "registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=registry\n", - ")\n", - "registry_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - " print(f\"GPU compute '{compute_cluster}' found.\")\n", - "except Exception as ex:\n", - " print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below snippet will allow us to query number of GPU's present on the compute. We can use it to set `gpu_per_node` to ensure utilization of all GPUs in the node." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpus_per_node = 1 # default value\n", - "gpu_count_found = False\n", - "ws_computes = workspace_ml_client.compute.list_sizes()\n", - "for ws_compute in ws_computes:\n", - " if ws_compute.name.lower() == compute.size.lower():\n", - " gpus_per_node = ws_compute.gpus\n", - " print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n", - "# if gpu_count_found not found, then print an error\n", - "if gpus_per_node > 0:\n", - " gpu_count_found = True\n", - "else:\n", - " gpu_count_found = False\n", - " print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. 
Pick the models to evaluate\n", - "\n", - "Verify that the models selected for evaluation are available in system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319354708 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n", - "models = [\n", - " {\"name\": \"distilgpt2\", \"version\": \"1\"},\n", - " {\"name\": \"gpt2\", \"version\": \"1\"},\n", - " {\"name\": \"gpt2-large\", \"version\": \"1\"},\n", - " {\"name\": \"gpt2-medium\", \"version\": \"1\"},\n", - "]\n", - "for model in models:\n", - " model = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " print(model.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the test dataset for evaluation\n", - "A copy of the cnn_dailymail is available in the [text-generation](./text-generation/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n", - "* To use the entire dataset, uncomment the cells below and run." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# !pip install datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from datasets import load_dataset\n", - "\n", - "# hf_test_data = load_dataset('cnn_dailymail')\n", - "\n", - "# hf_test_data['train'].to_pandas().head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_data = \"./text-generation/small-test.jsonl\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "pd.read_json(test_data, lines=True).head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the evaluation jobs using the model and data as inputs\n", - "\n", - "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n", - "\n", - "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval_config.json](./eval_config.json) file. We calculate `rouge1`, `rouge2`, `bleu_3` and `bleu_4` in this sample.\n", - "\n", - "All supported evaluation configurations for `text-generation` can be found in [README](./README.md)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml import Input\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"model_evaluation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def evaluation_pipeline(mlflow_model):\n", - " evaluation_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry or a model from the workspace\n", - " # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n", - " mlflow_model=mlflow_model,\n", - " # test data\n", - " test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n", - " # The following parameters map to the dataset fields\n", - " input_column_names=\"input_string\",\n", - " label_column_name=\"ground_truth\",\n", - " # Evaluation settings\n", - " task=\"text-generation\",\n", - " # config file containing the details of evaluation metrics to calculate\n", - " evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n", - " # config cluster/device job is running on\n", - " # set device to GPU/CPU on basis if GPU count was found\n", - " device=\"gpu\" if gpu_count_found else \"cpu\",\n", - " )\n", - " return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the jobs, passing the model as a parameter to the pipeline created in the above step." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# submit the pipeline job for each model that we want to evaluate\n", - "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n", - "pipeline_jobs = []\n", - "\n", - "experiment_name = \"text-generation-evaluation\"\n", - "\n", - "for model in models:\n", - " model_object = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " pipeline_object = evaluation_pipeline(\n", - " mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n", - " )\n", - " # don't reuse cached results from previous jobs\n", - " pipeline_object.settings.force_rerun = True\n", - " pipeline_object.settings.default_compute = compute_cluster\n", - " pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - " )\n", - " # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n", - " pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n", - " # wait for the pipeline job to complete\n", - " workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. 
See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n", - "\n", - "![Model evaluation dashboard in AzureML studio](./text-generation-eval-dashboard.png)\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "\n", - "metrics_df = pd.DataFrame()\n", - "for job in pipeline_jobs:\n", - " # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - " filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n", - " runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - " )\n", - " # get the compute_metrics runs.\n", - " # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - " for run in runs:\n", - " # else, check if run.data.metrics.accuracy exists\n", - " if \"exact_match\" in run.data.metrics:\n", - " # get the metrics from the mlflow run\n", - " run_metric = run.data.metrics\n", - " # add the model name to the run_metric dictionary\n", - " run_metric[\"model_name\"] = job[\"model_name\"]\n", - " # convert the run_metric dictionary to a pandas dataframe\n", - " temp_df = pd.DataFrame(run_metric, index=[0])\n", - " # concat the temp_df to the metrics_df\n", - " metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n", - "\n", - "# move the model_name columns to the first column\n", - "cols = 
metrics_df.columns.tolist()\n", - "cols = cols[-1:] + cols[:-1]\n", - "metrics_df = metrics_df[cols]\n", - "metrics_df.head()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10 - SDK V2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/sdk/python/foundation-models/system/evaluation/token-classification/README.md b/sdk/python/foundation-models/system/evaluation/token-classification/README.md deleted file mode 100644 index 6f82df11eb..0000000000 --- a/sdk/python/foundation-models/system/evaluation/token-classification/README.md +++ /dev/null @@ -1,22 +0,0 @@ -## Named Entity Recognition - -### List of supported keyword arguments: - -| Keyword Argument | Description | Type | Sample | -|:------------------------:|:-------------------------------------------------------------------------------|-----------|---------------------------------------------------------------| -| metrics | List for subset of metrics to be computed. All supported metrics listed below. 
| list | ["accuracy", "f1_score_macro", "f1_score_micro"] | -| labels_list | List for supported labels for tokens | list | ["B-PER", "I-PER", "O", "B-LOC", "I-LOC", "B-MISC", "I-MISC"] | -| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing) | dict | | - -### List of supported metrics: - -* f1_score_macro -* precision_score_weighted -* precision_score_macro -* f1_score_weighted -* precision_score_micro -* recall_score_weighted -* f1_score_micro -* accuracy -* recall_score_micro -* recall_score_macro \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/token-classification/eval-config.json b/sdk/python/foundation-models/system/evaluation/token-classification/eval-config.json deleted file mode 100644 index 360dd40365..0000000000 --- a/sdk/python/foundation-models/system/evaluation/token-classification/eval-config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "metrics": ["accuracy", "f1_score_macro", "f1_score_micro"] -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb b/sdk/python/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb deleted file mode 100644 index a1154858de..0000000000 --- a/sdk/python/foundation-models/system/evaluation/token-classification/news-articles-entity-recognition.ipynb +++ /dev/null @@ -1,450 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Named Entity Recognition Evaluation\n", - "\n", - "This sample shows how use the evaluate a group of models against a given set of metrics for the `text-named-entity-recognition` task. \n", - "\n", - "### Evaluation dataset\n", - "The CoNLL-2003 shared task data files contain four columns separated by a single space. Each word has been put on a separate line and there is an empty line after each sentence. 
The first item on each line is a word, the second a part-of-speech (POS) tag, the third a syntactic chunk tag and the fourth the named entity tag. The chunk tags and the named entity tags have the format I-TYPE which means that the word is inside a phrase of type TYPE. Only if two phrases of the same type immediately follow each other, the first word of the second phrase will have tag B-TYPE to show that it starts a new phrase. A word with tag O is not part of a phrase. Note the dataset uses IOB2 tagging scheme, whereas the original dataset uses IOB1. A copy of the [CoNLL-2003](https://huggingface.co/datasets/conll2003) dataset is available in the [conll2003](./conll2003) folder.\n", - "\n", - "### Model\n", - "The goal of evaluating models is to compare their performance on a variety of metrics. `text-named-entity-recognition` is generic task type that can be used for scenarios to recognise named entities such as persons, locations, organizations, etc. As such, the models you pick to compare must be finetuned for same scenario. Given that we have the CoNLL-2003 dataset, we would like to look for models finetuned for this specific scenario. We will review `jean-baptiste-camembert-ner` in this sample, which is available in the `azureml` system registry.\n", - "\n", - "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick the models to evaluate.\n", - "* Pick and explore evaluate data.\n", - "* Configure the evaluation jobs.\n", - "* Run the evaluation jobs.\n", - "* Review the evaluation metrics. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. 
Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "%pip install --upgrade azure-ai-ml\n", - "%pip install --upgrade azure-identity\n", - "%pip install --upgrade datasets==2.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319346668 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = None\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential)\n", - " subscription_id = workspace_ml_client.subscription_id\n", - " workspace = workspace_ml_client.workspace_name\n", - " resource_group = workspace_ml_client.resource_group_name\n", - "except Exception as ex:\n", - " print(ex)\n", - " # Enter details of your AML workspace\n", - " subscription_id = \"\"\n", - " resource_group = \"\"\n", - " workspace = \"\"\n", - " workspace_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, workspace\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "preview_registry = \"azureml-staging\"\n", - "registry = \"azureml\"\n", - "\n", - "preview_registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=preview_registry\n", - ")\n", - "print(preview_registry_ml_client)\n", - "\n", - 
"registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=registry\n", - ")\n", - "registry_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - " print(f\"GPU compute '{compute_cluster}' found.\")\n", - "except Exception as ex:\n", - " print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below snippet will allow us to query number of GPU's present on the compute. We can use it to set `gpu_per_node` to ensure utilization of all GPUs in the node." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpus_per_node = 1 # default value\n", - "gpu_count_found = False\n", - "ws_computes = workspace_ml_client.compute.list_sizes()\n", - "for ws_compute in ws_computes:\n", - " if ws_compute.name.lower() == compute.size.lower():\n", - " gpus_per_node = ws_compute.gpus\n", - " print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n", - "# if gpu_count_found not found, then print an error\n", - "if gpus_per_node > 0:\n", - " gpu_count_found = True\n", - "else:\n", - " gpu_count_found = False\n", - " print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick the models to evaluate\n", - "\n", - "Verify that the models selected for evaluation are available in system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319354708 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n", - "models = [\n", - " {\"name\": \"Jean-Baptiste-camembert-ner\", \"version\": \"1\"},\n", - "]\n", - "for model in models:\n", - " model = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " print(model.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. 
Pick the test dataset for evaluation\n", - "A copy of the CoNLL2003 is available in the [conll2003](./conll2003/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n", - "* To use the entire dataset, uncomment the cells below and run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from datasets import load_dataset\n", - "\n", - "# hf_test_data = load_dataset('conll2003')\n", - "\n", - "# hf_test_data['train'].to_pandas().head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_data = \"./conll2003/small-test.jsonl\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "pd.read_json(test_data, lines=True).head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the evaluation jobs using the model and data as inputs\n", - "\n", - "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n", - "\n", - "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval_config.json](./eval_config.json) file. We calculate `accuracy`, `f1_score_macro` and `f1_score_micro` in this sample.\n", - "\n", - "All supported evaluation configurations for `text-named-entity-recognition` can be found in [README](./README.md)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml import Input\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"model_evaluation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def evaluation_pipeline(mlflow_model):\n", - " evaluation_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry or a model from the workspace\n", - " # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n", - " mlflow_model=mlflow_model,\n", - " # test data\n", - " test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n", - " # The following parameters map to the dataset fields\n", - " input_column_names=\"input_string\",\n", - " label_column_name=\"ner_tags_str\",\n", - " # Evaluation settings\n", - " task=\"text-named-entity-recognition\",\n", - " # config file containing the details of evaluation metrics to calculate\n", - " evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n", - " # config cluster/device job is running on\n", - " # set device to GPU/CPU on basis if GPU count was found\n", - " device=\"gpu\" if gpu_count_found else \"cpu\",\n", - " )\n", - " return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the jobs, passing the model as a parameter to the pipeline created in the above step." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# submit the pipeline job for each model that we want to evaluate\n", - "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n", - "pipeline_jobs = []\n", - "\n", - "experiment_name = \"text-named-entity-recognition-evaluation\"\n", - "\n", - "for model in models:\n", - " model_object = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " pipeline_object = evaluation_pipeline(\n", - " mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n", - " )\n", - " # don't reuse cached results from previous jobs\n", - " pipeline_object.settings.force_rerun = True\n", - " pipeline_object.settings.default_compute = compute_cluster\n", - " pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n", - " pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - " )\n", - " # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n", - " pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n", - " # wait for the pipeline job to complete\n", - " workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. 
See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n", - "\n", - "![Model evaluation dashboard in AzureML studio](./text-named-entity-recognition-eval-dashboard.png)\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "\n", - "metrics_df = pd.DataFrame()\n", - "for job in pipeline_jobs:\n", - " # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - " filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n", - " runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - " )\n", - " # get the compute_metrics runs.\n", - " # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - " for run in runs:\n", - " # else, check if run.data.metrics.accuracy exists\n", - " if \"accuracy\" in run.data.metrics:\n", - " # get the metrics from the mlflow run\n", - " run_metric = run.data.metrics\n", - " # add the model name to the run_metric dictionary\n", - " run_metric[\"model_name\"] = job[\"model_name\"]\n", - " # convert the run_metric dictionary to a pandas dataframe\n", - " temp_df = pd.DataFrame(run_metric, index=[0])\n", - " # concat the temp_df to the metrics_df\n", - " metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n", - "\n", - "# move the model_name columns to the first column\n", - "cols = 
metrics_df.columns.tolist()\n", - "cols = cols[-1:] + cols[:-1]\n", - "metrics_df = metrics_df[cols]\n", - "metrics_df.head()" - ] - } - ], - "metadata": { - "kernel_info": { - "name": "python310-sdkv2" - }, - "kernelspec": { - "display_name": "Python 3.10 - SDK V2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - }, - "microsoft": { - "host": { - "AzureML": { - "notebookHasBeenCompleted": true - } - }, - "ms_spell_check": { - "ms_spell_check_language": "en" - } - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/evaluation/token-classification/text-named-entity-recognition-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/token-classification/text-named-entity-recognition-eval-dashboard.png deleted file mode 100644 index 088d148de6..0000000000 Binary files a/sdk/python/foundation-models/system/evaluation/token-classification/text-named-entity-recognition-eval-dashboard.png and /dev/null differ diff --git a/sdk/python/foundation-models/system/evaluation/translation/README.md b/sdk/python/foundation-models/system/evaluation/translation/README.md deleted file mode 100644 index 6c6c0383e8..0000000000 --- a/sdk/python/foundation-models/system/evaluation/translation/README.md +++ /dev/null @@ -1,17 +0,0 @@ -## Translation - -### List of supported keyword arguments: - -| Keyword Argument | Description | Type | Sample | -|:-----------------:|:-------------------------------------------------------------------------------|-----------|------------------------------------------| -| metrics | List for subset of metrics to be computed. All supported metrics listed below. 
| list | ["bleu_1", "bleu_2", "bleu_3", "bleu_4"] | -| tokenizer | Tokenizer object to perform tokenization on provided input text | | | -| smoothing | Boolean flag to indicate if bleu score needs to be smoothened | boolean | false, true | -| custom_dimensions | Used to report telemetry data (can later be used to perform PII scrubbing) | dict | | - -### List of supported metrics: - -* bleu_1 -* bleu_2 -* bleu_3 -* bleu_4 \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/translation/eval-config.json b/sdk/python/foundation-models/system/evaluation/translation/eval-config.json deleted file mode 100644 index 213d60cd45..0000000000 --- a/sdk/python/foundation-models/system/evaluation/translation/eval-config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "metrics": ["bleu_1", "bleu_2", "bleu_3", "bleu_4"], - "smoothing": false -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/evaluation/translation/text-translation-eval-dashboard.png b/sdk/python/foundation-models/system/evaluation/translation/text-translation-eval-dashboard.png deleted file mode 100644 index 24a97420d0..0000000000 Binary files a/sdk/python/foundation-models/system/evaluation/translation/text-translation-eval-dashboard.png and /dev/null differ diff --git a/sdk/python/foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb b/sdk/python/foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb deleted file mode 100644 index 19b81a66ad..0000000000 --- a/sdk/python/foundation-models/system/evaluation/translation/translation-romanian-to-english.ipynb +++ /dev/null @@ -1,454 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Translation Evaluation - Translation between Romanian and English\n", - "\n", - "This sample shows how use the evaluate a group of models against a given set of metrics for the `text-translation` task. 
\n", - "\n", - "### Evaluation dataset\n", - "Translation dataset based on the data from statmt.org. Versions exist for different years using a combination of data sources. The base wmt allows you to create a custom dataset by choosing your own data/language pair. A copy of the [wmt16/ro-en](https://huggingface.co/datasets/wmt16/viewer/ro-en) dataset is available in the [wmt16_ro-en](./wmt16_ro-en) folder.\n", - "\n", - "### Model\n", - "The goal of evaluating models is to compare their performance on a variety of metrics. `text-translation` is generic task type that can be used for translation between two languages. As such, the models you pick to compare must be finetuned for same scenario. Given that we have the WMT16-RO-EN dataset, we would like to look for models finetuned for this specific scenario. We will compare `t5-base`, `t5-small` and `t5-large` in this sample, which are available in the `azureml` system registry.\n", - "\n", - "If you'd like to evaluate models that are not in the system registry, you can import those models to your workspace or organization registry and then evaluate them using the approach outlined in this sample. Review the sample notebook for [importing models](../../import/import-model-from-huggingface.ipynb). \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick the models to evaluate.\n", - "* Pick and explore evaluate data.\n", - "* Configure the evaluation jobs.\n", - "* Run the evaluation jobs.\n", - "* Review the evaluation metrics. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. 
A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "%pip install --upgrade azure-ai-ml\n", - "%pip install --upgrade azure-identity\n", - "%pip install --upgrade datasets==2.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319346668 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = None\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential)\n", - " subscription_id = 
workspace_ml_client.subscription_id\n", - " workspace = workspace_ml_client.workspace_name\n", - " resource_group = workspace_ml_client.resource_group_name\n", - "except Exception as ex:\n", - " print(ex)\n", - " # Enter details of your AML workspace\n", - " subscription_id = \"\"\n", - " resource_group = \"\"\n", - " workspace = \"\"\n", - " workspace_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, workspace\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "preview_registry = \"azureml-staging\"\n", - "registry = \"azureml\"\n", - "\n", - "preview_registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=preview_registry\n", - ")\n", - "print(preview_registry_ml_client)\n", - "\n", - "registry_ml_client = MLClient(\n", - " credential, subscription_id, resource_group, registry_name=registry\n", - ")\n", - "registry_ml_client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - " print(f\"GPU compute '{compute_cluster}' found.\")\n", - "except Exception as ex:\n", - " print(f\"GPU compute '{compute_cluster}' not found. 
Creating new one.\")\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# generating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below snippet will allow us to query number of GPU's present on the compute. We can use it to set `gpu_per_node` to ensure utilization of all GPUs in the node." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpus_per_node = 1 # default value\n", - "gpu_count_found = False\n", - "ws_computes = workspace_ml_client.compute.list_sizes()\n", - "for ws_compute in ws_computes:\n", - " if ws_compute.name.lower() == compute.size.lower():\n", - " gpus_per_node = ws_compute.gpus\n", - " print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n", - "# if gpu_count_found not found, then print an error\n", - "if gpus_per_node > 0:\n", - " gpu_count_found = True\n", - "else:\n", - " gpu_count_found = False\n", - " print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. 
Pick the models to evaluate\n", - "\n", - "Verify that the models selected for evaluation are available in system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1679319354708 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# need to specify model versions until the bug to support fetching the latest version using latest label is fixed\n", - "models = [\n", - " {\"name\": \"t5-base\", \"version\": \"1\"},\n", - " {\"name\": \"t5-large\", \"version\": \"1\"},\n", - " {\"name\": \"t5-small\", \"version\": \"1\"},\n", - "]\n", - "for model in models:\n", - " model = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " print(model.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the test dataset for evaluation\n", - "A copy of the wmt16/ro-en is available in the [wmt16/ro-en](./wmt16_ro-en/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so we use a smaller dataset containing 10% of the original.\n", - "* To use the entire dataset, uncomment the cells below and run." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from datasets import load_dataset\n", - "\n", - "# hf_test_data = load_dataset('wmt16', 'ro-en')\n", - "\n", - "# hf_test_data['train'].to_pandas().head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_data = \"./wmt16_ro-en/small-test.jsonl\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "pd.read_json(test_data, lines=True).head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the evaluation jobs using the model and data as inputs\n", - "\n", - "Create the job that uses the `model_evaluation_pipeline` component. We will submit one job per model.\n", - "\n", - "Note that the metrics that the evaluation jobs need to calculate are specified in the [eval_config.json](./eval_config.json) file. We calculate `bleu_1`, `bleu_2`, `bleu_3` and `bleu_4` in this sample.\n", - "\n", - "All supported evaluation configurations for `text-translation` can be found in [README](./README.md)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml import Input\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"model_evaluation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def evaluation_pipeline(mlflow_model):\n", - " evaluation_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry or a model from the workspace\n", - " # mlflow_model = Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{mlflow_model_path}\"),\n", - " mlflow_model=mlflow_model,\n", - " # test data\n", - " test_data=Input(type=AssetTypes.URI_FILE, path=test_data),\n", - " # The following parameters map to the dataset fields\n", - " input_column_names=\"input_string\",\n", - " label_column_name=\"ro\",\n", - " # Evaluation settings\n", - " task=\"text-translation\",\n", - " # config file containing the details of evaluation metrics to calculate\n", - " evaluation_config=Input(type=AssetTypes.URI_FILE, path=\"./eval-config.json\"),\n", - " # config cluster/device job is running on\n", - " # set device to GPU/CPU on basis if GPU count was found\n", - " device=\"gpu\" if gpu_count_found else \"cpu\",\n", - " )\n", - " return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the jobs, passing the model as a parameter to the pipeline created in the above step." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# submit the pipeline job for each model that we want to evaluate\n", - "# you could consider submitting the pipeline jobs in parallel, provided your cluster has multiple nodes\n", - "pipeline_jobs = []\n", - "\n", - "experiment_name = \"text-translation-evaluation\"\n", - "\n", - "for model in models:\n", - " model_object = preview_registry_ml_client.models.get(\n", - " model[\"name\"], version=model[\"version\"]\n", - " )\n", - " pipeline_object = evaluation_pipeline(\n", - " mlflow_model=Input(type=AssetTypes.MLFLOW_MODEL, path=f\"{model_object.id}\"),\n", - " )\n", - " # don't reuse cached results from previous jobs\n", - " pipeline_object.settings.force_rerun = True\n", - " pipeline_object.settings.default_compute = compute_cluster\n", - " pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n", - " pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - " )\n", - " # add model['name'] and pipeline_job.name as key value pairs to a dictionary\n", - " pipeline_jobs.append({\"model_name\": model[\"name\"], \"job_name\": pipeline_job.name})\n", - " # wait for the pipeline job to complete\n", - " workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. 
See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more.\n", - "\n", - "![Model evaluation dashboard in AzureML studio](./text-translation-eval-dashboard.png)\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "\n", - "metrics_df = pd.DataFrame()\n", - "for job in pipeline_jobs:\n", - " # concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - " filter = \"tags.mlflow.rootRunId='\" + job[\"job_name\"] + \"'\"\n", - " runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - " )\n", - " # get the compute_metrics runs.\n", - " # using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - " for run in runs:\n", - " # else, check if run.data.metrics.accuracy exists\n", - " if \"bleu_1\" in run.data.metrics:\n", - " # get the metrics from the mlflow run\n", - " run_metric = run.data.metrics\n", - " # add the model name to the run_metric dictionary\n", - " run_metric[\"model_name\"] = job[\"model_name\"]\n", - " # convert the run_metric dictionary to a pandas dataframe\n", - " temp_df = pd.DataFrame(run_metric, index=[0])\n", - " # concat the temp_df to the metrics_df\n", - " metrics_df = pd.concat([metrics_df, temp_df], ignore_index=True)\n", - "\n", - "# move the model_name columns to the first column\n", - "cols = 
metrics_df.columns.tolist()\n", - "cols = cols[-1:] + cols[:-1]\n", - "metrics_df = metrics_df[cols]\n", - "metrics_df.head()" - ] - } - ], - "metadata": { - "kernel_info": { - "name": "python310-sdkv2" - }, - "kernelspec": { - "display_name": "Python 3.10 - SDK V2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - }, - "microsoft": { - "host": { - "AzureML": { - "notebookHasBeenCompleted": true - } - }, - "ms_spell_check": { - "ms_spell_check_language": "en" - } - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/finetune/question-answering/extractive-qa.ipynb b/sdk/python/foundation-models/system/finetune/question-answering/extractive-qa.ipynb deleted file mode 100644 index bda741c304..0000000000 --- a/sdk/python/foundation-models/system/finetune/question-answering/extractive-qa.ipynb +++ /dev/null @@ -1,625 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Question Answering - Extractive Q&A with the SQUAD (Wikipedia Q&A) dataset\n", - "\n", - "This sample shows how to use `question-answering` components from the `azureml` system registry to fine tune a model to extract answers from a given context using the SQUAD dataset. We then deploy it to an online endpoint for real time inference. The model is trained on tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n", - "\n", - "### Training data\n", - "We will use the [SQUAD](https://huggingface.co/datasets/squad) dataset. A copy of this dataset is available in the [squad-dataset](./squad-dataset/) folder for easy access. 
The [original source](https://rajpurkar.github.io/SQuAD-explorer/) of dataset describes it as follows: _\"Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.\"_\n", - "\n", - "### Model\n", - "Models that can perform the `fill-mask` task are generally good foundation models to fine tune for `question-answering`, specifically the extractive Q&A type. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter instruct the components to pull the model directly from HuggingFace. \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick a model to fine tune.\n", - "* Pick and explore training data.\n", - "* Configure the fine tuning job.\n", - "* Run the fine tuning job.\n", - "* Register the fine tuned model. \n", - "* Deploy the fine tuned model for real time inference.\n", - "* Clean up resources. \n", - "\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. 
For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install azure-ai-ml\n", - "%pip install azure-identity\n", - "%pip install datasets==2.9.0\n", - "%pip install mlflow\n", - "%pip install azureml-mlflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential=credential)\n", - "except:\n", - " workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and 
environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n", - "\n", - "experiment_name = \"question-answering-extractive-qna\"\n", - "\n", - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - "except Exception as ex:\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpu_count_found = False\n", - "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n", - "available_sku_sizes = []\n", - "for compute_sku in workspace_compute_sku_list:\n", - " available_sku_sizes.append(compute_sku.name)\n", - " if compute_sku.name.lower() == compute.size.lower():\n", - " gpus_per_node = compute_sku.gpus\n", - " gpu_count_found = True\n", - "# if gpu_count_found not found, then print an error\n", - "if gpu_count_found:\n", - " print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n", - "else:\n", - " raise ValueError(\n", - " f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n", - " f\"This should not happen. 
Please check the selected compute cluster: {compute_cluster} and try again.\"\n", - " )\n", - "# CPU based finetune works only for single-node single-process\n", - "if gpus_per_node == 0:\n", - " print(\n", - " \"WARNING! Selected compute doesn't have GPU. CPU based finetune is experimental and works on a single process in a single node\"\n", - " )\n", - " gpus_per_node = 1\n", - "\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a foundation model to fine tune\n", - "\n", - "Models that support `fill-mask` tasks are good candidates to fine tune for extractive Q&A style `question answering`. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n", - "\n", - "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"bert-base-uncased\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the dataset for fine-tuning the model\n", - "\n", - "A copy of the SQUAD dataset is available in the [squad-dataset](./squad-dataset/) folder. 
The next few cells show basic data preparation for fine tuning:\n", - "* Visualize some data rows. Take note of the dataset fields: `question`, `context`, `answers`, `id` and `title`. The `answers` field has `start_key` and `text` fields in json format inside the `answers` field . The keys `question` and `context`, `answers`, `answer_start` and `text` are the relevant fields that need to be mapped to the parameters of the fine tuning pipeline.\n", - "* The dataset does not have a test split, split test into two halves, one for test and other for validation.\n", - "* We want this sample to run quickly, so save smaller `train` and `validation` files containing 5% of the original. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the train.jsonl and validation.jsonl files from the ./squad-dataset/ folder and show first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(\"squad-dataset/train.jsonl\", lines=True)\n", - "validation_df = pd.read_json(\"squad-dataset/validation.jsonl\", lines=True)\n", - "train_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save 5% of the rows from the train dataframe into files with small_ prefix in the ./squad-dataset folder\n", - "train_df.sample(frac=0.05).to_json(\n", - " \"./squad-dataset/small_train.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "# the original dataset does not have a test split, so split the validation dataframe into validation and test dataframes equally\n", - "validation_df, test_df = (\n", - " validation_df[: len(validation_df) // 2],\n", - " validation_df[len(validation_df) // 2 :],\n", - ")\n", - "# save 
5% of the rows from the validation and test dataframes into files with small_ prefix in the ./squad-dataset folder\n", - "validation_df.sample(frac=0.05).to_json(\n", - " \"./squad-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "test_df.sample(frac=0.05).to_json(\n", - " \"./squad-dataset/small_test.jsonl\", orient=\"records\", lines=True\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the fine tuning job using the the model and data as inputs\n", - " \n", - "Create the job that uses the `question-answering` pipeline component. [Learn more]() about all the parameters supported for fine tuning." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n", - "from azure.ai.ml import PyTorchDistribution, Input\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"question_answering_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def create_pipeline():\n", - " finetuning_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry id identified in step #3\n", - " mlflow_model_path=foundation_model.id,\n", - " # huggingface_id = 'bert-base-uncased', # if you want to use a huggingface model, uncomment this line and comment the above line\n", - " compute_model_import=compute_cluster,\n", - " compute_preprocess=compute_cluster,\n", - " compute_finetune=compute_cluster,\n", - " compute_model_evaluation=compute_cluster,\n", - " # map the dataset splits to parameters\n", - " train_file_path=Input(\n", - " type=\"uri_file\", path=\"./squad-dataset/small_train.jsonl\"\n", - " ),\n", - " 
validation_file_path=Input(\n", - " type=\"uri_file\", path=\"./squad-dataset/small_validation.jsonl\"\n", - " ),\n", - " test_file_path=Input(type=\"uri_file\", path=\"./squad-dataset/small_test.jsonl\"),\n", - " evaluation_config=Input(\n", - " type=\"uri_file\", path=\"./question-answering-config.json\"\n", - " ),\n", - " # The following parameters map to the dataset fields\n", - " # the question whose answer needs to be extracted from the provided context\n", - " # question_key parameter maps to the \"question\" field in the SQuAD dataset\n", - " question_key=\"question\",\n", - " # the context that contains the answer to the question\n", - " # context_key parameter maps to the \"context\" field in the SQuAD dataset\n", - " context_key=\"context\",\n", - " # The value of this field is text in json format with two nested keys, answer_start_key and answer_text_key with their corresponding values\n", - " # answers_key parameter maps to the \"answers\" field in the SQuAD dataset\n", - " answers_key=\"answers\",\n", - " # Refers to the position where the answer beings in context. Needs a value that maps to a nested key in the values of the answers_key parameter.\n", - " # in the SQuAD dataset, the answer_start_key maps \"answer_start\" under \"answer\"\n", - " answer_start_key=\"answer_start\",\n", - " # Contains the answer to the question. 
Needs a value that maps to a nested key in the values of the answers_key parameter\n", - " # in the SQuAD dataset, the answer_text_key maps to \"text\" under \"answer\"\n", - " answer_text_key=\"text\",\n", - " # training settings\n", - " number_of_gpu_to_use_finetuning=gpus_per_node, # set to the number of GPUs available in the compute\n", - " num_train_epochs=2,\n", - " learning_rate=2e-5,\n", - " )\n", - " return {\n", - " # map the output of the fine tuning job to the output of the pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", - " \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n", - " }\n", - "\n", - "\n", - "pipeline_object = create_pipeline()\n", - "\n", - "# don't use cached results from previous jobs\n", - "pipeline_object.settings.force_rerun = True" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the job" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# submit the pipeline job\n", - "pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - ")\n", - "# wait for the pipeline job to complete\n", - "workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review training and evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. 
\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n", - "runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - ")\n", - "training_run = None\n", - "evaluation_run = None\n", - "# get the training and evaluation runs.\n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", - " if \"epoch\" in run.data.metrics:\n", - " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", - " elif \"exact_match\" in run.data.metrics:\n", - " evaluation_run = run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if training_run:\n", - " print(\"Training metrics:\\n\\n\")\n", - " print(json.dumps(training_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Training job found\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if evaluation_run:\n", - " print(\"Evaluation metrics:\\n\\n\")\n", - " print(json.dumps(evaluation_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Evaluation job found\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "### 6. Register the fine tuned model with the workspace\n", - "\n", - "We will register the model from the output of the fine tuning job. This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Model\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# check if the `trained_model` output is available\n", - "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n", - "\n", - "# fetch the model from pipeline job output - not working, hence fetching from fine tune child job\n", - "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n", - " pipeline_job.name, \"trained_model\"\n", - ")\n", - "\n", - "finetuned_model_name = model_name + \"-extractive-qna\"\n", - "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n", - "print(\"path to register model: \", model_path_from_job)\n", - "prepare_to_register_model = Model(\n", - " path=model_path_from_job,\n", - " type=AssetTypes.MLFLOW_MODEL,\n", - " name=finetuned_model_name,\n", - " version=timestamp, # use timestamp as version to avoid version conflict\n", - " description=model_name + \" fine tuned model for extractive Q&A\",\n", - ")\n", - "print(\"prepare to register model: \\n\", prepare_to_register_model)\n", - "# register the model from pipeline job output\n", - "registered_model = workspace_ml_client.models.create_or_update(\n", - " prepare_to_register_model\n", - ")\n", - "print(\"registered model: \\n\", registered_model)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. 
Deploy the fine tuned model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "\n", - "online_endpoint_name = \"ext-qna-\" + timestamp\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + registered_model.name\n", - " + \", fine tuned model for emotion detection\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=registered_model.id,\n", - " instance_type=\"Standard_DS3_v2\",\n", - " instance_count=1,\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8. 
Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read ./squad-dataset/small_test.jsonl into a pandas dataframe\n", - "import pandas as pd\n", - "import json\n", - "\n", - "test_df = pd.read_json(\"./squad-dataset/small_test.jsonl\", orient=\"records\", lines=True)\n", - "# take 10 random samples\n", - "test_df = test_df.sample(n=10)\n", - "# rebuild index\n", - "test_df.reset_index(drop=True, inplace=True)\n", - "# flatten the json object in the \"answer\" column with the keys \"answer_start\" and \"text\"\n", - "json_struct = json.loads(test_df.to_json(orient=\"records\"))\n", - "test_df = pd.json_normalize(json_struct)\n", - "# drop id and title columns\n", - "test_df = test_df.drop(columns=[\"id\", \"title\"])\n", - "test_df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a json object with \"inputs\" as key and a list of json objects with \"question\" and \"context\" as keys\n", - "test_json = {\n", - " \"inputs\": {\n", - " \"question\": test_df[\"question\"].tolist(),\n", - " \"context\": test_df[\"context\"].tolist(),\n", - " }\n", - "}\n", - "print(test_json)\n", - "# write the json object to a file named sample_score.json in the ./squad-dataset folder\n", - "with open(\"./squad-dataset/sample_score.json\", \"w\") as f:\n", - " json.dump(test_json, f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " 
deployment_name=\"demo\",\n", - " request_file=\"./squad-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the response to a pandas dataframe and rename the label column as scored_label\n", - "response_df = pd.read_json(response)\n", - "response_df = response_df.rename(columns={0: \"scored_answer\"})\n", - "response_df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# merge the test dataframe and the response dataframe on the index\n", - "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n", - "# drop the answers.answer_start, start and end columns and rename the answer column to scored_answer\n", - "merged_df = merged_df.drop(columns=[\"answers.answer_start\"])\n", - "# rename the answers.text column to ground_truth_answers\n", - "merged_df = merged_df.rename(columns={\"answers.text\": \"ground_truth_answers\"})\n", - "merged_df.head(10)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9. 
Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "notebooks-venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json b/sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json deleted file mode 100644 index 15165acfe5..0000000000 --- a/sdk/python/foundation-models/system/finetune/question-answering/question-answering-config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "metrics": ["exact_match", "f1_score"], - "regexes_to_ignore": ["$[A-Z]+"], - "ignore_case": false, - "ignore_numbers": false, - "ignore_punctuations": true -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/download-dataset.py b/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/download-dataset.py deleted file mode 100644 index e9ffb0b999..0000000000 --- a/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/download-dataset.py +++ /dev/null @@ -1,28 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, 
default="squad", help="dataset name") -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="data", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset): - # load the split of the dataset - dataset = load_dataset(args.dataset, split=split) - # save the split of the dataset to the download directory as json lines file - dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl")) - # print dataset features diff --git a/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/sample_score.json deleted file mode 100644 index 56a73ed392..0000000000 --- a/sdk/python/foundation-models/system/finetune/question-answering/squad-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"question": ["Who would have been the lowest-ranked class?", "What is the Republic of Kenya named after?", "Who was the creator of the theme for the 50th Anniversary special?", "What is the only divisor besides 1 that a prime number can have?", "In what year did ENR compile data in nine market segments?", "When was Iqbal elected president of the Muslim League?", "How fast were the winds around St. 
Augustine in the 1964 hurricane?", "Who assisted Wesley with ordaining Whatcoat and Vasey as presbyters?", "What goal does Islamism have when it comes to society and government?", "What group is Newcastle native Andy Taylor the former lead guitarist of?"], "context": ["Kublai's government after 1262 was a compromise between preserving Mongol interests in China and satisfying the demands of his Chinese subjects. He instituted the reforms proposed by his Chinese advisers by centralizing the bureaucracy, expanding the circulation of paper money, and maintaining the traditional monopolies on salt and iron. He restored the Imperial Secretariat and left the local administrative structure of past Chinese dynasties unchanged. However, Kublai rejected plans to revive the Confucian imperial examinations and divided Yuan society into three, later four, classes with the Han Chinese occupying the lowest rank. Kublai's Chinese advisers still wielded significant power in the government, but their official rank was nebulous.", "The Republic of Kenya is named after Mount Kenya. The origin of the name Kenya is not clear, but perhaps linked to the Kikuyu, Embu and Kamba words Kirinyaga, Kirenyaa and Kiinyaa which mean \"God's resting place\" in all three languages. If so, then the British may not so much have mispronounced it ('Keenya'), as misspelled it. Prehistoric volcanic eruptions of Mount Kenya (now extinct) may have resulted in its association with divinity and creation among the indigenous Bantu ethnic groups, who are the native inhabitants of the agricultural land surrounding Mount Kenya.[original research?]", "A new arrangement of the theme, once again by Gold, was introduced in the 2007 Christmas special episode, \"Voyage of the Damned\"; Gold returned as composer for the 2010 series. He was responsible for a new version of the theme which was reported to have had a hostile reception from some viewers. 
In 2011, the theme tune charted at number 228 of radio station Classic FM's Hall of Fame, a survey of classical music tastes. A revised version of Gold's 2010 arrangement had its debut over the opening titles of the 2012 Christmas special \"The Snowmen\", and a further revision of the arrangement was made for the 50th Anniversary special \"The Day of the Doctor\" in November 2013.[citation needed]", "A prime number (or a prime) is a natural number greater than 1 that has no positive divisors other than 1 and itself. A natural number greater than 1 that is not a prime number is called a composite number. For example, 5 is prime because 1 and 5 are its only positive integer factors, whereas 6 is composite because it has the divisors 2 and 3 in addition to 1 and 6. The fundamental theorem of arithmetic establishes the central role of primes in number theory: any integer greater than 1 can be expressed as a product of primes that is unique up to ordering. The uniqueness in this theorem requires excluding 1 as a prime because one can include arbitrarily many instances of 1 in any factorization, e.g., 3, 1 \u00b7 3, 1 \u00b7 1 \u00b7 3, etc. are all valid factorizations of 3.", "Engineering News-Record (ENR) is a trade magazine for the construction industry. Each year, ENR compiles and reports on data about the size of design and construction companies. They publish a list of the largest companies in the United States (Top-40) and also a list the largest global firms (Top-250, by amount of work they are doing outside their home country). In 2014, ENR compiled the data in nine market segments. It was divided as transportation, petroleum, buildings, power, industrial, water, manufacturing, sewer/waste, telecom, hazardous waste plus a tenth category for other projects. 
In their reporting on the Top 400, they used data on transportation, sewer, hazardous waste and water to rank firms as heavy contractors.", "Iqbal expressed fears that not only would secularism and secular nationalism weaken the spiritual foundations of Islam and Muslim society, but that India's Hindu-majority population would crowd out Muslim heritage, culture and political influence. In his travels to Egypt, Afghanistan, Palestine and Syria, he promoted ideas of greater Islamic political co-operation and unity, calling for the shedding of nationalist differences. Sir Muhammad Iqbal was elected president of the Muslim League in 1930 at its session in Allahabad as well as for the session in Lahore in 1932. In his Allahabad Address on 29 December 1930, Iqbal outlined a vision of an independent state for Muslim-majority provinces in northwestern India. This address later inspired the Pakistan movement.", "Jacksonville has suffered less damage from hurricanes than most other east coast cities, although the threat does exist for a direct hit by a major hurricane. The city has only received one direct hit from a hurricane since 1871; however, Jacksonville has experienced hurricane or near-hurricane conditions more than a dozen times due to storms crossing the state from the Gulf of Mexico to the Atlantic Ocean, or passing to the north or south in the Atlantic and brushing past the area. The strongest effect on Jacksonville was from Hurricane Dora in 1964, the only recorded storm to hit the First Coast with sustained hurricane-force winds. The eye crossed St. Augustine with winds that had just barely diminished to 110 mph (180 km/h), making it a strong Category 2 on the Saffir-Simpson Scale. Jacksonville also suffered damage from 2008's Tropical Storm Fay which crisscrossed the state, bringing parts of Jacksonville under darkness for four days. 
Similarly, four years prior to this, Jacksonville was inundated by Hurricane Frances and Hurricane Jeanne, which made landfall south of the area. These tropical cyclones were the costliest indirect hits to Jacksonville. Hurricane Floyd in 1999 caused damage mainly to Jacksonville Beach. During Floyd, the Jacksonville Beach pier was severely damaged, and later demolished. The rebuilt pier was later damaged by Fay, but not destroyed. Tropical Storm Bonnie would cause minor damage in 2004, spawning a minor tornado in the process. On May 28, 2012, Jacksonville was hit by Tropical Storm Beryl, packing winds up to 70 miles per hour (113 km/h) which made landfall near Jacksonville Beach.", "Some argue that The United Methodist Church can lay a claim on apostolic succession, as understood in the traditional sense. As a result of the American Revolution, John Wesley was compelled in 1784 to break with standard practice and ordain two of his lay preachers as presbyters, Thomas Vasey and Richard Whatcoat. Dr. Thomas Coke, already an Anglican priest, assisted Wesley in this action. Coke was then \"set apart\" as a superintendent (bishop) by Wesley and dispatched with Vasey and Whatcoat to America to take charge of Methodist activities there. In defense of his action to ordain, Wesley himself cited an ancient opinion from the Church of Alexandria, which held that bishops and presbyters constituted one order and therefore, bishops are to be elected from and by the presbyterate. He knew that for two centuries the succession of bishops in the Church of Alexandria was preserved through ordination by presbyters alone and was considered valid by the ancient church. 
Methodists today who would argue for apostolic succession would do so on these grounds.", "Islamism, also known as Political Islam (Arabic: \u0625\u0633\u0644\u0627\u0645 \u0633\u064a\u0627\u0633\u064a\u200e isl\u0101m siy\u0101s\u012b), is an Islamic revival movement often characterized by moral conservatism, literalism, and the attempt \"to implement Islamic values in all spheres of life.\" Islamism favors the reordering of government and society in accordance with the Shari'a. The different Islamist movements have been described as \"oscillating between two poles\": at one end is a strategy of Islamization of society through state power seized by revolution or invasion; at the other \"reformist\" pole Islamists work to Islamize society gradually \"from the bottom up\". The movements have \"arguably altered the Middle East more than any trend since the modern states gained independence\", redefining \"politics and even borders\" according to one journalist (Robin Wright).", "Lindisfarne are a folk-rock group with a strong Tyneside connection. Their most famous song, \"Fog on the Tyne\" (1971), was covered by Geordie ex-footballer Paul Gascoigne in 1990. Venom, reckoned by many to be the originators of black metal and extremely influential to the extreme metal scene as a whole, formed in Newcastle in 1979. Folk metal band Skyclad, often regarded as the first folk metal band, also formed in Newcastle after the break-up of Martin Walkyier thrash metal band, Sabbat. Andy Taylor, former lead guitarist of Duran Duran was born here in 1961. 
Brian Johnson was a member of local rock band Geordie before becoming the lead vocalist of AC/DC."]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/download-dataset.py b/sdk/python/foundation-models/system/finetune/summarization/download-dataset.py similarity index 86% rename from sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/download-dataset.py rename to sdk/python/foundation-models/system/finetune/summarization/download-dataset.py index c25ad19702..cfea8436b4 100644 --- a/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/download-dataset.py +++ b/sdk/python/foundation-models/system/finetune/summarization/download-dataset.py @@ -3,10 +3,10 @@ parser = argparse.ArgumentParser() # add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="squad", help="dataset name") +parser.add_argument("--dataset", type=str, default="cnn_dailymail", help="dataset name") # add an argument to specify the config name of the dataset parser.add_argument( - "--config_name", type=str, default="plain_text", help="config name of the dataset" + "--config_name", type=str, default="3.0.0", help="config name of the dataset" ) # argument to save a fraction of the dataset parser.add_argument( @@ -16,7 +16,7 @@ parser.add_argument( "--download_dir", type=str, - default="data", + default="./news-summary-dataset", help="directory to download the dataset to", ) args = parser.parse_args() diff --git a/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/sample_score.json deleted file mode 100644 index fea5783af5..0000000000 --- a/sdk/python/foundation-models/system/finetune/summarization/news-summary-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": 
["(CNN)Deputies rushed Kenneth Morgan Stancil III from court Thursday after the 20-year-old murder suspect swore at a judge and tried to flip over a table. Stancil is accused of killing an employee Monday at Wayne Community College in Goldsboro, North Carolina. Relatives have said victim Ron Lane was gay, CNN affiliate WNCN reported, and investigators are looking into whether the shooting was a hate crime. Authorities arrested Stancil after he was found sleeping on a Florida beach on Tuesday. Just a few minutes into Thursday's hearing on the first-degree murder charge he faces, Stancil snapped back at the judge after he was offered a court-appointed lawyer. \"No, I don't need one,\" said Stancil, who stood before the judge with his legs shackled and his arms handcuffed in front of him. \"You know what I'm saying? I knew I would get life anyway.\" Superior Court Judge Arnold O. Jones interjected, pointing out that the maximum sentence Stancil faces is the death penalty. \"Yes, I know that,\" Stancil fired back. \"But when I knew what I had to do and I knew when I got caught, you know, I knew in my mind that I could get life, I could get the death penalty. You know what I'm saying? Do you follow my topic? I would have killed you, you know what I'm saying, if you're a f---ing child molester.\" The judge told him not to swear. \"I don't give a f--- what you want,\" Stancil said, lunging forward and lifting up the table in front of him. Deputies quickly corralled him and hustled him from the courtroom. The hearing resumed about 25 minutes later, when Stancil was brought back into the courtroom, this time with his arms handcuffed behind him. When asked again by Jones whether he wanted a lawyer, his response was quick -- and calm. \"Yes, sir,\" he said. 
In an interview with CNN affiliate WRAL, Stancil described himself as a neo-Nazi and said he hates gay people \"with a passion.\" Stancil had worked for Lane, the school's print shop operator, as part of a work-study program, but was let go from the program in early March because of poor attendance, college officials said. During the interview, and during a court appearance in Florida on Tuesday, Stancil said Lane deserved to die, accusing him of being a child molester who'd made advances in online messages to Stancil's 16-year-old brother. Lane's family has described those accusations as untrue and slanderous. His cousin, Steve Smith, told WRAL that Lane never made sexual advances toward children or anyone with whom he worked. He described him as a loving man who was dedicated to family and friends. \"Yes, Ron was gay. But people need to get over it,\" Smith said. \"That's between him and the Lord, him and his savior.\""]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/summarization/news-summary.ipynb b/sdk/python/foundation-models/system/finetune/summarization/news-summary.ipynb deleted file mode 100644 index a4c9433fb3..0000000000 --- a/sdk/python/foundation-models/system/finetune/summarization/news-summary.ipynb +++ /dev/null @@ -1,613 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summarization - Generate news headlines style summary \n", - "\n", - "This sample shows how to use `summarization` components from the `azureml` system registry to fine tune a model to generate summary of a news article. We then deploy it to an online endpoint for real time inference. The model is trained on tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n", - "\n", - "### Training data\n", - "We will use the [CNN DailyMail](https://huggingface.co/datasets/cnn_dailymail) dataset. 
A copy of this dataset is available in the [news-summary-dataset](./news-summary-dataset/) folder for easy access. \n", - "\n", - "### Model\n", - "Models that can perform the `translation` task are generally good foundation models to fine tune for `summarization`. We will use the `t5-small` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter instruct the components to pull the model directly from HuggingFace. \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick a model to fine tune.\n", - "* Pick and explore training data.\n", - "* Configure the fine tuning job.\n", - "* Run the fine tuning job.\n", - "* Register the fine tuned model. \n", - "* Deploy the fine tuned model for real time inference.\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. 
Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install azure-ai-ml\n", - "%pip install azure-identity\n", - "%pip install datasets==2.9.0\n", - "%pip install mlflow\n", - "%pip install azureml-mlflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential=credential)\n", - "except:\n", - " workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n", - "\n", - "experiment_name = \"summarization-news-summary\"\n", - "\n", - "# If you already have a gpu cluster, mention it here. 
Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - "except Exception as ex:\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpu_count_found = False\n", - "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n", - "available_sku_sizes = []\n", - "for compute_sku in workspace_compute_sku_list:\n", - " available_sku_sizes.append(compute_sku.name)\n", - " if compute_sku.name.lower() == compute.size.lower():\n", - " gpus_per_node = compute_sku.gpus\n", - " gpu_count_found = True\n", - "# if gpu_count_found not found, then print an error\n", - "if gpu_count_found:\n", - " print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n", - "else:\n", - " raise ValueError(\n", - " f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n", - " f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n", - " )\n", - "# CPU based finetune works only for single-node single-process\n", - "if gpus_per_node == 0:\n", - " print(\n", - " \"WARNING! Selected compute doesn't have GPU. 
CPU based finetune is experimental and works on a single process in a single node\"\n", - " )\n", - " gpus_per_node = 1\n", - "\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a foundation model to fine tune\n", - "\n", - "Models that support `translation` tasks are good candidates to fine tune for `summarization`. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `translation` task. In this example, we use the `t5-small` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n", - "\n", - "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"t5-small\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the dataset for fine-tuning the model\n", - "\n", - "> The [CNN DailyMail](https://huggingface.co/datasets/cnn_dailymail) dataset is larger than 1GB when uncompressed. The [download-dataset.py](./news-summary-dataset/download-dataset.py) has supports downloading a smaller fraction of the dataset. The files in the [](./news-summary-dataset/) folder contain about 3% of the original dataset rows. 
\n", - "\n", - "A copy of the dataset is available in the [news-summary-dataset](./news-summary-dataset/) folder. \n", - "* Visualize some data rows. \n", - "* We want this sample to run quickly, so save smaller `train`, `validation` and `test` files containing 20% of the already trimmed rows. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "# load the train.jsonl, test.jsonl and validation.jsonl files from the ./news-summary-dataset/ folder and show first 5 rows\n", - "train_df = pd.read_json(\"./news-summary-dataset/train.jsonl\", lines=True)\n", - "validation_df = pd.read_json(\"./news-summary-dataset/validation.jsonl\", lines=True)\n", - "test_df = pd.read_json(\"./news-summary-dataset/test.jsonl\", lines=True)\n", - "# drop the id column as it is not needed for fine tuning\n", - "train_df.drop(columns=[\"id\"], inplace=True)\n", - "validation_df.drop(columns=[\"id\"], inplace=True)\n", - "test_df.drop(columns=[\"id\"], inplace=True)\n", - "train_df.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save 20% of the rows from the dataframes into files with small_ prefix in the ./news-summary-dataset folder\n", - "train_df.sample(frac=0.2).to_json(\n", - " \"./news-summary-dataset/small_train.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "validation_df.sample(frac=0.2).to_json(\n", - " \"./news-summary-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "test_df.sample(frac=0.2).to_json(\n", - " \"./news-summary-dataset/small_test.jsonl\", orient=\"records\", lines=True\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "### 4. Submit the fine tuning job using the the model and data as inputs\n", - " \n", - "Create the job that uses the `summarization` pipeline component. [Learn more]() about all the parameters supported for fine tuning." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n", - "from azure.ai.ml import PyTorchDistribution, Input\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"summarization_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def create_pipeline():\n", - " finetuning_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry id identified in step #3\n", - " mlflow_model_path=foundation_model.id,\n", - " # huggingface_id = 'bert-base-uncased', # if you want to use a huggingface model, uncomment this line and comment the above line\n", - " compute_model_import=compute_cluster,\n", - " compute_preprocess=compute_cluster,\n", - " compute_finetune=compute_cluster,\n", - " compute_model_evaluation=compute_cluster,\n", - " # map the dataset splits to parameters\n", - " train_file_path=Input(\n", - " type=\"uri_file\", path=\"./news-summary-dataset/small_train.jsonl\"\n", - " ),\n", - " validation_file_path=Input(\n", - " type=\"uri_file\", path=\"./news-summary-dataset/small_validation.jsonl\"\n", - " ),\n", - " test_file_path=Input(\n", - " type=\"uri_file\", path=\"./news-summary-dataset/small_test.jsonl\"\n", - " ),\n", - " evaluation_config=Input(type=\"uri_file\", path=\"./summarization-config.json\"),\n", - " # The following parameters map to the dataset fields\n", - " # document_key parameter maps to the \"article\" field in 
the news summary dataset\n", - " document_key=\"article\",\n", - " # summary_key parameter maps to the \"highlights\" field in the news summary dataset\n", - " summary_key=\"highlights\",\n", - " # training settings\n", - " number_of_gpu_to_use_finetuning=gpus_per_node, # set to the number of GPUs available in the compute\n", - " num_train_epochs=2,\n", - " learning_rate=2e-5,\n", - " )\n", - " return {\n", - " # map the output of the fine tuning job to the output of the pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", - " \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n", - " }\n", - "\n", - "\n", - "pipeline_object = create_pipeline()\n", - "\n", - "# don't use cached results from previous jobs\n", - "pipeline_object.settings.force_rerun = True" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the job" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# submit the pipeline job\n", - "pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - ")\n", - "# wait for the pipeline job to complete\n", - "workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review training and evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. 
\n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n", - "runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - ")\n", - "training_run = None\n", - "evaluation_run = None\n", - "# get the training and evaluation runs.\n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", - " if \"epoch\" in run.data.metrics:\n", - " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", - " elif \"rouge1\" in run.data.metrics:\n", - " evaluation_run = run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if training_run:\n", - " print(\"Training metrics:\\n\\n\")\n", - " print(json.dumps(training_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Training job found\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if evaluation_run:\n", - " print(\"Evaluation metrics:\\n\\n\")\n", - " print(json.dumps(evaluation_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Evaluation job found\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "### 6. Register the fine tuned model with the workspace\n", - "\n", - "We will register the model from the output of the fine tuning job. This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Model\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# check if the `trained_model` output is available\n", - "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n", - "\n", - "# fetch the model from pipeline job output - not working, hence fetching from fine tune child job\n", - "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n", - " pipeline_job.name, \"trained_model\"\n", - ")\n", - "\n", - "finetuned_model_name = model_name + \"-news-summary\"\n", - "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n", - "print(\"path to register model: \", model_path_from_job)\n", - "prepare_to_register_model = Model(\n", - " path=model_path_from_job,\n", - " type=AssetTypes.MLFLOW_MODEL,\n", - " name=finetuned_model_name,\n", - " version=timestamp, # use timestamp as version to avoid version conflict\n", - " description=model_name + \" fine tuned model for summarizing news articles\",\n", - ")\n", - "print(\"prepare to register model: \\n\", prepare_to_register_model)\n", - "# register the model from pipeline job output\n", - "registered_model = workspace_ml_client.models.create_or_update(\n", - " prepare_to_register_model\n", - ")\n", - "print(\"registered model: \\n\", registered_model)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. 
Deploy the fine tuned model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "\n", - "online_endpoint_name = \"news-summary-\" + timestamp\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + registered_model.name\n", - " + \", fine tuned model for emotion detection\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=registered_model.id,\n", - " instance_type=\"Standard_DS3_v2\",\n", - " instance_count=1,\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8. 
Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read ./news-summary-dataset/small_test.jsonl into a pandas dataframe\n", - "import pandas as pd\n", - "import json\n", - "\n", - "test_df = pd.read_json(\n", - " \"./news-summary-dataset/test.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "# drop the id column\n", - "test_df.drop(columns=[\"id\"], inplace=True)\n", - "# take 1 random sample\n", - "test_df = test_df.sample(n=1)\n", - "# rebuild index\n", - "test_df.reset_index(drop=True, inplace=True)\n", - "# rename the highlights column to ground_truth_summary\n", - "test_df.rename(columns={\"highlights\": \"ground_truth_summary\"}, inplace=True)\n", - "test_df.head(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a json object with the key as \"inputs\" and value as a list of values from the article column of the test dataframe\n", - "test_json = {\"inputs\": {\"input_string\": test_df[\"article\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./emotion-dataset folder\n", - "with open(\"./news-summary-dataset/sample_score.json\", \"w\") as f:\n", - " json.dump(test_json, f)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> If the input data is long or number of records are too may, you may run into the following error: \"Failed to test real-time endpoint\n", - "upstream request timeout Please check this guide to understand why this error code might have been returned [https://docs.microsoft.com/en-us/azure/machine-learning/how-to-troubleshoot-online-endpoints#http-status-codes]\". 
Try to submit smaller and fewer inputs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./news-summary-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the response to a pandas dataframe and rename the label column as scored_label\n", - "response_df = pd.read_json(response)\n", - "# rename summary_text column to scored_summary\n", - "response_df.rename(columns={0: \"scored_summary\"}, inplace=True)\n", - "response_df.head(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# merge the test dataframe and the response dataframe on the index\n", - "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n", - "merged_df.head(1)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9. 
Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "notebooks-venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/label.json b/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/label.json deleted file mode 100644 index a3d64dfc98..0000000000 --- a/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/label.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "id2label": { - "0": "anger", - "1": "fear", - "2": "joy", - "3": "love", - "4": "sadness", - "5": "surprise" - }, - "label2id": { - "anger": 0, - "fear": 1, - "joy": 2, - "love": 3, - "sadness": 4, - "surprise": 5 - } -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/sample_score.json deleted file mode 100644 index 7e526c30ee..0000000000 --- a/sdk/python/foundation-models/system/finetune/text-classification/emotion-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["i plan to do so by obtaining an mba and from that mba program i feel that the most valuable outcomes i would like", "i came out of the 
airport that makes me feel irritable uncomfortable and even sadder", "i couldnt help feeling for him and this awful predicament he lives with on a daily and nightly basis and i was just so glad that once bel started to see the light he stuck it out and stood by daniel whilst no one else did including his family who im afraid i got really disgusted with", "i feel like i should have some sort of rockstar razzle dazzle lifestyle but i would at least like to spend a third of my life doing something i feel is worthwhile", "i love to dance but often feel inhibited by my own body unsure what i am capable of hyper concerned about other people watching me and having opinions on my style or just feeling awkward as if i have no idea what i am supposed to do here", "i feel so greedy so needy so helpless", "i feel is entirely more dangerous", "i mean i feel like a broke record sometimes", "i now im graduating in two days but i feel so sad right now", "i feel empty inside not surprising considering i havent eaten all day"]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/text-classification/emotion-detection.ipynb b/sdk/python/foundation-models/system/finetune/text-classification/emotion-detection.ipynb deleted file mode 100644 index fd99a247ea..0000000000 --- a/sdk/python/foundation-models/system/finetune/text-classification/emotion-detection.ipynb +++ /dev/null @@ -1,628 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Classification - Emotion Detection \n", - "\n", - "This sample shows how use `text-classification` components from the `azureml` system registry to fine tune a model to detect emotions using emotion dataset. We then deploy the fine tuned model to an online endpoint for real time inference. 
The model is trained on tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n", - "\n", - "### Training data\n", - "We will use the [emotion](https://huggingface.co/datasets/dair-ai/emotion) dataset. A copy of this dataset is available in the [emotion-dataset](./emotion-dataset/) folder. \n", - "\n", - "### Model\n", - "Models that can perform the `fill-mask` task are generally good foundation models to fine tune for `text-classification`. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter instruct the components to pull the model directly from HuggingFace. \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick a model to fine tune.\n", - "* Pick and explore training data.\n", - "* Configure the fine tuning job.\n", - "* Run the fine tuning job.\n", - "* Review training and evaluation metrics. \n", - "* Register the fine tuned model. \n", - "* Deploy the fine tuned model for real time inference.\n", - "* Clean up resources. " - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. 
For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install azure-ai-ml\n", - "%pip install azure-identity\n", - "%pip install datasets==2.9.0\n", - "%pip install mlflow\n", - "%pip install azureml-mlflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential=credential)\n", - "except:\n", - " workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are 
available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n", - "\n", - "experiment_name = \"text-classification-emotion-detection\"\n", - "\n", - "# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - "except Exception as ex:\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpu_count_found = False\n", - "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n", - "available_sku_sizes = []\n", - "for compute_sku in workspace_compute_sku_list:\n", - " available_sku_sizes.append(compute_sku.name)\n", - " if compute_sku.name.lower() == compute.size.lower():\n", - " gpus_per_node = compute_sku.gpus\n", - " gpu_count_found = True\n", - "# if gpu_count_found not found, then print an error\n", - "if gpu_count_found:\n", - " print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n", - "else:\n", - " raise ValueError(\n", - " f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n", - " f\"This should not happen. 
Please check the selected compute cluster: {compute_cluster} and try again.\"\n", - " )\n", - "# CPU based finetune works only for single-node single-process\n", - "if gpus_per_node == 0:\n", - " print(\n", - " \"WARNING! Selected compute doesn't have GPU. CPU based finetune is experimental and works on a single process in a single node\"\n", - " )\n", - " gpus_per_node = 1\n", - "\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a foundation model to fine tune\n", - "\n", - "Models that support `fill-mask` tasks are good candidates to fine tune for `text-classification`. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n", - "\n", - "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"bert-base-uncased\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the dataset for fine-tuning the model\n", - "\n", - "A copy of the emotion dataset is available in the [emotion-dataset](./emotion-dataset/) folder. 
The next few cells show basic data preparation for fine tuning:\n", - "* Visualize some data rows\n", - "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./emotion-dataset/label.json](./emotion-dataset/label.json). This step is needed if you want string labels such as `anger`, `joy`, etc. returned when scoring the model. If you skip this step, the model will return numerical categories such as 0, 1, 2, etc. and you will have to map them to what the category represents yourself. \n", - "* We want this sample to run quickly, so save smaller `train`, `validation` and `test` files containing 10% of the original. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the ./emotion-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "df = pd.read_json(\"./emotion-dataset/train.jsonl\", lines=True)\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the id2label json element of the ./emotion-dataset/label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n", - "import json\n", - "\n", - "with open(\"./emotion-dataset/label.json\") as f:\n", - " id2label = json.load(f)\n", - " id2label = id2label[\"id2label\"]\n", - " label_df = pd.DataFrame.from_dict(\n", - " id2label, orient=\"index\", columns=[\"label_string\"]\n", - " )\n", - " label_df[\"label\"] = label_df.index.astype(\"int64\")\n", - " label_df = label_df[[\"label\", \"label_string\"]]\n", - "label_df.head()" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load test.jsonl, train.jsonl and validation.jsonl form the ./emotion-dataset folder into pandas dataframes\n", - "test_df = pd.read_json(\"./emotion-dataset/test.jsonl\", lines=True)\n", - "train_df = pd.read_json(\"./emotion-dataset/train.jsonl\", lines=True)\n", - "validation_df = pd.read_json(\"./emotion-dataset/validation.jsonl\", lines=True)\n", - "# join the train, validation and test dataframes with the id2label dataframe to get the label_string column\n", - "train_df = train_df.merge(label_df, on=\"label\", how=\"left\")\n", - "validation_df = validation_df.merge(label_df, on=\"label\", how=\"left\")\n", - "test_df = test_df.merge(label_df, on=\"label\", how=\"left\")\n", - "# show the first 5 rows of the train dataframe\n", - "train_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save 10% of the rows from the train, validation and test dataframes into files with small_ prefix in the ./emotion-dataset folder\n", - "train_df.sample(frac=0.1).to_json(\n", - " \"./emotion-dataset/small_train.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "validation_df.sample(frac=0.1).to_json(\n", - " \"./emotion-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "test_df.sample(frac=0.1).to_json(\n", - " \"./emotion-dataset/small_test.jsonl\", orient=\"records\", lines=True\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the fine tuning job using the the model and data as inputs\n", - " \n", - "Create the job that uses the `text-classification` pipeline component. [Learn more]() about all the parameters supported for fine tuning." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n", - "from azure.ai.ml import PyTorchDistribution, Input\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"text_classification_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def create_pipeline():\n", - " finetuning_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry id identified in step #3\n", - " mlflow_model_path=foundation_model.id,\n", - " # huggingface_id = 'bert-base-uncased', # if you want to use a huggingface model, uncomment this line and comment the above line\n", - " compute_model_import=compute_cluster,\n", - " compute_preprocess=compute_cluster,\n", - " compute_finetune=compute_cluster,\n", - " compute_model_evaluation=compute_cluster,\n", - " # map the dataset splits to parameters\n", - " train_file_path=Input(\n", - " type=\"uri_file\", path=\"./emotion-dataset/small_train.jsonl\"\n", - " ),\n", - " validation_file_path=Input(\n", - " type=\"uri_file\", path=\"./emotion-dataset/small_validation.jsonl\"\n", - " ),\n", - " test_file_path=Input(\n", - " type=\"uri_file\", path=\"./emotion-dataset/small_test.jsonl\"\n", - " ),\n", - " # evaluation_config=Input(\n", - " # type=\"uri_file\", path=\"./text-classification-config.json\"\n", - " # ),\n", - " # The following parameters map to the dataset fields\n", - " sentence1_key=\"text\",\n", - " label_key=\"label_string\",\n", - " # Training settings\n", - " number_of_gpu_to_use_finetuning=gpus_per_node, # set to the number of GPUs available in the compute\n", - " num_train_epochs=3,\n", - " learning_rate=2e-5,\n", - " )\n", - " return {\n", - " # map the output 
of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", - " \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n", - " }\n", - "\n", - "\n", - "pipeline_object = create_pipeline()\n", - "\n", - "# don't use cached results from previous jobs\n", - "pipeline_object.settings.force_rerun = True" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the job" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# submit the pipeline job\n", - "pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - ")\n", - "# wait for the pipeline job to complete\n", - "workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review training and evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n", - "runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - ")\n", - "training_run = None\n", - "evaluation_run = None\n", - "# get the training and evaluation runs.\n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", - " if \"epoch\" in run.data.metrics:\n", - " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", - " elif \"accuracy\" in run.data.metrics:\n", - " evaluation_run = run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if training_run:\n", - " print(\"Training metrics:\\n\\n\")\n", - " print(json.dumps(training_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Training job found\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if evaluation_run:\n", - " print(\"Evaluation metrics:\\n\\n\")\n", - " print(json.dumps(evaluation_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Evaluation job found\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Register the fine tuned model with the workspace\n", - "\n", - "We will register the model from the output of the fine tuning job. 
This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Model\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# check if the `trained_model` output is available\n", - "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n", - "\n", - "# fetch the model from pipeline job output - not working, hence fetching from fine tune child job\n", - "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n", - " pipeline_job.name, \"trained_model\"\n", - ")\n", - "\n", - "finetuned_model_name = model_name + \"-emotion-detection\"\n", - "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n", - "print(\"path to register model: \", model_path_from_job)\n", - "prepare_to_register_model = Model(\n", - " path=model_path_from_job,\n", - " type=AssetTypes.MLFLOW_MODEL,\n", - " name=finetuned_model_name,\n", - " version=timestamp, # use timestamp as version to avoid version conflict\n", - " description=model_name + \" fine tuned model for emotion detection\",\n", - ")\n", - "print(\"prepare to register model: \\n\", prepare_to_register_model)\n", - "# register the model from pipeline job output\n", - "registered_model = workspace_ml_client.models.create_or_update(\n", - " prepare_to_register_model\n", - ")\n", - "print(\"registered model: \\n\", registered_model)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Deploy the fine tuned model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "\n", - "online_endpoint_name = \"emotion-\" + timestamp\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + registered_model.name\n", - " + \", fine tuned model for emotion detection\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=registered_model.id,\n", - " instance_type=\"Standard_DS3_v2\",\n", - " instance_count=1,\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read ./emotion-dataset/small_test.jsonl into a pandas dataframe\n", - "test_df = pd.read_json(\"./emotion-dataset/small_test.jsonl\", lines=True)\n", - "# take 10 random samples\n", - "test_df = test_df.sample(n=10)\n", - "# rebuild index\n", - "test_df.reset_index(drop=True, inplace=True)\n", - "# rename the label_string column to ground_truth_label\n", - "test_df = test_df.rename(columns={\"label_string\": \"ground_truth_label\"})\n", - "test_df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a json object with the key as \"inputs\" and value as a list of values from the text column of the test dataframe\n", - "test_json = {\"inputs\": {\"input_string\": test_df[\"text\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./emotion-dataset folder\n", - "with open(\"./emotion-dataset/sample_score.json\", \"w\") as f:\n", - " json.dump(test_json, f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./emotion-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the response to a pandas dataframe and rename the label column as scored_label\n", - "response_df = pd.read_json(response)\n", - "response_df = response_df.rename(columns={0: \"scored_label\"})\n", - "response_df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": 
[], - "source": [ - "# merge the test dataframe and the response dataframe on the index\n", - "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n", - "merged_df.head(10)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9. Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "notebooks-venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json b/sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json deleted file mode 100644 index 597603459e..0000000000 --- a/sdk/python/foundation-models/system/finetune/text-classification/text-classification-config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "metrics": ["average_precision_score_macro", "AUC_macro", "recall_score_macro", "average_precision_score_binary", "average_precision_score_micro", "AUC_binary", "recall_score_micro", "AUC_micro", "norm_macro_recall", "average_precision_score_weighted", "weighted_accuracy", "precision_score_micro", "f1_score_binary", "accuracy_table", "precision_score_macro", "f1_score_micro", "precision_score_weighted", "f1_score_weighted", "confusion_matrix", "recall_score_binary", "matthews_correlation", 
"log_loss", "accuracy", "precision_score_binary", "balanced_accuracy", "AUC_weighted", "f1_score_macro", "recall_score_weighted"], - "multilabel": false, - "enable_metric_confidence": true, - "confidence_metrics": ["accuracy", "f1_score_micro"], - "use_binary": false -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/download-dataset.py b/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/download-dataset.py deleted file mode 100644 index 3702e2a14f..0000000000 --- a/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/download-dataset.py +++ /dev/null @@ -1,39 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="conll2003", help="dataset name") -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="data", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - - -def format_ner_tags(example, class_names): - example["text"] = " ".join(example["tokens"]) - example["ner_tags_str"] = [class_names[id] for id in example["ner_tags"]] - return example - - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names -from functools import partial - -for split in get_dataset_split_names(args.dataset): - # load the split of the dataset - dataset = load_dataset(args.dataset, split=split) - dataset = dataset.map( - partial(format_ner_tags, class_names=dataset.features["ner_tags"].feature.names) - ) - # save the split of the dataset to the download directory as json lines file - 
dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl")) - # print dataset features diff --git a/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/sample_score.json deleted file mode 100644 index 899e024028..0000000000 --- a/sdk/python/foundation-models/system/finetune/token-classification/conll2003-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["BUENOS AIRES 1996-12-06", "TALLINN 1996-12-06", "Sunseeds 219 216 220 216", "The London club had been rocked by a two-goal burst from forwards Dean Sturridge and Darryl Powell in the 62nd and 71st minutes which overturned Arsenal 's 1-0 lead from a diving header by captain Tony Adams on the stroke of halftime .", "Gianluca Vialli ( Chelsea )", "Source : Manitoba Pork .", "( ( Chicago Newsdesk 312-408-8720 ) )", "On Thursday , overnight rates moved between 21.625 and 22.125 .", "Lazio ( 12 ) v AS Roma ( 7 ) 1930", "Previous World Cup victories : None"]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json b/sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json deleted file mode 100644 index 23efa790c7..0000000000 --- a/sdk/python/foundation-models/system/finetune/token-classification/token-classification-config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "metrics": ["precision_score_macro", "f1_score_micro", "recall_score_macro", "f1_score_weighted", "recall_score_micro", "accuracy", "precision_score_weighted", "precision_score_micro", "f1_score_macro", "recall_score_weighted"] -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/token-classification/token-classification.ipynb b/sdk/python/foundation-models/system/finetune/token-classification/token-classification.ipynb deleted 
file mode 100644 index c7377c17da..0000000000 --- a/sdk/python/foundation-models/system/finetune/token-classification/token-classification.ipynb +++ /dev/null @@ -1,608 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Token Classification - Named Entity Recognition (NER)\n", - "\n", - "This sample shows how use `token-classification` components from the `azureml` system registry to fine tune a model to detect entities using conll2003 dataset. We then deploy the fine tuned model to an online endpoint for real time inference. The model is trained on tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n", - "\n", - "### Training data\n", - "We will use the [conll2003](https://huggingface.co/datasets/conll2003) dataset. A copy of this dataset is available in the [conll2003-dataset](./conll2003-dataset/) folder. \n", - "\n", - "### Model\n", - "Models that can perform the `fill-mask` task are generally good foundation models to fine tune for `token-classification`. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter instruct the components to pull the model directly from HuggingFace. \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick a model to fine tune.\n", - "* Pick and explore training data.\n", - "* Configure the fine tuning job.\n", - "* Run the fine tuning job.\n", - "* Register the fine tuned model. \n", - "* Deploy the fine tuned model for real time inference.\n", - "* Clean up resources. 
\n", - "\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install azure-ai-ml\n", - "%pip install azure-identity\n", - "%pip install datasets==2.9.0\n", - "%pip install mlflow\n", - "%pip install azureml-mlflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential=credential)\n", - "except:\n", - " workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n", - "\n", - "experiment_name = \"token-classification-ner\"\n", - "\n", - "# If you already have a gpu cluster, mention it here. 
Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - "except Exception as ex:\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpu_count_found = False\n", - "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n", - "available_sku_sizes = []\n", - "for compute_sku in workspace_compute_sku_list:\n", - " available_sku_sizes.append(compute_sku.name)\n", - " if compute_sku.name.lower() == compute.size.lower():\n", - " gpus_per_node = compute_sku.gpus\n", - " gpu_count_found = True\n", - "# if gpu_count_found not found, then print an error\n", - "if gpu_count_found:\n", - " print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n", - "else:\n", - " raise ValueError(\n", - " f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n", - " f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n", - " )\n", - "# CPU based finetune works only for single-node single-process\n", - "if gpus_per_node == 0:\n", - " print(\n", - " \"WARNING! Selected compute doesn't have GPU. 
CPU based finetune is experimental and works on a single process in a single node\"\n", - " )\n", - " gpus_per_node = 1\n", - "\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a foundation model to fine tune\n", - "\n", - "Models that support `fill-mask` tasks are good candidates to fine tune for `token-classification`. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n", - "\n", - "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"bert-base-uncased\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the dataset for fine-tuning the model\n", - "\n", - "A copy of the conll2003 dataset is available in the [conll2003-dataset](./conll2003-dataset/) folder. The next few cells show basic data preparation for fine tuning:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so save smaller `train`, `validation` and `test` files containing 10% of the original. 
This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. \n", - "\n", - "> The [download-dataset.py](./conll2003-dataset/download-dataset.py) is used to download the conll2003 dataset and transform the dataset into finetune pipeline component consumable format." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the ./conll2003-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "df = pd.read_json(\"./conll2003-dataset/train.jsonl\", lines=True)\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load test.jsonl, train.jsonl and validation.jsonl form the ./conll2003-dataset folder into pandas dataframes\n", - "test_df = pd.read_json(\"./conll2003-dataset/test.jsonl\", lines=True)\n", - "train_df = pd.read_json(\"./conll2003-dataset/train.jsonl\", lines=True)\n", - "validation_df = pd.read_json(\"./conll2003-dataset/validation.jsonl\", lines=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save 10% of the rows from the train, validation and test dataframes into files with small_ prefix in the ./conll2003-dataset folder\n", - "train_df.sample(frac=0.1).to_json(\n", - " \"./conll2003-dataset/small_train.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "validation_df.sample(frac=0.1).to_json(\n", - " \"./conll2003-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "test_df.sample(frac=0.1).to_json(\n", - " \"./conll2003-dataset/small_test.jsonl\", orient=\"records\", lines=True\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ 
- "### 4. Submit the fine tuning job using the the model and data as inputs\n", - " \n", - "Create the job that uses the `token-classification` pipeline component. [Learn more]() about all the parameters supported for fine tuning." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n", - "from azure.ai.ml import PyTorchDistribution, Input\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"token_classification_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def create_pipeline():\n", - " finetuning_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry id identified in step #3\n", - " mlflow_model_path=foundation_model.id,\n", - " # huggingface_id = 'bert-base-uncased', # if you want to use a huggingface model, uncomment this line and comment the above line\n", - " compute_model_import=compute_cluster,\n", - " compute_preprocess=compute_cluster,\n", - " compute_finetune=compute_cluster,\n", - " compute_model_evaluation=compute_cluster,\n", - " # map the dataset splits to parameters\n", - " train_file_path=Input(\n", - " type=\"uri_file\", path=\"./conll2003-dataset/small_train.jsonl\"\n", - " ),\n", - " validation_file_path=Input(\n", - " type=\"uri_file\", path=\"./conll2003-dataset/small_validation.jsonl\"\n", - " ),\n", - " test_file_path=Input(\n", - " type=\"uri_file\", path=\"./conll2003-dataset/small_test.jsonl\"\n", - " ),\n", - " evaluation_config=Input(\n", - " type=\"uri_file\", path=\"./token-classification-config.json\"\n", - " ),\n", - " # The following parameters map to the dataset fields\n", - " token_key=\"tokens\",\n", - " tag_key=\"ner_tags_str\",\n", - " # 
Training settings\n", - " number_of_gpu_to_use_finetuning=gpus_per_node, # set to the number of GPUs available in the compute\n", - " num_train_epochs=3,\n", - " learning_rate=2e-5,\n", - " )\n", - " return {\n", - " # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", - " \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n", - " }\n", - "\n", - "\n", - "pipeline_object = create_pipeline()\n", - "\n", - "# don't use cached results from previous jobs\n", - "pipeline_object.settings.force_rerun = True" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the job" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# submit the pipeline job\n", - "pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - ")\n", - "# wait for the pipeline job to complete\n", - "workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review training and evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n", - "runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - ")\n", - "training_run = None\n", - "evaluation_run = None\n", - "# get the training and evaluation runs.\n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", - " if \"epoch\" in run.data.metrics:\n", - " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", - " elif \"accuracy\" in run.data.metrics:\n", - " evaluation_run = run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if training_run:\n", - " print(\"Training metrics:\\n\\n\")\n", - " print(json.dumps(training_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Training job found\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if evaluation_run:\n", - " print(\"Evaluation metrics:\\n\\n\")\n", - " print(json.dumps(evaluation_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Evaluation job found\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Register the fine tuned model with the workspace\n", - "\n", - "We will register the model from the output of the fine tuning job. 
This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Model\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# check if the `trained_model` output is available\n", - "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n", - "\n", - "# fetch the model from pipeline job output - not working, hence fetching from fine tune child job\n", - "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n", - " pipeline_job.name, \"trained_model\"\n", - ")\n", - "\n", - "finetuned_model_name = model_name + \"-ner\"\n", - "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n", - "print(\"path to register model: \", model_path_from_job)\n", - "prepare_to_register_model = Model(\n", - " path=model_path_from_job,\n", - " type=AssetTypes.MLFLOW_MODEL,\n", - " name=finetuned_model_name,\n", - " version=timestamp, # use timestamp as version to avoid version conflict\n", - " description=model_name + \" fine tuned model for named entity recognition\",\n", - ")\n", - "print(\"prepare to register model: \\n\", prepare_to_register_model)\n", - "# register the model from pipeline job output\n", - "registered_model = workspace_ml_client.models.create_or_update(\n", - " prepare_to_register_model\n", - ")\n", - "print(\"registered model: \\n\", registered_model)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Deploy the fine tuned model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "\n", - "online_endpoint_name = \"ner-\" + timestamp\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + registered_model.name\n", - " + \", fine tuned model for named entity recognition\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=registered_model.id,\n", - " instance_type=\"Standard_DS3_v2\",\n", - " instance_count=1,\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read ./conll2003-dataset/small_test.jsonl into a pandas dataframe\n", - "test_df = pd.read_json(\"./conll2003-dataset/small_test.jsonl\", lines=True)\n", - "# take 10 random samples\n", - "test_df = test_df.sample(n=10)\n", - "# drop the id, pos_tags, chunk_tags, ner_tags column\n", - "test_df.drop(columns=[\"id\", \"pos_tags\", \"chunk_tags\", \"ner_tags\"], inplace=True)\n", - "# rebuild index\n", - "test_df.reset_index(drop=True, inplace=True)\n", - "# rename the ner_tags_str column to ground_truth_label\n", - "test_df = test_df.rename(columns={\"ner_tags_str\": \"ground_truth_tags\"})\n", - "test_df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "# create a json object with the key as \"inputs\" and value as a list of values from the text column of the test dataframe\n", - "test_json = {\"inputs\": {\"input_string\": test_df[\"text\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./conll2003-dataset folder\n", - "with open(\"./conll2003-dataset/sample_score.json\", \"w\") as f:\n", - " json.dump(test_json, f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./conll2003-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "# rename the column to predicted_tags\n", 
- "response_df.rename(columns={0: \"predicted_tags\"}, inplace=True)\n", - "response_df.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# merge the test dataframe and the response dataframe on the index\n", - "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n", - "merged_df.head(10)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9. Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "notebooks-venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/finetune/translation/translation-config.json b/sdk/python/foundation-models/system/finetune/translation/translation-config.json deleted file mode 100644 index f293ed61bb..0000000000 --- a/sdk/python/foundation-models/system/finetune/translation/translation-config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "metrics": ["bleu_1", "bleu_2", "bleu_3", "bleu_4"], - "smoothing":false -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/finetune/translation/translation.ipynb b/sdk/python/foundation-models/system/finetune/translation/translation.ipynb deleted file mode 100644 index 1270692962..0000000000 --- 
a/sdk/python/foundation-models/system/finetune/translation/translation.ipynb +++ /dev/null @@ -1,604 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Translation - Translate english to romanian\n", - "\n", - "This sample shows how to use `translation` components from the `azureml` system registry to fine tune a model to translate english language to romanian language. We then deploy it to an online endpoint for real time inference. The model is trained on tiny sample of the dataset with a small number of epochs to illustrate the fine tuning approach.\n", - "\n", - "### Training data\n", - "We will use the [wmt16 (ro-en)](https://huggingface.co/datasets/wmt16) dataset. A copy of this dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder for easy access. \n", - "\n", - "### Model\n", - "Models that can perform the `translation` task are used here. We will use the `t5-small` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. Optionally, if you need to fine tune a model that is available on HuggingFace, but not available in `azureml` system registry, you can either [import](https://github.com/Azure/azureml-examples) the model or use the `huggingface_id` parameter instruct the components to pull the model directly from HuggingFace. \n", - "\n", - "### Outline\n", - "* Setup pre-requisites such as compute.\n", - "* Pick a model to fine tune.\n", - "* Pick and explore training data.\n", - "* Configure the fine tuning job.\n", - "* Run the fine tuning job.\n", - "* Register the fine tuned model. \n", - "* Deploy the fine tuned model for real time inference.\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Setup pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. 
Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry\n", - "* Set an optional experiment name\n", - "* Check or create compute. A single GPU node can have multiple GPU cards. For example, in one node of `Standard_ND40rs_v2` there are 8 NVIDIA V100 GPUs while in `Standard_NC12s_v3`, there are 2 NVIDIA V100 GPUs. Refer to the [docs](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu) for this information. The number of GPU cards per node is set in the param `gpus_per_node` below. Setting this value correctly will ensure utilization of all GPUs in the node. The recommended GPU compute SKUs can be found [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ncv3-series) and [here](https://learn.microsoft.com/en-us/azure/virtual-machines/ndv2-series)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install dependencies by running below cell. This is not an optional step if running in a new environment." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install azure-ai-ml\n", - "%pip install azure-identity\n", - "%pip install datasets==2.9.0\n", - "%pip install mlflow\n", - "%pip install azureml-mlflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "try:\n", - " workspace_ml_client = MLClient.from_config(credential=credential)\n", - "except:\n", - " workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - " )\n", - "\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")\n", - "\n", - "experiment_name = \"translation-wmt16-en-ro\"\n", - "\n", - "# If you already have a gpu cluster, mention it here. 
Else will create a new one with the name 'gpu-cluster-big'\n", - "compute_cluster = \"gpu-cluster-big\"\n", - "try:\n", - " compute = workspace_ml_client.compute.get(compute_cluster)\n", - "except Exception as ex:\n", - " compute = AmlCompute(\n", - " name=compute_cluster,\n", - " size=\"Standard_ND40rs_v2\",\n", - " max_instances=2, # For multi node training set this to an integer value more than 1\n", - " )\n", - " workspace_ml_client.compute.begin_create_or_update(compute).wait()\n", - "\n", - "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n", - "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n", - "# Setting this to more than the number of GPUs will result in an error.\n", - "gpu_count_found = False\n", - "workspace_compute_sku_list = workspace_ml_client.compute.list_sizes()\n", - "available_sku_sizes = []\n", - "for compute_sku in workspace_compute_sku_list:\n", - " available_sku_sizes.append(compute_sku.name)\n", - " if compute_sku.name.lower() == compute.size.lower():\n", - " gpus_per_node = compute_sku.gpus\n", - " gpu_count_found = True\n", - "# if gpu_count_found not found, then print an error\n", - "if gpu_count_found:\n", - " print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n", - "else:\n", - " raise ValueError(\n", - " f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n", - " f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n", - " )\n", - "# CPU based finetune works only for single-node single-process\n", - "if gpus_per_node == 0:\n", - " print(\n", - " \"WARNING! Selected compute doesn't have GPU. 
CPU based finetune is experimental and works on a single process in a single node\"\n", - " )\n", - " gpus_per_node = 1\n", - "\n", - "# genrating a unique timestamp that can be used for names and versions that need to be unique\n", - "timestamp = str(int(time.time()))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a foundation model to fine tune\n", - "\n", - "Models that support `translation` tasks are picked to fine tune. You can browse these models in the Model Catalog in the AzureML Studio, filtering by the `translation` task. In this example, we use the `t5-small` model. If you have opened this notebook for a different model, replace the model name and version accordingly. \n", - "\n", - "Note the model id property of the model. This will be passed as input to the fine tuning job. This is also available as the `Asset ID` field in model details page in AzureML Studio Model Catalog. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"t5-small\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Pick the dataset for fine-tuning the model \n", - "\n", - "A copy of the dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder. \n", - "* Visualize some data rows. \n", - "* We want this sample to run quickly, so save smaller `train`, `validation` and `test` files containing 20% of the already trimmed rows. This means the fine tuned model will have lower accuracy, hence it should not be put to real-world use. 
\n", - "\n", - "> The [download-dataset.py](./wmt16-en-ro-dataset/download-dataset.py) is used to download the wmt16 (ro-en) dataset and transform the dataset into finetune pipeline component consumable format. Also as the dataset is large, hence we here have only part of the dataset.\n", - "\n", - "> **Note** : Some language models have different language codes and hence the column names in the dataset should reflect the same." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "# load the train.jsonl, test.jsonl and validation.jsonl files from the ./wmt16-en-ro-dataset/ folder and show first 5 rows\n", - "train_df = pd.read_json(\"./wmt16-en-ro-dataset/train.jsonl\", lines=True)\n", - "validation_df = pd.read_json(\"./wmt16-en-ro-dataset/validation.jsonl\", lines=True)\n", - "test_df = pd.read_json(\"./wmt16-en-ro-dataset/test.jsonl\", lines=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save 20% of the rows from the dataframes into files with small_ prefix in the ./wmt16-en-ro-dataset folder\n", - "train_df.sample(frac=0.2).to_json(\n", - " \"./wmt16-en-ro-dataset/small_train.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "validation_df.sample(frac=0.2).to_json(\n", - " \"./wmt16-en-ro-dataset/small_validation.jsonl\", orient=\"records\", lines=True\n", - ")\n", - "test_df.sample(frac=0.2).to_json(\n", - " \"./wmt16-en-ro-dataset/small_test.jsonl\", orient=\"records\", lines=True\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Submit the fine tuning job using the the model and data as inputs\n", - " \n", - "Create the job that uses the `translation` pipeline component. 
[Learn more]() about all the parameters supported for fine tuning." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component\n", - "from azure.ai.ml import PyTorchDistribution, Input\n", - "\n", - "# fetch the pipeline component\n", - "pipeline_component_func = registry_ml_client.components.get(\n", - " name=\"translation_pipeline\", label=\"latest\"\n", - ")\n", - "\n", - "\n", - "# define the pipeline job\n", - "@pipeline()\n", - "def create_pipeline():\n", - " finetuning_job = pipeline_component_func(\n", - " # specify the foundation model available in the azureml system registry id identified in step #3\n", - " mlflow_model_path=foundation_model.id,\n", - " # huggingface_id = 't5-small', # if you want to use a huggingface model, uncomment this line and comment the above line\n", - " compute_model_import=compute_cluster,\n", - " compute_preprocess=compute_cluster,\n", - " compute_finetune=compute_cluster,\n", - " compute_model_evaluation=compute_cluster,\n", - " # map the dataset splits to parameters\n", - " train_file_path=Input(\n", - " type=\"uri_file\", path=\"./wmt16-en-ro-dataset/small_train.jsonl\"\n", - " ),\n", - " validation_file_path=Input(\n", - " type=\"uri_file\", path=\"./wmt16-en-ro-dataset/small_validation.jsonl\"\n", - " ),\n", - " test_file_path=Input(\n", - " type=\"uri_file\", path=\"./wmt16-en-ro-dataset/small_test.jsonl\"\n", - " ),\n", - " evaluation_config=Input(type=\"uri_file\", path=\"./translation-config.json\"),\n", - " # The following parameters map to the dataset fields\n", - " # source_lang parameter maps to the \"en\" field in the wmt16 dataset\n", - " source_lang=\"en\",\n", - " # target_lang parameter maps to the \"ro\" field in the wmt16 dataset\n", - " target_lang=\"ro\",\n", - " # training settings\n", - " 
number_of_gpu_to_use_finetuning=gpus_per_node, # set to the number of GPUs available in the compute\n", - " num_train_epochs=3,\n", - " learning_rate=2e-5,\n", - " )\n", - " return {\n", - " # map the output of the fine tuning job to the output of the pipeline job so that we can easily register the fine tuned model\n", - " # registering the model is required to deploy the model to an online or batch endpoint\n", - " \"trained_model\": finetuning_job.outputs.mlflow_model_folder\n", - " }\n", - "\n", - "\n", - "pipeline_object = create_pipeline()\n", - "\n", - "# don't use cached results from previous jobs\n", - "pipeline_object.settings.force_rerun = True" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submit the job" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# submit the pipeline job\n", - "pipeline_job = workspace_ml_client.jobs.create_or_update(\n", - " pipeline_object, experiment_name=experiment_name\n", - ")\n", - "# wait for the pipeline job to complete\n", - "workspace_ml_client.jobs.stream(pipeline_job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Review training and evaluation metrics\n", - "Viewing the job in AzureML studio is the best way to analyze logs, metrics and outputs of jobs. You can create custom charts and compare metics across different jobs. See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-log-view-metrics?tabs=interactive#view-jobsruns-information-in-the-studio to learn more. \n", - "\n", - "However, we may need to access and review metrics programmatically for which we will use MLflow, which is the recommended client for logging and querying metrics." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import mlflow, json\n", - "\n", - "mlflow_tracking_uri = workspace_ml_client.workspaces.get(\n", - " workspace_ml_client.workspace_name\n", - ").mlflow_tracking_uri\n", - "mlflow.set_tracking_uri(mlflow_tracking_uri)\n", - "# concat 'tags.mlflow.rootRunId=' and pipeline_job.name in single quotes as filter variable\n", - "filter = \"tags.mlflow.rootRunId='\" + pipeline_job.name + \"'\"\n", - "runs = mlflow.search_runs(\n", - " experiment_names=[experiment_name], filter_string=filter, output_format=\"list\"\n", - ")\n", - "training_run = None\n", - "evaluation_run = None\n", - "# get the training and evaluation runs.\n", - "# using a hacky way till 'Bug 2320997: not able to show eval metrics in FT notebooks - mlflow client now showing display names' is fixed\n", - "for run in runs:\n", - " # check if run.data.metrics.epoch exists\n", - " if \"epoch\" in run.data.metrics:\n", - " training_run = run\n", - " # else, check if run.data.metrics.accuracy exists\n", - " elif \"bleu_1\" in run.data.metrics:\n", - " evaluation_run = run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if training_run:\n", - " print(\"Training metrics:\\n\\n\")\n", - " print(json.dumps(training_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Training job found\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if evaluation_run:\n", - " print(\"Evaluation metrics:\\n\\n\")\n", - " print(json.dumps(evaluation_run.data.metrics, indent=2))\n", - "else:\n", - " print(\"No Evaluation job found\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Register the fine tuned model with the workspace\n", - "\n", - "We will register the model from the output of the fine tuning job. 
This will track lineage between the fine tuned model and the fine tuning job. The fine tuning job, further, tracks lineage to the foundation model, data and training code." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml.entities import Model\n", - "from azure.ai.ml.constants import AssetTypes\n", - "\n", - "# check if the `trained_model` output is available\n", - "print(\"pipeline job outputs: \", workspace_ml_client.jobs.get(pipeline_job.name).outputs)\n", - "\n", - "# fetch the model from pipeline job output - not working, hence fetching from fine tune child job\n", - "model_path_from_job = \"azureml://jobs/{0}/outputs/{1}\".format(\n", - " pipeline_job.name, \"trained_model\"\n", - ")\n", - "\n", - "finetuned_model_name = model_name + \"-wmt16-en-ro-src\"\n", - "finetuned_model_name = finetuned_model_name.replace(\"/\", \"-\")\n", - "print(\"path to register model: \", model_path_from_job)\n", - "prepare_to_register_model = Model(\n", - " path=model_path_from_job,\n", - " type=AssetTypes.MLFLOW_MODEL,\n", - " name=finetuned_model_name,\n", - " version=timestamp, # use timestamp as version to avoid version conflict\n", - " description=model_name + \" fine tuned model for translation wmt16 en to ro\",\n", - ")\n", - "print(\"prepare to register model: \\n\", prepare_to_register_model)\n", - "# register the model from pipeline job output\n", - "registered_model = workspace_ml_client.models.create_or_update(\n", - " prepare_to_register_model\n", - ")\n", - "print(\"registered model: \\n\", registered_model)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Deploy the fine tuned model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "\n", - "online_endpoint_name = \"translation-en-ro-\" + timestamp\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + registered_model.name\n", - " + \", fine tuned model for emotion detection\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can find here the list of SKU's supported for deployment - [Managed online endpoints SKU list](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=registered_model.id,\n", - " instance_type=\"Standard_DS3_v2\",\n", - " instance_count=1,\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read ./wmt16-en-ro-dataset/small_test.jsonl into a pandas dataframe\n", - "import pandas as pd\n", - "import json\n", - "\n", - "test_df = pd.read_json(\"./wmt16-en-ro-dataset/test.jsonl\", orient=\"records\", lines=True)\n", - "# take 1 random sample\n", - "test_df = test_df.sample(n=1)\n", - "# rebuild index\n", - "test_df.reset_index(drop=True, inplace=True)\n", - "test_df.head(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a json object with the key as \"inputs\" and value as a list of values from the en column of the test dataframe\n", - "test_json = {\"inputs\": {\"input_string\": test_df[\"en\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./wmt16-en-ro-dataset folder\n", - "with open(\"./wmt16-en-ro-dataset/sample_score.json\", \"w\") as f:\n", - " json.dump(test_json, f)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> If the input data is long or number of records are too may, you may run into the following error: \"Failed to test real-time endpoint\n", - "upstream request timeout Please check this guide to understand why this error code might have been returned [https://docs.microsoft.com/en-us/azure/machine-learning/how-to-troubleshoot-online-endpoints#http-status-codes]\". Try to submit smaller and fewer inputs." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./wmt16-en-ro-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "# rename the column to predicted_tags\n", - "response_df.rename(columns={0: \"predicted_translation\"}, inplace=True)\n", - "response_df.head(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# merge the test dataframe and the response dataframe on the index\n", - "merged_df = pd.merge(test_df, response_df, left_index=True, right_index=True)\n", - "merged_df.head(1)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9. 
Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "notebooks-venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/download-dataset.py b/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/download-dataset.py deleted file mode 100644 index d945ea740e..0000000000 --- a/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/download-dataset.py +++ /dev/null @@ -1,45 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="wmt16", help="dataset name") -# add an argument to specify a dataset name to download -parser.add_argument( - "--dataset_subset", type=str, default="ro-en", help="dataset subset name" -) -# argument to save a fraction of the dataset -parser.add_argument( - "--fraction", type=float, default=0.05, help="fraction of the dataset to save" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="data", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download 
directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - - -def format_translation(example): - for key in example["translation"]: - example[key] = example["translation"][key] - return example - - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset, args.dataset_subset): - # load the split of the dataset - dataset = load_dataset(args.dataset, args.dataset_subset, split=split) - dataset = dataset.map(format_translation, remove_columns=["translation"]) - # save the split of the dataset to the download directory as json lines file - dataset.select(range(int(dataset.num_rows * args.fraction))).to_json( - os.path.join(args.download_dir, f"{split}.jsonl") - ) diff --git a/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/sample_score.json b/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/sample_score.json deleted file mode 100644 index 5a9a88d089..0000000000 --- a/sdk/python/foundation-models/system/finetune/translation/wmt16-en-ro-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["8 Poll Numbers That Show Donald Trump Is For Real"]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.ipynb deleted file mode 100644 index ad5ae41c22..0000000000 --- a/sdk/python/foundation-models/system/inference/automatic-speech-recognition/asr-online-endpoint.ipynb +++ /dev/null @@ -1,234 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Automatic Speech Recognition Inference using Online Endpoints\n", - "\n", - "This sample shows how to deploy `automatic-speech-recognition` type models to an online 
endpoint for inference.\n", - "\n", - "### Task\n", - "`automatic-speech-recognition` (ASR) converts a speech signal to text, mapping a sequence of audio inputs to text outputs. Virtual assistants like Siri and Alexa use ASR models to help users everyday, and there are many other useful user-facing applications like live captioning and note-taking during meetings.\n", - "\n", - "### Model\n", - "Models that can perform the `automatic-speech-recognition` task are tagged with `task: automatic-speech-recognition`. We will use the `openai-whisper-large` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use custom audio files that have been uploaded to the cloud. \\\n", - "You can replace the links with any audio file stored on the cloud and verify inference.\n", - "- Most common audio formats (m4a, wav, flac, wma, mp3, etc.) are supported.\n", - "- The whisper model can process only 30 seconds of data at a time, so if the file you upload is longer than 30 seconds, only the first 30 seconds will be transcribed. This can be circumvented by splitting the file into 30 second chunks.\n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `automatic-speech-recognition` task. In this example, we use the `openai-whisper-large` model. If you have opened this notebook for a different model, replace the model name and version accordingly. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"openai-whisper-large\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Deploy the model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import (\n", - " ManagedOnlineEndpoint,\n", - " ManagedOnlineDeployment,\n", - " OnlineRequestSettings,\n", - ")\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "online_endpoint_name = \"asr-\" + str(timestamp)\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + foundation_model.name\n", - " + \", for automatic-speech-recognition task\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=foundation_model.id,\n", - " instance_type=\"Standard_DS4_v2\",\n", - " instance_count=1,\n", - " request_settings=OnlineRequestSettings(\n", 
- " request_timeout_ms=60000,\n", - " ),\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Test the endpoint with sample data\n", - "\n", - "We will fetch the sample scoring request and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./sample-request/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the json response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "response_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. 
Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/automatic-speech-recognition/sample-request/sample_score.json b/sdk/python/foundation-models/system/inference/automatic-speech-recognition/sample-request/sample_score.json deleted file mode 100644 index 0c87bf55cd..0000000000 --- a/sdk/python/foundation-models/system/inference/automatic-speech-recognition/sample-request/sample_score.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "inputs": { - "audio": ["https://audiovisionfiles.blob.core.windows.net/audio/audio.m4a", "https://audiovisionfiles.blob.core.windows.net/audio/audio.m4a"], - "language": ["en", "fr"] - } -} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/download-dataset.py deleted file mode 100644 index dc41e786f3..0000000000 --- a/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/download-dataset.py +++ /dev/null @@ -1,34 +0,0 @@ -# import library to parse command line 
arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="bookcorpus", help="dataset name") -# add an argument to specify the config name of the dataset -parser.add_argument( - "--fraction", type=float, default=0.001, help="fraction of the dataset to save" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="./", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset): - print(f"Loading {split} split of {args.dataset} dataset...") - # load the split of the dataset - dataset = load_dataset(args.dataset, split=split) - # save the split of the dataset to the download directory as json lines file - dataset.select(range(int(dataset.num_rows * args.fraction))).to_json( - os.path.join(args.download_dir, f"{split}.jsonl") - ) diff --git a/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/sample_score.json deleted file mode 100644 index 88135323be..0000000000 --- a/sdk/python/foundation-models/system/inference/fill-mask/book-corpus-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["`` was there no [MASK] between you two? 
\\'\\'"]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.ipynb deleted file mode 100644 index bd7d8ad217..0000000000 --- a/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-batch-endpoint.ipynb +++ /dev/null @@ -1,517 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Fill Mask Inference using Batch Endpoints\n", - "\n", - "This sample shows how to deploy `fill-mask` type models to a batch endpoint for inference.\n", - "\n", - "### Task\n", - "`fill-mask` task is about predicting masked words in a sentence. Models that perform this have a good understanding of the language structure and domain of the dataset that they are trained on. `fill-mask` models are typically used as foundation models for more scenario oriented tasks such as `text-classification` or `token-classification`.\n", - "\n", - "### Model\n", - "Models that can perform the `fill-mask` task are tagged with `task: fill-mask`. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [book corpus](https://huggingface.co/datasets/bookcorpus) dataset. A copy of this dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. \n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. 
\n", - "* Deploy the model for batch inference.\n", - "* Run a batch inference job.\n", - "* Review inference predictions.\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies.\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry.\n", - "* Create or update compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import packages used by the following code snippets\n", - "import csv\n", - "import json\n", - "import os\n", - "import random\n", - "import time\n", - "\n", - "import pandas as pd\n", - "import urllib.request\n", - "\n", - "from azure.ai.ml import Input, MLClient\n", - "from azure.ai.ml.constants import AssetTypes\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import (\n", - " AmlCompute,\n", - " BatchDeployment,\n", - " BatchEndpoint,\n", - " BatchRetrySettings,\n", - " Model,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "subscription_id = \"\"\n", - "resource_group_name = \"\"\n", - "workspace_name = \"\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Connect to workspace and registry using ML clients." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=subscription_id,\n", - " resource_group_name=resource_group_name,\n", - " workspace_name=workspace_name,\n", - ")\n", - "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create a compute cluster.\n", - "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_name = \"cpu-cluster\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_cluster = AmlCompute(\n", - " name=compute_name,\n", - " description=\"An AML compute cluster\",\n", - " size=\"Standard_DS3_V2\",\n", - " min_instances=0,\n", - " max_instances=3,\n", - " idle_time_before_scale_down=120,\n", - ") # 120 seconds\n", - "\n", - "workspace_ml_client.begin_create_or_update(compute_cluster)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. 
If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"bert-base-uncased\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the book corpus dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows.\n", - "* We will `` one word in each sentence so that the model can predict the masked words.\n", - "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define directories and filenames as variables\n", - "dataset_dir = \"book-corpus-dataset\"\n", - "training_datafile = \"train.jsonl\"\n", - "\n", - "batch_dir = \"batch\"\n", - "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n", - "batch_input_file = \"batch_input.csv\"\n", - "os.makedirs(batch_dir, exist_ok=True)\n", - "os.makedirs(batch_inputs_dir, exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the ./book-corpus-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # Set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Transform the data using the masking token." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the right mask token from huggingface\n", - "with urllib.request.urlopen(f\"https://huggingface.co/api/models/{model_name}\") as url:\n", - " data = json.load(url)\n", - " mask_token = data[\"mask_token\"]\n", - "\n", - "# Take the value of the \"text\" column, replace a random word with the mask token, and save the result in the \"masked_text\" column\n", - "train_df[\"masked_text\"] = train_df[\"text\"].apply(\n", - " lambda x: x.replace(random.choice(x.split()), mask_token, 1)\n", - ")\n", - "\n", - "# Save the train_df dataframe to a jsonl file in the ./book-corpus-dataset/batch folder with the `masked_` prefix\n", - "masked_datafile = os.path.join(batch_dir, \"masked_\" + training_datafile)\n", - "train_df.to_json(masked_datafile, orient=\"records\", lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the transformed `\"masked_text\"` column. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "batch_df = (\n", - " train_df[[\"masked_text\"]]\n", - " .rename(columns={\"masked_text\": \"input_string\"})\n", - " .sample(frac=0.1)\n", - ")\n", - "\n", - "# Divide this into files of 100 rows each\n", - "batch_size_per_predict = 100\n", - "for i in range(0, len(batch_df), batch_size_per_predict):\n", - " j = i + batch_size_per_predict\n", - " batch_df[i:j].to_csv(\n", - " os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n", - " )\n", - "\n", - "# Check out the first and last file name created\n", - "input_files = os.listdir(batch_inputs_dir)\n", - "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to a batch endpoint\n", - "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n", - "\n", - "* Create a batch endpoint.\n", - "* Create a batch deployment.\n", - "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n", - "\n", - "#### Create the endpoint." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "endpoint_name = \"fill-mask-\" + str(timestamp)\n", - "\n", - "endpoint = BatchEndpoint(\n", - " name=endpoint_name,\n", - " description=\"Batch endpoint for \" + foundation_model.name + \", for fill-mask task\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create the deployment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "deployment_name = \"demo\"\n", - "\n", - "deployment = BatchDeployment(\n", - " name=deployment_name,\n", - " endpoint_name=endpoint_name,\n", - " model=foundation_model.id,\n", - " compute=compute_name,\n", - " error_threshold=0,\n", - " instance_count=1,\n", - " logging_level=\"info\",\n", - " max_concurrency_per_instance=2,\n", - " mini_batch_size=10,\n", - " output_file_name=\"predictions.csv\",\n", - " retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n", - ")\n", - "workspace_ml_client.begin_create_or_update(deployment).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Set the deployment as default." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "endpoint.defaults.deployment_name = deployment_name\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()\n", - "\n", - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Run a batch inference job.\n", - "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n", - "\n", - "job = workspace_ml_client.batch_endpoints.invoke(\n", - " endpoint_name=endpoint.name, input=input\n", - ")\n", - "\n", - "workspace_ml_client.jobs.stream(job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Review inference predictions.\n", - "Download the predictions from the job output and review the predictions using a dataframe." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", - "\n", - "workspace_ml_client.jobs.download(\n", - " name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n", - ")\n", - "\n", - "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n", - "\n", - "# Load the batch predictions file with no headers into a dataframe and set your column names\n", - "score_df = pd.read_csv(\n", - " predictions_file,\n", - " header=None,\n", - " names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n", - ")\n", - "score_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_df = []\n", - "for file in input_files:\n", - " input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n", - " input.reset_index(inplace=True)\n", - " input[\"batch_input_file_name\"] = file\n", - " input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n", - " input_df.append(input)\n", - "input_df = pd.concat(input_df)\n", - "input_df.set_index(\"index\", inplace=True)\n", - "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n", - "\n", - "input_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Join the predictions with input data to compare them to ground truth." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.merge(\n", - " input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n", - ")\n", - "\n", - "# Show the first few rows of the results\n", - "df.head(20)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Clean up resources\n", - "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n", - "workspace_ml_client.compute.begin_delete(name=compute_name).result()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.ipynb deleted file mode 100644 index 6466224c76..0000000000 --- a/sdk/python/foundation-models/system/inference/fill-mask/fill-mask-online-endpoint.ipynb +++ /dev/null @@ -1,323 +0,0 @@ -{ - "cells": [ - { - 
"attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Fill Mask Inference using Online Endpoints\n", - "\n", - "This sample shows how to deploy `fill-mask` type models to an online endpoint for inference.\n", - "\n", - "### Task\n", - "`fill-mask` task is about predicting masked words in a sentence. Models that perform this have a good understanding of the language structure and domain of the dataset that they are trained on. `fill-mask` models are typically used as foundation models for more scenario oriented tasks such as `text-classification` or `token-classification`.\n", - "\n", - "### Model\n", - "Models that can perform the `fill-mask` task are tagged with `task: fill-mask`. We will use the `bert-base-uncased` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [book corpus](https://huggingface.co/datasets/bookcorpus) dataset. A copy of this dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. \n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")\n", - "# The models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"bert-base-uncased\"\n", - "model_version = \"3\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. 
Prepare data for inference.\n", - "\n", - "A copy of the book corpus dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We will `` one word in each sentence so that the model can predict the masked words.\n", - "* Save few samples in the format that can be passed as input to the online-inference endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the ./book-corpus-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(\"./book-corpus-dataset/train.jsonl\", lines=True)\n", - "train_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the right mask token from huggingface\n", - "import urllib.request, json\n", - "\n", - "with urllib.request.urlopen(f\"https://huggingface.co/api/models/{model_name}\") as url:\n", - " data = json.load(url)\n", - " mask_token = data[\"mask_token\"]\n", - "\n", - "# take the value of the \"text\" column, replace a random word with the mask token and save the result in the \"masked_text\" column\n", - "import random, os\n", - "\n", - "train_df[\"masked_text\"] = train_df[\"text\"].apply(\n", - " lambda x: x.replace(random.choice(x.split()), mask_token, 1)\n", - ")\n", - "# save the train_df dataframe to a jsonl file in the ./book-corpus-dataset folder with the masked_ prefix\n", - "train_df.to_json(\n", - " os.path.join(\".\", \"book-corpus-dataset\", \"masked_train.jsonl\"),\n", - " orient=\"records\",\n", - " lines=True,\n", - ")\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. 
Deploy the model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import (\n", - " ManagedOnlineEndpoint,\n", - " ManagedOnlineDeployment,\n", - " OnlineRequestSettings,\n", - ")\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "online_endpoint_name = \"fill-mask-\" + str(timestamp)\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \" + foundation_model.name + \", for fill-mask task\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=foundation_model.id,\n", - " instance_type=\"Standard_DS2_v2\",\n", - " instance_count=1,\n", - " request_settings=OnlineRequestSettings(\n", - " request_timeout_ms=60000,\n", - " ),\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "# read the ./book-corpus-dataset/masked_train.jsonl file into a pandas dataframe\n", - "df = pd.read_json(\"./book-corpus-dataset/masked_train.jsonl\", lines=True)\n", - "# escape single and double quotes in the masked_text column\n", - "df[\"masked_text\"] = df[\"masked_text\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n", - "# pick 1 random row\n", - "sample_df = df.sample(1)\n", - "# create a json object with the key as \"inputs\" and value as a list of values from the masked_text column of the sample_df dataframe\n", - "test_json = {\"inputs\": {\"input_string\": sample_df[\"masked_text\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./book-corpus-dataset folder\n", - "with open(os.path.join(\".\", \"book-corpus-dataset\", \"sample_score.json\"), \"w\") as f:\n", - " json.dump(test_json, f)\n", - "sample_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./book-corpus-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the json response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "response_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# compare the predicted squences with the ground truth sequence\n", - "compare_df = pd.DataFrame(\n", - " {\n", - " \"ground_truth_sequence\": 
sample_df[\"text\"].tolist() * 5,\n", - " \"predicted_sequence\": response_df[\"sequence\"].tolist(),\n", - " \"score\": response_df[\"score\"].tolist(),\n", - " }\n", - ")\n", - "compare_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.ipynb deleted file mode 100644 index 8ebdd2820f..0000000000 --- a/sdk/python/foundation-models/system/inference/question-answering/question-answering-batch-endpoint.ipynb +++ /dev/null @@ -1,485 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Question Answering Inference using Batch Endpoints\n", - "\n", - "This sample shows how to deploy `question-answering` type models to a batch endpoint for inference.\n", - "\n", - "### Task\n", - "`question-answering` tasks return an answer given a question. 
There are two common types of `question-answering` tasks:\n", - "\n", - "* Extractive: extract the answer from the given context.\n", - "* Abstractive: generate an answer from the context that correctly answers the question.\n", - " \n", - "### Model\n", - "Models that can perform the `question-answering` task are tagged with `task: question-answering`. We will use the `deepset-minilm-uncased-squad2` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [SQUAD](https://huggingface.co/datasets/squad) dataset. A copy of this dataset is available in the [squad-dataset](./squad-dataset/) folder. The [original source](https://rajpurkar.github.io/SQuAD-explorer/) of dataset describes it as follows: _\"Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.\"_\n", - "\n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for batch inference.\n", - "* Run a batch inference job.\n", - "* Review inference predictions.\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies.\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry.\n", - "* Create or update compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import packages used by the following code snippets\n", - "import csv\n", - "import os\n", - "import time\n", - "\n", - "import pandas as pd\n", - "\n", - "from azure.ai.ml import Input, MLClient\n", - "from azure.ai.ml.constants import AssetTypes\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import (\n", - " AmlCompute,\n", - " BatchDeployment,\n", - " BatchEndpoint,\n", - " BatchRetrySettings,\n", - " Model,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "subscription_id = \"\"\n", - "resource_group_name = \"\"\n", - "workspace_name = \"\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Connect to workspace and registry using ML clients." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=subscription_id,\n", - " resource_group_name=resource_group_name,\n", - " workspace_name=workspace_name,\n", - ")\n", - "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create a compute cluster.\n", - "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_name = \"cpu-cluster\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_cluster = AmlCompute(\n", - " name=compute_name,\n", - " description=\"An AML compute cluster\",\n", - " size=\"Standard_DS3_V2\",\n", - " min_instances=0,\n", - " max_instances=3,\n", - " idle_time_before_scale_down=120,\n", - ") # 120 seconds\n", - "\n", - "workspace_ml_client.begin_create_or_update(compute_cluster)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `question-answering` task. 
In this example, we use the `deepset-minilm-uncased-squad2` model. If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"deepset-minilm-uncased-squad2\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the SQUAD dataset is available in the [squad-dataset](./squad-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define directories and filenames as variables\n", - "dataset_dir = \"squad-dataset\"\n", - "training_datafile = \"train.jsonl\"\n", - "\n", - "batch_dir = \"batch\"\n", - "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n", - "batch_input_file = \"batch_input.csv\"\n", - "os.makedirs(batch_dir, exist_ok=True)\n", - "os.makedirs(batch_inputs_dir, exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the ./squad-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # Set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"question\"` and a column named `\"context\"`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "batch_df = train_df[[\"question\", \"context\"]].sample(frac=0.01)\n", - "\n", - "# Divide this into files of 50 rows each\n", - "batch_size_per_predict = 50\n", - "for i in range(0, len(batch_df), batch_size_per_predict):\n", - " j = i + batch_size_per_predict\n", - " batch_df[i:j].to_csv(\n", - " os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n", - " )\n", - "\n", - "# Check out the first and last file name created\n", - "input_files = os.listdir(batch_inputs_dir)\n", - "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to a batch endpoint\n", - "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n", - "\n", - "* Create a batch endpoint.\n", - "* Create a batch deployment.\n", - "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n", - "\n", - "#### Create the endpoint." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "endpoint_name = \"question-answering-\" + str(timestamp)\n", - "\n", - "endpoint = BatchEndpoint(\n", - " name=endpoint_name,\n", - " description=\"Batch endpoint for \"\n", - " + foundation_model.name\n", - " + \", for question-answering task\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create the deployment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "deployment_name = \"demo\"\n", - "\n", - "deployment = BatchDeployment(\n", - " name=deployment_name,\n", - " endpoint_name=endpoint_name,\n", - " model=foundation_model.id,\n", - " compute=compute_name,\n", - " error_threshold=0,\n", - " instance_count=1,\n", - " logging_level=\"info\",\n", - " max_concurrency_per_instance=1,\n", - " mini_batch_size=10,\n", - " output_file_name=\"predictions.csv\",\n", - " retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n", - ")\n", - "workspace_ml_client.begin_create_or_update(deployment).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Set the deployment as default." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "endpoint.defaults.deployment_name = deployment_name\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()\n", - "\n", - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Run a batch inference job.\n", - "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n", - "\n", - "job = workspace_ml_client.batch_endpoints.invoke(\n", - " endpoint_name=endpoint.name, input=input\n", - ")\n", - "\n", - "workspace_ml_client.jobs.stream(job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Review inference predictions. \n", - "Download the predictions from the job output and review the predictions using a dataframe." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", - "\n", - "workspace_ml_client.jobs.download(\n", - " name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n", - ")\n", - "\n", - "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n", - "\n", - "# Load the batch predictions file with no headers into a dataframe and set your column names\n", - "score_df = pd.read_csv(\n", - " predictions_file,\n", - " header=None,\n", - " names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n", - ")\n", - "score_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_df = []\n", - "for file in input_files:\n", - " input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n", - " input.reset_index(inplace=True)\n", - " input[\"batch_input_file_name\"] = file\n", - " input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n", - " input_df.append(input)\n", - "input_df = pd.concat(input_df)\n", - "input_df.set_index(\"index\", inplace=True)\n", - "input_df = input_df.drop(columns=[\"question\", \"context\"]).join(train_df)\n", - "\n", - "input_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Join the predictions with input data to compare them to ground truth." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.merge(\n", - " input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n", - ")\n", - "\n", - "# Show the first few rows of the results\n", - "df.head(20)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Clean up resources\n", - "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n", - "workspace_ml_client.compute.begin_delete(name=compute_name).result()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/question-answering/question-answering-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/question-answering/question-answering-online-endpoint.ipynb deleted file mode 100644 index d9f3c7da7e..0000000000 --- a/sdk/python/foundation-models/system/inference/question-answering/question-answering-online-endpoint.ipynb +++ 
/dev/null @@ -1,302 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Question Answering Inference using Online Endpoints\n", - "\n", - "This sample shows how to deploy `question-answering` type models to an online endpoint for inference.\n", - "\n", - "### Task\n", - "`question-answering` tasks return an answer given a question. There are two common types of `question-answering` tasks:\n", - "\n", - "* Extractive: extract the answer from the given context.\n", - "* Abstractive: generate an answer from the context that correctly answers the question.\n", - " \n", - "### Model\n", - "Models that can perform the `question-answering` task are tagged with `task: question-answering`. We will use the `deepset-minilm-uncased-squad2` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [SQUAD](https://huggingface.co/datasets/squad) dataset. A copy of this dataset is available in the [squad-dataset](./squad-dataset/) folder. The [original source](https://rajpurkar.github.io/SQuAD-explorer/) of dataset describes it as follows: _\"Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.\"_\n", - "\n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." 
- ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `question-answering` task. In this example, we use the `deepset-minilm-uncased-squad2` model. If you have opened this notebook for a different model, replace the model name and version accordingly. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"deepset-minilm-uncased-squad2\"\n", - "model_version = \"3\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the SQUAD dataset is available in the [squad-dataset](./squad-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Save few samples in the format that can be passed as input to the online-inference endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the ./squad-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(\"./squad-dataset/train.jsonl\", lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import (\n", - " ManagedOnlineEndpoint,\n", - " ManagedOnlineDeployment,\n", - " OnlineRequestSettings,\n", - ")\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "online_endpoint_name = \"question-answering-\" + str(timestamp)\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + foundation_model.name\n", - " + \", for question-answering task\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=foundation_model.id,\n", - " instance_type=\"Standard_DS2_v2\",\n", - " instance_count=1,\n", - " request_settings=OnlineRequestSettings(\n", - " request_timeout_ms=60000,\n", - " ),\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "\n", - "# read the ./squad-dataset/train.jsonl file into a pandas dataframe\n", - "df = pd.read_json(\"./squad-dataset/train.jsonl\", lines=True)\n", - "# escape single and double quotes in the text column\n", - "df[\"question\"] = df[\"question\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n", - "df[\"context\"] = df[\"context\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n", - "# pick 1 random row\n", - "sample_df = df.sample(1)\n", - "# create a json object with the key as \"inputs\" and value as a list of question-context pairs from columns of the sample_df dataframe\n", - "test_json = {\n", - " \"inputs\": {\n", - " \"question\": sample_df[\"question\"].to_list(),\n", - " \"context\": sample_df[\"context\"].to_list(),\n", - " }\n", - "}\n", - "# save the json object to a file named sample_score.json in the ./squad-dataset folder\n", - "with open(os.path.join(\".\", \"squad-dataset\", \"sample_score.json\"), \"w\") as f:\n", - " json.dump(test_json, f)\n", - "sample_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./squad-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the json response to a pandas dataframe\n", - "response_df = pd.read_json(response, typ=\"series\")\n", - "response_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - 
"# compare the predicted answer with the actual answer\n", - "response_df = pd.DataFrame({\"predicted_answer\": [response_df[\"answer\"]]})\n", - "response_df[\"ground_truth_answer\"] = sample_df[\"answers\"].to_list()[0][\"text\"]\n", - "response_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/download-dataset.py deleted file mode 100644 index e9ffb0b999..0000000000 --- a/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/download-dataset.py +++ /dev/null @@ -1,28 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="squad", help="dataset name") -# add an argument to specify the directory to download the dataset to 
-parser.add_argument( - "--download_dir", - type=str, - default="data", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset): - # load the split of the dataset - dataset = load_dataset(args.dataset, split=split) - # save the split of the dataset to the download directory as json lines file - dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl")) - # print dataset features diff --git a/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/sample_score.json deleted file mode 100644 index 1d18e6ca1b..0000000000 --- a/sdk/python/foundation-models/system/inference/question-answering/squad-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"question": ["What high profile controversial project was Von Neumann a prinipal of?"], "context": ["He was a pioneer of the application of operator theory to quantum mechanics, in the development of functional analysis, a principal member of the Manhattan Project and the Institute for Advanced Study in Princeton (as one of the few originally appointed), and a key figure in the development of game theory and the concepts of cellular automata, the universal constructor and the digital computer. He published 150 papers in his life; 60 in pure mathematics, 20 in physics, and 60 in applied mathematics. 
His last work, an unfinished manuscript written while in the hospital, was later published in book form as The Computer and the Brain."]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/download-dataset.py deleted file mode 100644 index c25ad19702..0000000000 --- a/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/download-dataset.py +++ /dev/null @@ -1,38 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="squad", help="dataset name") -# add an argument to specify the config name of the dataset -parser.add_argument( - "--config_name", type=str, default="plain_text", help="config name of the dataset" -) -# argument to save a fraction of the dataset -parser.add_argument( - "--fraction", type=float, default=0.05, help="fraction of the dataset to save" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="data", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset, config_name=args.config_name): - print(f"Loading {split} split of {args.dataset} dataset...") - # load the split of the dataset - dataset = load_dataset(args.dataset, args.config_name, split=split) - # save the split of the dataset to the download directory as json lines file - dataset.select(range(int(dataset.num_rows * 
args.fraction))).to_json( - os.path.join(args.download_dir, f"{split}.jsonl") - ) diff --git a/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/sample_score.json deleted file mode 100644 index ff3e4f7286..0000000000 --- a/sdk/python/foundation-models/system/inference/summarization/news-summary-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["NEW YORK (CNN) -- Nearly a year after being beaten into a coma, Bryan Steinhauer said Wednesday he does not hate the Serbian basketball player witnesses said brutally assaulted him. Bryan Steinhauer, who was beaten into a coma, is making significant progress in his rehabilitation. \\\"I am not full of hate; hatred kills progress,\\\" he said. Appearing alongside his parents and doctors at New York\\'s Mount Sinai Hospital, Steinhauer struggled to speak but his message was clear. \\\"Please don\\'t feel sorry for me,\\\" he said. \\\"Tragedy leads to wisdom, and this experience has taught me so much about life.\\\" Watch Steinhauer talk about his recovery \u00bb . The 22-year-old from Brooklyn was about to graduate and had a job lined up at accounting giant KPMG when he got into an argument last May that nearly cost him his life. According to police, witnesses said Steinhauer and college basketball player Miladin Kovacevic had exchanged harsh words at an upstate New York bar near Binghamton University after Steinhauer danced with the girlfriend of one of Kovacevic\\'s friends. The witnesses said the fight went outside the bar, where several men attacked Steinhauer, with Kovacevic beating him about the head, according to police. Kovacevic is 6-foot-9 and 260 pounds while Steinhauer was 130 pounds. Kovacevic was arrested but jumped bail and fled to Serbia with the help of Serbian consular officials in New York. 
The case strained relations between the United States and Serbia.Hillary Clinton intervened, first as U.S. senator from New York and later as secretary of state, as did Sen. Charles Schumer, D-New York, to make sure Kovacevic was prosecuted. Serbia has no extradition treaty with the United States, but Serbian officials arrested Kovacevic last October and are working on prosecuting him with the assistance of the district attorney of Broome County, where the beating took place. In addition, the Serbian government recently paid the Steinhauer family $900,000 in recognition of the misconduct of Serbian government officials and the financial burdens placed upon the Steinhauer family as a result of the beating. Steinhauer awoke from his coma last August, three months after the beating that left him with skull fractures, a severe brain injury and no memory of the attack. He weighed less than 100 pounds, could not speak or walk, and was fed through a tube, doctors said Wednesday. \\\"He had hemorrhages and contusions affecting almost every lobe of his brain,\\\" said Dr. Brian Greenwald. Working with specialists and boosted by the support of his family, Steinhauer surprised even his doctors in his quick recovery, they said. He now has outpatient rehabilitation four times a week, goes to a gym, and receives acupuncture treatments. While he can eat on his own now and is making significant progress, Steinhauer continues to undergo intensive therapy. Steinhauer says he doesn\\'t think about Kovacevic because he\\'s not concerned about other people. \\\"I\\'ve had a second birth and raising at Mount Sinai,\\\" he said. 
\\\"Live long and prosper.\\\""]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/summarization/summarization-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/summarization/summarization-online-endpoint.ipynb deleted file mode 100644 index 3370e3e4a5..0000000000 --- a/sdk/python/foundation-models/system/inference/summarization/summarization-online-endpoint.ipynb +++ /dev/null @@ -1,298 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summarization Inference using Online Endpoints\n", - "\n", - "This sample shows how to deploy `summarization` type models to an online endpoint for inference.\n", - "\n", - "### Task\n", - "`summarization` creates a shorter version of a document or an article that captures all the important information. Along with translation, it is another example of a task that can be formulated as a sequence-to-sequence task. \n", - "`summarization` can be:\n", - "\n", - "* Extractive: extract the most relevant information from a document.\n", - "* Abstractive: generate new text that captures the most relevant information.\n", - "\n", - "### Model\n", - "Models that can perform the `summarization` task are tagged with `task: summarization`. We will use the `sshleifer-distilbart-cnn-12-6` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [CNN DailyMail](https://huggingface.co/datasets/cnn_dailymail) dataset. 
A copy of this dataset is available in the [news-summary-dataset](./news-summary-dataset/) folder.\n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `summarization` task. 
In this example, we use the `sshleifer-distilbart-cnn-12-6` model. If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"sshleifer-distilbart-cnn-12-6\"\n", - "model_version = \"3\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the news summary dataset is available in the [news-summary-dataset](./news-summary-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Save few samples in the format that can be passed as input to the online-inference endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the ./news-summary-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(\"./news-summary-dataset/train.jsonl\", lines=True)\n", - "train_df.head(2)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import (\n", - " ManagedOnlineEndpoint,\n", - " ManagedOnlineDeployment,\n", - " OnlineRequestSettings,\n", - ")\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "online_endpoint_name = \"summarization-\" + str(timestamp)\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + foundation_model.name\n", - " + \", for summarization task\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=foundation_model.id,\n", - " instance_type=\"Standard_DS3_v2\",\n", - " instance_count=1,\n", - " request_settings=OnlineRequestSettings(\n", - " request_timeout_ms=60000,\n", - " ),\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "\n", - "# read the ./news-summary-dataset/train.jsonl file into a pandas dataframe\n", - "df = pd.read_json(\"./news-summary-dataset/train.jsonl\", lines=True)\n", - "# escape single and double quotes in the masked_text column\n", - "df[\"article\"] = df[\"article\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n", - "# pick 1 random row\n", - "sample_df = df.sample(1)\n", - "# create a json object with the key as \"inputs\" and value as a list of values from the article column of the sample_df dataframe\n", - "sample_json = {\"inputs\": sample_df[\"article\"].tolist()}\n", - "# save the json object to a file named sample_score.json in the ./news-summary-dataset folder\n", - "test_json = {\"inputs\": {\"input_string\": sample_df[\"article\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./news-summary-dataset folder\n", - "with open(os.path.join(\".\", \"news-summary-dataset\", \"sample_score.json\"), \"w\") as f:\n", - " json.dump(test_json, f)\n", - "sample_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./news-summary-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the json response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "response_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - 
"# compare the true summary with the predicted summary\n", - "response_df.rename(columns={\"summary_text\": \"predicted_summary\"}, inplace=True)\n", - "response_df[\"ground_truth_summary\"] = [sample_df[\"highlights\"].tolist()[0]]\n", - "response_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-batch.ipynb b/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-batch.ipynb deleted file mode 100644 index 329689fc0b..0000000000 --- a/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-batch.ipynb +++ /dev/null @@ -1,614 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Text Classification Inference using Batch Endpoints\n", - "\n", - "This sample shows how to deploy `text-classification` type models to a batch 
endpoint for inference.\n", - "\n", - "### Task\n", - "`text-classification` is generic task type that can be used for scenarios such as sentiment analysis, emotion detection, grammar checking, spam filtering, etc. In this example, we will test for entailment v/s contradiction, meaning given a premise sentence and a hypothesis sentence, the task is to predict whether the premise entails the hypothesis (entailment), contradicts the hypothesis (contradiction), or neither (neutral). \n", - "\n", - "### Inference data\n", - "The Multi-Genre Natural Language Inference Corpus, or MNLI is a crowd sourced collection of sentence pairs with textual entailment annotations.The [MNLI](https://huggingface.co/datasets/glue) dataset is a subset of the larger [General Language Understanding Evaluation](https://gluebenchmark.com/) dataset. A copy of this dataset is available in the [glue-mnli-dataset](./glue-mnli-dataset/) folder.\n", - "\n", - "### Model\n", - "Look for models tagged with `text-classification` in the system registry. Just looking for `text-classification` is not sufficient, you need to check if the model is specifically finetuned for entailment v/s contradiction by studying the model card and looking at the input/output samples or signatures of the model. In this notebook, we use the `microsoft-deberta-base-mnli` model.\n", - "\n", - " \n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for batch inference.\n", - "* Run a batch inference job.\n", - "* Review inference predictions.\n", - "* Clean up resources.\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies.\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry.\n", - "* Create or update compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import packages used by the following code snippets\n", - "import csv\n", - "import json\n", - "import os\n", - "import time\n", - "\n", - "import pandas as pd\n", - "\n", - "from azure.ai.ml import Input, MLClient\n", - "from azure.ai.ml.constants import AssetTypes\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import (\n", - " AmlCompute,\n", - " BatchDeployment,\n", - " BatchEndpoint,\n", - " BatchRetrySettings,\n", - " Model,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "subscription_id = \"\"\n", - "resource_group_name = \"\"\n", - "workspace_name = \"\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Connect to workspace and registry using ML clients." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=subscription_id,\n", - " resource_group_name=resource_group_name,\n", - " workspace_name=workspace_name,\n", - ")\n", - "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create a compute cluster.\n", - "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_name = \"cpu-cluster\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_cluster = AmlCompute(\n", - " name=compute_name,\n", - " description=\"An AML compute cluster\",\n", - " size=\"Standard_DS3_V2\",\n", - " min_instances=0,\n", - " max_instances=3,\n", - " idle_time_before_scale_down=120,\n", - ") # 120 seconds\n", - "\n", - "workspace_ml_client.begin_create_or_update(compute_cluster)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `text-classification` task. In this example, we use the `microsoft-deberta-base-mnli` model. If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "model_name = \"microsoft-deberta-base-mnli\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the MNLI is available in the [ glue-mnli](./glue-mnli/) folder. 
The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./glue-mnli-dataset/label.json](./glue-mnli-dataset/label.json). This step is needed because the selected models will return labels such `CONTRADICTION`, `CONTRADICTION`, etc. when running prediction. If the labels in your ground truth data are left as `0`, `1`, `2`, etc., then they would not match with prediction labels returned by the models.\n", - "* The dataset contains `premise` and `hypothesis` as two different columns. However, the models expect a single string for prediction in the format `[CLS] [SEP] [SEP]`. Hence we merge the columns and drop the original columns.\n", - "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original.\n", - "* Since we are using a `mlflow` model, we don't need to write any inference code. However, we need the inference data to be in a shape can can be used for inference. Specifically, batch inference does not support jsonl lines files, but supports `csv` and `parquet`. We will dump a csv version from the pandas dataframe. Next, the rows of the batch inference csv file must strictly contain only the columns that will be passed to the model as input and the column header must match the model signature. In our case, the model signature which can be found in the `MLmodel` file in the model artifacts expects `input_string` as input. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define directories and filenames as variables\n", - "dataset_dir = \"glue-mnli-dataset\"\n", - "training_datafile = \"train.jsonl\"\n", - "label_datafile = \"label.json\"\n", - "\n", - "batch_dir = \"batch\"\n", - "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n", - "batch_input_file = \"batch_input.csv\"\n", - "os.makedirs(batch_dir, exist_ok=True)\n", - "os.makedirs(batch_inputs_dir, exist_ok=True)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the below cell, we load the input file and look at some sample data " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the ./glue-mnli-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # Set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Replace numerical labels with string labels and drop the columns not needed." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the id2label json element of the label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n", - "with open(os.path.join(dataset_dir, label_datafile)) as f:\n", - " id2label = json.load(f)\n", - " id2label = id2label[\"id2label\"]\n", - " label_df = pd.DataFrame.from_dict(\n", - " id2label, orient=\"index\", columns=[\"label_string\"]\n", - " )\n", - " label_df[\"label\"] = label_df.index.astype(\"int64\")\n", - " label_df = label_df[[\"label\", \"label_string\"]]\n", - "\n", - "# Join the train, validation and test dataframes with the id2label dataframe to get the label_string column\n", - "train_df = train_df.merge(label_df, on=\"label\", how=\"left\")\n", - "# Concat the premise and hypothesis columns to with \"[CLS]\" in the beginning and \"[SEP]\" in the middle and end to get the text column\n", - "train_df[\"text\"] = train_df.apply(\n", - " lambda row: \"[CLS] \" + row.premise + \" [SEP] \" + row.hypothesis + \" [SEP]\", axis=1\n", - ")\n", - "# Drop the idx, premise and hypothesis columns as they are not needed\n", - "train_df.drop(columns=[\"idx\", \"premise\", \"hypothesis\", \"label\"], inplace=True)\n", - "# Rename the label_string column to ground_truth_label\n", - "train_df.rename(columns={\"label_string\": \"ground_truth_label\"}, inplace=True)\n", - "\n", - "# Save the train_df dataframe to a jsonl file in the ./glue-mnli-dataset/batch folder with the `cls_sep_` prefix\n", - "cls_sep_datafile = os.path.join(batch_dir, \"cls_sep_\" + training_datafile)\n", - "train_df.to_json(cls_sep_datafile, orient=\"records\", lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save a fraction of the input data to files of smaller batches for testing. 
The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the transformed `\"text\"` column. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "batch_df = train_df[[\"text\"]].rename(columns={\"text\": \"input_string\"}).sample(frac=0.05)\n", - "\n", - "# Divide this into files of 100 rows each\n", - "batch_size_per_predict = 100\n", - "for i in range(0, len(batch_df), batch_size_per_predict):\n", - " j = i + batch_size_per_predict\n", - " batch_df[i:j].to_csv(\n", - " os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n", - " )\n", - "\n", - "# Check out the first and last file name created\n", - "input_files = os.listdir(batch_inputs_dir)\n", - "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to a batch endpoint\n", - "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n", - "\n", - "* Create a batch endpoint.\n", - "* Create a batch deployment.\n", - "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n", - "\n", - "#### Create the endpoint." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "endpoint_name = \"text-classification-\" + str(timestamp)\n", - "\n", - "endpoint = BatchEndpoint(\n", - " name=endpoint_name,\n", - " description=\"Batch endpoint for \"\n", - " + foundation_model.name\n", - " + \", for text-classification task\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create the deployment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "deployment_name = \"demo\"\n", - "\n", - "deployment = BatchDeployment(\n", - " name=deployment_name,\n", - " endpoint_name=endpoint_name,\n", - " model=foundation_model.id,\n", - " compute=compute_name,\n", - " error_threshold=0,\n", - " instance_count=1,\n", - " logging_level=\"info\",\n", - " max_concurrency_per_instance=1,\n", - " mini_batch_size=10,\n", - " output_file_name=\"predictions.csv\",\n", - " retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n", - ")\n", - "workspace_ml_client.begin_create_or_update(deployment).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Set the deployment as default." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "endpoint.defaults.deployment_name = deployment_name\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()\n", - "\n", - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Run a batch inference job.\n", - "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n", - "\n", - "job = workspace_ml_client.batch_endpoints.invoke(\n", - " endpoint_name=endpoint.name, input=input\n", - ")\n", - "\n", - "workspace_ml_client.jobs.stream(job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Review inference predictions. \n", - "Download the predictions from the job output and review the predictions using a dataframe." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", - "\n", - "workspace_ml_client.jobs.download(\n", - " name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n", - ")\n", - "\n", - "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n", - "\n", - "# Load the batch predictions file with no headers into a dataframe and set your column names\n", - "score_df = pd.read_csv(\n", - " predictions_file,\n", - " header=None,\n", - " names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n", - ")\n", - "score_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_df = []\n", - "for file in input_files:\n", - " input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n", - " input.reset_index(inplace=True)\n", - " input[\"batch_input_file_name\"] = file\n", - " input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n", - " input_df.append(input)\n", - "input_df = pd.concat(input_df)\n", - "input_df.set_index(\"index\", inplace=True)\n", - "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n", - "\n", - "input_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Join the predictions with input data to compare them to ground truth." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.merge(\n", - " input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n", - ")\n", - "\n", - "# Show the first few rows of the results\n", - "df.head(20)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Clean up resources\n", - "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n", - "workspace_ml_client.compute.begin_delete(name=compute_name).result()" - ] - } - ], - "metadata": { - "kernel_info": { - "name": "amlv2" - }, - "kernelspec": { - "display_name": "hf", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - }, - "vscode": { - "interpreter": { - "hash": "429d412e307b288f3a8cba821a3ba110e77b02cf5672d0d0b14db25cc0bc89f4" - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-online.ipynb b/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-online.ipynb deleted file mode 100644 index ca258f8a8d..0000000000 --- 
a/sdk/python/foundation-models/system/inference/text-classification/entailment-contradiction-online.ipynb +++ /dev/null @@ -1,328 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Classification Inference using Online Endpoints\n", - "\n", - "This sample shows how to deploy `text-classification` type models to an online endpoint for inference.\n", - "\n", - "### Task\n", - "`text-classification` is generic task type that can be used for scenarios such as sentiment analysis, emotion detection, grammar checking, spam filtering, etc. In this example, we will test for entailment v/s contradiction, meaning given a premise sentence and a hypothesis sentence, the task is to predict whether the premise entails the hypothesis (entailment), contradicts the hypothesis (contradiction), or neither (neutral). \n", - "\n", - "### Inference data\n", - "The Multi-Genre Natural Language Inference Corpus, or MNLI is a crowd sourced collection of sentence pairs with textual entailment annotations.The [MNLI](https://huggingface.co/datasets/glue) dataset is a subset of the larger [General Language Understanding Evaluation](https://gluebenchmark.com/) dataset. A copy of this dataset is available in the [glue-mnli](./glue-mnli/) folder.\n", - "\n", - "### Model\n", - "Look for models tagged with `text-classification` in the system registry. Just looking for `text-classification` is not sufficient, you need to check if the model is specifically finetuned for entailment v/s contradiction by studying the model card and looking at the input/output samples or signatures of the model. In this notebook, we use the `microsoft-deberta-base-mnli` model.\n", - "\n", - " \n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." 
- ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `fill-mask` task. In this example, we use the `bert-base-uncased` model. If you have opened this notebook for a different model, replace the model name and version accordingly. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"microsoft-deberta-base-mnli\"\n", - "model_version = \"2\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for fine tuning\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the MNLI is available in the [ glue-mnli-dataset](./glue-mnli-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Replace numerical categories in data with the actual string labels. This mapping is available in the [./glue-mnli-dataset/label.json](./glue-mnli-dataset/label.json). This step is needed because the selected models will return labels such `CONTRADICTION`, `CONTRADICTION`, etc. when running prediction. If the labels in your ground truth data are left as `0`, `1`, `2`, etc., then they would not match with prediction labels returned by the models.\n", - "* The dataset contains `premise` and `hypothesis` as two different columns. However, the models expect a single string for prediction in the format `[CLS] [SEP] [SEP]`. Hence we merge the columns and drop the original columns.\n", - "* We want this sample to run quickly, so save smaller dataset containing 10% of the original. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "dataset_dir = \"./glue-mnli-dataset\"\n", - "data_file = \"train.jsonl\"\n", - "\n", - "# load the train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "df = pd.read_json(os.path.join(dataset_dir, data_file), lines=True)\n", - "df.head()\n", - "\n", - "# load the id2label json element of the label.json file into pandas table with keys as 'label' column of int64 type and values as 'label_string' column as string type\n", - "import json\n", - "\n", - "label_file = \"label.json\"\n", - "with open(os.path.join(dataset_dir, label_file)) as f:\n", - " id2label = json.load(f)\n", - " id2label = id2label[\"id2label\"]\n", - " label_df = pd.DataFrame.from_dict(\n", - " id2label, orient=\"index\", columns=[\"label_string\"]\n", - " )\n", - " label_df[\"label\"] = label_df.index.astype(\"int64\")\n", - " label_df = label_df[[\"label\", \"label_string\"]]\n", - "\n", - "# join the train, validation and test dataframes with the id2label dataframe to get the label_string column\n", - "df = df.merge(label_df, on=\"label\", how=\"left\")\n", - "# concat the premise and hypothesis columns to with \"[CLS]\" in the beginning and \"[SEP]\" in the middle and end to get the text column\n", - "df[\"text\"] = \"[CLS] \" + df[\"premise\"] + \" [SEP] \" + df[\"hypothesis\"] + \" [SEP]\"\n", - "# drop the idx, premise and hypothesis columns as they are not needed\n", - "df = df.drop(columns=[\"idx\", \"premise\", \"hypothesis\", \"label\"])\n", - "# rename the label_string column to ground_truth_label\n", - "df = df.rename(columns={\"label_string\": \"ground_truth_label\"})\n", - "\n", - "# save 10% of the rows from the train, validation and test dataframes into files with 
small_ prefix in the ./dataset_dir folder\n", - "small_data_file = \"small_train.jsonl\"\n", - "df.sample(frac=0.1).to_json(\n", - " os.path.join(dataset_dir, small_data_file), orient=\"records\", lines=True\n", - ")\n", - "\n", - "df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "online_endpoint_name = \"entail-contra-\" + str(timestamp)\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + foundation_model.name\n", - " + \", to detect entailment v/s contradiction\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=foundation_model.id,\n", - " instance_type=\"Standard_DS2_v2\",\n", - " instance_count=1,\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. 
Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "data_file_small = \"small_train.jsonl\"\n", - "score_file = \"sample_score.json\"\n", - "# read the data file into a pandas dataframe\n", - "df = pd.read_json(os.path.join(dataset_dir, data_file_small), lines=True)\n", - "# escape single and double quotes in the masked_text column\n", - "# pick 5 random rows\n", - "sample_df = df.sample(5)\n", - "# reset the index of sample_df\n", - "sample_df = sample_df.reset_index(drop=True)\n", - "\n", - "# save the json object to a file named sample_score.json in the\n", - "test_json = {\"inputs\": {\"input_string\": sample_df[\"text\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./glue-mnli-dataset folder\n", - "with open(os.path.join(\".\", dataset_dir, score_file), \"w\") as f:\n", - " json.dump(test_json, f)\n", - "sample_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=os.path.join(\".\", dataset_dir, score_file),\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the json response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "# rename label column to predicted_label\n", - "response_df = response_df.rename(columns={\"label\": \"predicted_label\"})\n", - "response_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "metadata": {}, - "outputs": [], - "source": [ - "# merge the sample_df and response_df dataframes\n", - "merged_df = sample_df.merge(response_df, left_index=True, right_index=True)\n", - "merged_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/download-dataset.py deleted file mode 100644 index b6794c4b4f..0000000000 --- a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/download-dataset.py +++ /dev/null @@ -1,43 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="glue", help="dataset name") -# add an argument to specify the config name of the dataset -parser.add_argument( - "--config_name", type=str, 
default="mnli", help="config name of the dataset" -) -# argument to save a fraction of the dataset -parser.add_argument( - "--fraction", type=float, default=0.1, help="fraction of the dataset to save" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="./", - help="directory to download the dataset to", -) -# add an argument to specify the split of the dataset to download -parser.add_argument( - "--split", type=str, default="train", help="split of the dataset to download" -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset, config_name=args.config_name): - if split == args.split: - print(f"Loading {split} split of {args.dataset} dataset...") - # load the split of the dataset - dataset = load_dataset(args.dataset, args.config_name, split=split) - # save the split of the dataset to the download directory as json lines file - dataset.select(range(int(dataset.num_rows * args.fraction))).to_json( - os.path.join(args.download_dir, f"{split}.jsonl") - ) diff --git a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/label.json b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/label.json deleted file mode 100644 index b836faff17..0000000000 --- a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/label.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "id2label": { - "0": "ENTAILMENT", - "1": "NEUTRAL", - "2": "CONTRADICTION" - }, - "label2id": { - "ENTAILMENT": 0, - "CONTRADICTION": 2, - "NEUTRAL": 1 - } -} \ No newline at end of file diff --git 
a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/sample_score.json deleted file mode 100644 index e121a4c5ab..0000000000 --- a/sdk/python/foundation-models/system/inference/text-classification/glue-mnli-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["[CLS] These two separate increases in the fees lawyers pay is a step closer to achieving those two goals, McMorrow said in written statement. [SEP] Two separate fees that lawyers pay are decreasing. [SEP]", "[CLS] In this particular case, let's just say when the original advice was given the wheel was spinning, but the hamster had gone. [SEP] There has been advice given. [SEP]", "[CLS] The gardens are popular with children who enjoy watching the marionette shows, riding donkeys, and sailing boats on the circular ponds. [SEP] Children really like the marionette shows in the gardens. [SEP]", "[CLS] this is the kid who who really you you know barely made it through high school [SEP] He passed high school with straight A's. [SEP]", "[CLS] good don't ever drink Scotch it's terrible i quit drinking Scotch when i found out about that but anyway but uh as far as as far as you know Central and South America we our policy pretty much uh it depends on who we're what government we're buying down there at the particular time [SEP] I no longer drink Scotch. 
[SEP]"]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/download-dataset.py deleted file mode 100644 index dc41e786f3..0000000000 --- a/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/download-dataset.py +++ /dev/null @@ -1,34 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="bookcorpus", help="dataset name") -# add an argument to specify the config name of the dataset -parser.add_argument( - "--fraction", type=float, default=0.001, help="fraction of the dataset to save" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="./", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset): - print(f"Loading {split} split of {args.dataset} dataset...") - # load the split of the dataset - dataset = load_dataset(args.dataset, split=split) - # save the split of the dataset to the download directory as json lines file - dataset.select(range(int(dataset.num_rows * args.fraction))).to_json( - os.path.join(args.download_dir, f"{split}.jsonl") - ) diff --git a/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/sample_score.json deleted file mode 100644 index 
f80ed6b523..0000000000 --- a/sdk/python/foundation-models/system/inference/text-generation/book-corpus-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["rocco noticed the almost defeated look on her lovely face and did n\\'t like it ."]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.ipynb deleted file mode 100644 index c4a01ae97c..0000000000 --- a/sdk/python/foundation-models/system/inference/text-generation/text-generation-batch-endpoint.ipynb +++ /dev/null @@ -1,482 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Generation Inference using Batch Endpoints\n", - "\n", - "This sample shows how to deploy `text-generation` type models to a batch endpoint for inference.\n", - "\n", - "### Task\n", - "`text-generation` is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase. Some common applications of text generation are code generation and story generation.\n", - "\n", - "### Model\n", - "Models that can perform the `text-generation` task are tagged with `task: text-generation`. We will use the `gpt2` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [book corpus](https://huggingface.co/datasets/bookcorpus) dataset. 
A copy of this dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder.\n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for batch inference.\n", - "* Run a batch inference job.\n", - "* Review inference predictions.\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies.\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry.\n", - "* Create or update compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import packages used by the following code snippets\n", - "import csv\n", - "import os\n", - "import time\n", - "\n", - "import pandas as pd\n", - "\n", - "from azure.ai.ml import Input, MLClient\n", - "from azure.ai.ml.constants import AssetTypes\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import (\n", - " AmlCompute,\n", - " BatchDeployment,\n", - " BatchEndpoint,\n", - " BatchRetrySettings,\n", - " Model,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "subscription_id = \"\"\n", - "resource_group_name = \"\"\n", - "workspace_name = \"\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Connect to workspace and registry using ML clients." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=subscription_id,\n", - " resource_group_name=resource_group_name,\n", - " workspace_name=workspace_name,\n", - ")\n", - "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create a compute cluster.\n", - "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_name = \"cpu-cluster\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_cluster = AmlCompute(\n", - " name=compute_name,\n", - " description=\"An AML compute cluster\",\n", - " size=\"Standard_DS3_V2\",\n", - " min_instances=0,\n", - " max_instances=3,\n", - " idle_time_before_scale_down=120,\n", - ") # 120 seconds\n", - "\n", - "workspace_ml_client.begin_create_or_update(compute_cluster)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `text-generation` task. In this example, we use the `gpt2` model. 
If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"gpt2\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the book corpus dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define directories and filenames as variables\n", - "dataset_dir = \"book-corpus-dataset\"\n", - "training_datafile = \"train.jsonl\"\n", - "\n", - "batch_dir = \"batch\"\n", - "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n", - "batch_input_file = \"batch_input.csv\"\n", - "os.makedirs(batch_dir, exist_ok=True)\n", - "os.makedirs(batch_inputs_dir, exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the ./book-corpus-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # Set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the `\"text\"` column. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "batch_df = (\n", - " train_df[[\"text\"]].rename(columns={\"text\": \"input_string\"}).sample(frac=0.001)\n", - ")\n", - "\n", - "# Divide this into files of 25 rows each\n", - "batch_size_per_predict = 25\n", - "for i in range(0, len(batch_df), batch_size_per_predict):\n", - " j = i + batch_size_per_predict\n", - " batch_df[i:j].to_csv(\n", - " os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n", - " )\n", - "\n", - "# Check out the first and last file name created\n", - "input_files = os.listdir(batch_inputs_dir)\n", - "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to a batch endpoint\n", - "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n", - "\n", - "* Create a batch endpoint.\n", - "* Create a batch deployment.\n", - "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n", - "\n", - "#### Create the endpoint." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "endpoint_name = \"text-generation-\" + str(timestamp)\n", - "\n", - "endpoint = BatchEndpoint(\n", - " name=endpoint_name,\n", - " description=\"Batch endpoint for \"\n", - " + foundation_model.name\n", - " + \", for text-generation task\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create the deployment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "deployment_name = \"demo\"\n", - "\n", - "deployment = BatchDeployment(\n", - " name=deployment_name,\n", - " endpoint_name=endpoint_name,\n", - " model=foundation_model.id,\n", - " compute=compute_name,\n", - " error_threshold=0,\n", - " instance_count=1,\n", - " logging_level=\"info\",\n", - " max_concurrency_per_instance=1,\n", - " mini_batch_size=10,\n", - " output_file_name=\"predictions.csv\",\n", - " retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n", - ")\n", - "workspace_ml_client.begin_create_or_update(deployment).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Set the deployment as default." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "endpoint.defaults.deployment_name = deployment_name\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()\n", - "\n", - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Run a batch inference job.\n", - "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n", - "\n", - "job = workspace_ml_client.batch_endpoints.invoke(\n", - " endpoint_name=endpoint.name, input=input\n", - ")\n", - "\n", - "workspace_ml_client.jobs.stream(job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Review inference predictions." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", - "\n", - "workspace_ml_client.jobs.download(\n", - " name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n", - ")\n", - "\n", - "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n", - "\n", - "# Load the batch predictions file with no headers into a dataframe and set your column names\n", - "score_df = pd.read_csv(\n", - " predictions_file,\n", - " header=None,\n", - " names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n", - ")\n", - "score_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_df = []\n", - "for file in input_files:\n", - " input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n", - " input.reset_index(inplace=True)\n", - " input[\"batch_input_file_name\"] = file\n", - " input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n", - " input_df.append(input)\n", - "input_df = pd.concat(input_df)\n", - "input_df.set_index(\"index\", inplace=True)\n", - "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n", - "\n", - "input_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Join the predictions with input data to compare them to ground truth." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.merge(\n", - " input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n", - ")\n", - "\n", - "# Show the first few rows of the results\n", - "df.head(20)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Clean up resources\n", - "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n", - "workspace_ml_client.compute.begin_delete(name=compute_name).result()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/text-generation/text-generation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/text-generation/text-generation-online-endpoint.ipynb deleted file mode 100644 index fa46f0afa3..0000000000 --- a/sdk/python/foundation-models/system/inference/text-generation/text-generation-online-endpoint.ipynb +++ /dev/null @@ -1,282 
+0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Text Generation Inference using Online Endpoints\n", - "\n", - "This sample shows how to deploy `text-generation` type models to an online endpoint for inference.\n", - "\n", - "### Task\n", - "`text-generation` is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase. Some common applications of text generation are code generation and story generation.\n", - "\n", - "### Model\n", - "Models that can perform the `text-generation` task are tagged with `task: text-generation`. We will use the `gpt2` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [book corpus](https://huggingface.co/datasets/bookcorpus) dataset. A copy of this dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder.\n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `text-generation` task. In this example, we use the `gpt2` model. If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"gpt2\"\n", - "model_version = \"3\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. 
Prepare data for inference.\n", - "\n", - "A copy of the book corpus dataset is available in the [book-corpus-dataset](./book-corpus-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Save few samples in the format that can be passed as input to the online-inference endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the ./book-corpus-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(\"./book-corpus-dataset/train.jsonl\", lines=True)\n", - "train_df.head(2)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import (\n", - " ManagedOnlineEndpoint,\n", - " ManagedOnlineDeployment,\n", - " OnlineRequestSettings,\n", - ")\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "online_endpoint_name = \"text-generation-\" + str(timestamp)\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + foundation_model.name\n", - " + \", for text-generation task\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=foundation_model.id,\n", - " instance_type=\"Standard_DS2_v2\",\n", - " instance_count=1,\n", - " request_settings=OnlineRequestSettings(\n", - " request_timeout_ms=60000,\n", - " ),\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "\n", - "# read the ./book-corpus-dataset/train.jsonl file into a pandas dataframe\n", - "df = pd.read_json(\"./book-corpus-dataset/train.jsonl\", lines=True)\n", - "# escape single and double quotes in the text column\n", - "df[\"text\"] = df[\"text\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n", - "# pick 1 random row\n", - "sample_df = df.sample(1)\n", - "# create a json object with the key as \"inputs\" and value as a list of values from the article column of the sample_df dataframe\n", - "sample_json = {\"inputs\": sample_df[\"text\"].tolist()}\n", - "# save the json object to a file named sample_score.json in the ./book-corpus-dataset folder\n", - "test_json = {\"inputs\": {\"input_string\": sample_df[\"text\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./book-corpus-dataset folder\n", - "with open(os.path.join(\".\", \"book-corpus-dataset\", \"sample_score.json\"), \"w\") as f:\n", - " json.dump(test_json, f)\n", - "sample_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./book-corpus-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the json response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "response_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. 
Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/download-dataset.py b/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/download-dataset.py deleted file mode 100644 index d3836a7456..0000000000 --- a/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/download-dataset.py +++ /dev/null @@ -1,41 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument( - "--dataset", type=str, default="Jean-Baptiste/wikiner_fr", help="dataset name" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="data", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - - -def 
format_ner_tags(example, class_names): - example["text"] = " ".join(example["tokens"]) - example["ner_tags_str"] = [class_names[id] for id in example["ner_tags"]] - return example - - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names -from functools import partial - -for split in get_dataset_split_names(args.dataset): - # load the split of the dataset - dataset = load_dataset(args.dataset, split=split) - dataset = dataset.map( - partial(format_ner_tags, class_names=dataset.features["ner_tags"].feature.names) - ) - # save the split of the dataset to the download directory as json lines file - dataset.to_json(os.path.join(args.download_dir, f"{split}.jsonl")) - # print dataset features diff --git a/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/sample_score.json b/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/sample_score.json deleted file mode 100644 index 4fe2e37452..0000000000 --- a/sdk/python/foundation-models/system/inference/token-classification/Jean-Baptiste-wikiner_fr/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["Il est difficile de se rendre compte de la taille de la Maison Blanche , car une grande partie est souterraine ou cach\u00e9e par le paysage ."]}} \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.ipynb deleted file mode 100644 index 24b900a634..0000000000 --- a/sdk/python/foundation-models/system/inference/token-classification/token-classification-batch-endpoint.ipynb +++ /dev/null @@ -1,484 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Token Classification Inference using Batch Endpoints\n", - "\n", - "This 
sample shows how to deploy `token-classification` type models to a batch endpoint for inference.\n", - "\n", - "### Task\n", - "`token-classification` assigns a label to individual tokens in a sentence. One of the most common `token-classification` tasks is Named Entity Recognition (NER). NER attempts to find a label for each entity in a sentence, such as a person, location, or organization.\n", - "\n", - "### Model\n", - "Models that can perform the `token-classification` task are tagged with `task: token-classification`. We will use the `Jean-Baptiste-camembert-ner` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [Jean-Baptiste/wikiner_fr](https://huggingface.co/datasets/Jean-Baptiste/wikiner_fr) dataset. A copy of this dataset is available in the [Jean-Baptiste-wikiner_fr](./Jean-Baptiste-wikiner_fr/) folder. \\\n", - "Please note that the dataset used here is a French dataset, as the Jean-Baptiste/camembert-ner model was trained in French.\n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for batch inference.\n", - "* Run a batch inference job.\n", - "* Review inference predictions.\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies.\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry.\n", - "* Create or update compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import packages used by the following code snippets\n", - "import csv\n", - "import os\n", - "import time\n", - "\n", - "import pandas as pd\n", - "\n", - "from azure.ai.ml import Input, MLClient\n", - "from azure.ai.ml.constants import AssetTypes\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import (\n", - " AmlCompute,\n", - " BatchDeployment,\n", - " BatchEndpoint,\n", - " BatchRetrySettings,\n", - " Model,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "subscription_id = \"\"\n", - "resource_group_name = \"\"\n", - "workspace_name = \"\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Connect to workspace and registry using ML clients." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=subscription_id,\n", - " resource_group_name=resource_group_name,\n", - " workspace_name=workspace_name,\n", - ")\n", - "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create a compute cluster.\n", - "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_name = \"cpu-cluster\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_cluster = AmlCompute(\n", - " name=compute_name,\n", - " description=\"An AML compute cluster\",\n", - " size=\"Standard_DS3_V2\",\n", - " min_instances=0,\n", - " max_instances=3,\n", - " idle_time_before_scale_down=120,\n", - ") # 120 seconds\n", - "\n", - "workspace_ml_client.begin_create_or_update(compute_cluster)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `token-classification` task. 
In this example, we use the `Jean-Baptiste-camembert-ner` model. If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"Jean-Baptiste-camembert-ner\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the Jean-Baptiste-wikiner_fr dataset is available in the [Jean-Baptiste-wikiner_fr](./Jean-Baptiste-wikiner_fr/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define directories and filenames as variables\n", - "dataset_dir = \"Jean-Baptiste-wikiner_fr\"\n", - "training_datafile = \"train.jsonl\"\n", - "\n", - "batch_dir = \"batch\"\n", - "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n", - "batch_input_file = \"batch_input.csv\"\n", - "os.makedirs(batch_dir, exist_ok=True)\n", - "os.makedirs(batch_inputs_dir, exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the ./Jean-Baptiste-wikiner_fr/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # Set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the transformed `\"text\"` column. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "batch_df = (\n", - " train_df[[\"text\"]].rename(columns={\"text\": \"input_string\"}).sample(frac=0.001)\n", - ")\n", - "\n", - "# Divide this into files of 25 rows each\n", - "batch_size_per_predict = 25\n", - "for i in range(0, len(batch_df), batch_size_per_predict):\n", - " j = i + batch_size_per_predict\n", - " batch_df[i:j].to_csv(\n", - " os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n", - " )\n", - "\n", - "# Check out the first and last file name created\n", - "input_files = os.listdir(batch_inputs_dir)\n", - "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to a batch endpoint\n", - "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n", - "\n", - "* Create a batch endpoint.\n", - "* Create a batch deployment.\n", - "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n", - "\n", - "#### Create the endpoint." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "endpoint_name = \"token-classification-\" + str(timestamp)\n", - "\n", - "endpoint = BatchEndpoint(\n", - " name=endpoint_name,\n", - " description=\"Batch endpoint for \"\n", - " + foundation_model.name\n", - " + \", for token-classification task\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create the deployment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "deployment_name = \"demo\"\n", - "\n", - "deployment = BatchDeployment(\n", - " name=deployment_name,\n", - " endpoint_name=endpoint_name,\n", - " model=foundation_model.id,\n", - " compute=compute_name,\n", - " error_threshold=0,\n", - " instance_count=1,\n", - " logging_level=\"info\",\n", - " max_concurrency_per_instance=1,\n", - " mini_batch_size=10,\n", - " output_file_name=\"predictions.csv\",\n", - " retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n", - ")\n", - "workspace_ml_client.begin_create_or_update(deployment).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Set the deployment as default." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "endpoint.defaults.deployment_name = deployment_name\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()\n", - "\n", - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Run a batch inference job.\n", - "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n", - "\n", - "job = workspace_ml_client.batch_endpoints.invoke(\n", - " endpoint_name=endpoint.name, input=input\n", - ")\n", - "\n", - "workspace_ml_client.jobs.stream(job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Review inference predictions. \n", - "Download the predictions from the job output and review the predictions using a dataframe." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", - "\n", - "workspace_ml_client.jobs.download(\n", - " name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n", - ")\n", - "\n", - "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n", - "\n", - "# Load the batch predictions file with no headers into a dataframe and set your column names\n", - "score_df = pd.read_csv(\n", - " predictions_file,\n", - " header=None,\n", - " names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n", - ")\n", - "score_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_df = []\n", - "for file in input_files:\n", - " input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n", - " input.reset_index(inplace=True)\n", - " input[\"batch_input_file_name\"] = file\n", - " input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n", - " input_df.append(input)\n", - "input_df = pd.concat(input_df)\n", - "input_df.set_index(\"index\", inplace=True)\n", - "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n", - "\n", - "input_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Join the predictions with input data to compare them to ground truth." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.merge(\n", - " input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n", - ")\n", - "\n", - "# Show the first few rows of the results\n", - "df.head(20)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Clean up resources\n", - "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n", - "workspace_ml_client.compute.begin_delete(name=compute_name).result()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/token-classification/token-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/token-classification/token-classification-online-endpoint.ipynb deleted file mode 100644 index 5e47958212..0000000000 --- 
a/sdk/python/foundation-models/system/inference/token-classification/token-classification-online-endpoint.ipynb +++ /dev/null @@ -1,304 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Token Classification Inference using Online Endpoints\n", - "\n", - "This sample shows how to deploy `token-classification` type models to an online endpoint for inference.\n", - "\n", - "### Task\n", - "`token-classification` assigns a label to individual tokens in a sentence. One of the most common `token-classification` tasks is Named Entity Recognition (NER). NER attempts to find a label for each entity in a sentence, such as a person, location, or organization.\n", - "\n", - "### Model\n", - "Models that can perform the `token-classification` task are tagged with `task: token-classification`. We will use the `Jean-Baptiste-camembert-ner` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [Jean-Baptiste/wikiner_fr](https://huggingface.co/datasets/Jean-Baptiste/wikiner_fr) dataset. A copy of this dataset is available in the [Jean-Baptiste-wikiner_fr](./Jean-Baptiste-wikiner_fr/) folder. \\\n", - "Please note that the dataset used here is a French dataset, as the Jean-Baptiste/camembert-ner model was trained in French.\n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. 
Set up pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `token-classification` task. In this example, we use the `Jean-Baptiste-camembert-ner` model. If you have opened this notebook for a different model, replace the model name and version accordingly. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"Jean-Baptiste-camembert-ner\"\n", - "model_version = \"3\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the Jean-Baptiste-wikiner_fr dataset is available in the [Jean-Baptiste-wikiner_fr](./Jean-Baptiste-wikiner_fr/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Save few samples in the format that can be passed as input to the online-inference endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the ./Jean-Baptiste-wikiner_fr/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(\"./Jean-Baptiste-wikiner_fr/train.jsonl\", lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import (\n", - " ManagedOnlineEndpoint,\n", - " ManagedOnlineDeployment,\n", - " OnlineRequestSettings,\n", - ")\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "online_endpoint_name = \"token-classification-\" + str(timestamp)\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + foundation_model.name\n", - " + \", for token-classification task\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=foundation_model.id,\n", - " instance_type=\"Standard_DS2_v2\",\n", - " instance_count=1,\n", - " request_settings=OnlineRequestSettings(\n", - " request_timeout_ms=60000,\n", - " ),\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "\n", - "# read the ./Jean-Baptiste-wikiner_fr/train.jsonl file into a pandas dataframe\n", - "df = pd.read_json(\"./Jean-Baptiste-wikiner_fr/train.jsonl\", lines=True)\n", - "# escape single and double quotes in the text column\n", - "df[\"text\"] = df[\"text\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n", - "# pick 1 random row\n", - "sample_df = df.sample(1)\n", - "# create a json object with the key as \"inputs\" and value as a list of values from the en column of the sample_df dataframe\n", - "sample_json = {\"inputs\": sample_df[\"text\"].tolist()}\n", - "# save the json object to a file named sample_score.json in the ./Jean-Baptiste-wikiner_fr folder\n", - "test_json = {\"inputs\": {\"input_string\": sample_df[\"text\"].tolist()}}\n", - "# save the json object to a file named sample_score.json in the ./Jean-Baptiste-wikiner_fr folder\n", - "with open(os.path.join(\".\", \"Jean-Baptiste-wikiner_fr\", \"sample_score.json\"), \"w\") as f:\n", - " json.dump(test_json, f)\n", - "sample_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./Jean-Baptiste-wikiner_fr/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the json response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "response_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - 
"# compare the predicted labels with the actual labels\n", - "num_tokens = len(sample_df[\"text\"].tolist()[0].split())\n", - "predicted_labels = [\"O\"] * num_tokens\n", - "for col in response_df.columns:\n", - " prediction = response_df[col].tolist()[0]\n", - " predicted_labels[prediction[\"index\"] - 1] = prediction[\"entity\"]\n", - "compare_df = pd.DataFrame(\n", - " {\n", - " \"ground_truth_labels\": sample_df[\"ner_tags_str\"].tolist(),\n", - " \"predicted_labels\": [predicted_labels],\n", - " }\n", - ")\n", - "compare_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/translation/translation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/translation/translation-batch-endpoint.ipynb deleted file mode 100644 index a5b2e41fdd..0000000000 --- a/sdk/python/foundation-models/system/inference/translation/translation-batch-endpoint.ipynb +++ /dev/null @@ -1,481 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", 
- "metadata": {}, - "source": [ - "## Translation Inference using Batch Endpoints\n", - "\n", - "This sample shows how to deploy `translation` type models to a batch endpoint for inference.\n", - "\n", - "### Task\n", - "`translation` converts a sequence of text from one language to another. It is one of several tasks you can formulate as a sequence-to-sequence problem, a powerful framework for returning some output from an input, like translation or summarization. `translation` systems are commonly used for translation between different language texts, but it can also be used for speech or some combination in between like text-to-speech or speech-to-text.\n", - "\n", - "### Model\n", - "Models that can perform the `translation` task are tagged with `task: translation`. We will use the `t5-small` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [wmt16 (ro-en)](https://huggingface.co/datasets/wmt16) dataset. A copy of this dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder. \n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for batch inference.\n", - "* Run a batch inference job.\n", - "* Review inference predictions.\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies.\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry.\n", - "* Create or update compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Import packages used by the following code snippets\n", - "import csv\n", - "import os\n", - "import time\n", - "\n", - "import pandas as pd\n", - "\n", - "from azure.ai.ml import Input, MLClient\n", - "from azure.ai.ml.constants import AssetTypes\n", - "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", - "from azure.ai.ml.entities import (\n", - " AmlCompute,\n", - " BatchDeployment,\n", - " BatchEndpoint,\n", - " BatchRetrySettings,\n", - " Model,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "subscription_id = \"\"\n", - "resource_group_name = \"\"\n", - "workspace_name = \"\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Connect to workspace and registry using ML clients." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=subscription_id,\n", - " resource_group_name=resource_group_name,\n", - " workspace_name=workspace_name,\n", - ")\n", - "# The models, fine tuning pipelines, and environments are available in the AzureML system registry, \"azureml\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create a compute cluster.\n", - "Use the model card from the AzureML system registry to check the minimum required inferencing SKU, referenced as `size` below. If you already have a sufficient compute cluster, you can simply define the name in `compute_name` in the following code block." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_name = \"cpu-cluster\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "compute_cluster = AmlCompute(\n", - " name=compute_name,\n", - " description=\"An AML compute cluster\",\n", - " size=\"Standard_DS3_V2\",\n", - " min_instances=0,\n", - " max_instances=3,\n", - " idle_time_before_scale_down=120,\n", - ") # 120 seconds\n", - "\n", - "workspace_ml_client.begin_create_or_update(compute_cluster)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `translation` task. In this example, we use the `t5-small` model. 
If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"t5-small\"\n", - "model_version = \"1\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " f\"Using model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing.\"\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Prepare data for inference.\n", - "\n", - "A copy of the wmt16-en-ro dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* We want this sample to run quickly, so save a smaller dataset containing a fraction of the original." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define directories and filenames as variables\n", - "dataset_dir = \"wmt16-en-ro-dataset\"\n", - "training_datafile = \"train.jsonl\"\n", - "\n", - "batch_dir = \"batch\"\n", - "batch_inputs_dir = os.path.join(batch_dir, \"inputs\")\n", - "batch_input_file = \"batch_input.csv\"\n", - "os.makedirs(batch_dir, exist_ok=True)\n", - "os.makedirs(batch_inputs_dir, exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the ./wmt16-en-ro-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # Set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(os.path.join(\".\", dataset_dir, training_datafile), lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "Save a fraction of the input data to files of smaller batches for testing. The MLflow model's signature specifies the input should be a column named `\"input_string\"`, so rename the transformed `\"en\"` column." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "batch_df = train_df[[\"en\"]].rename(columns={\"en\": \"input_string\"}).sample(frac=0.01)\n", - "\n", - "# Divide this into files of 25 rows each\n", - "batch_size_per_predict = 25\n", - "for i in range(0, len(batch_df), batch_size_per_predict):\n", - " j = i + batch_size_per_predict\n", - " batch_df[i:j].to_csv(\n", - " os.path.join(batch_inputs_dir, str(i) + batch_input_file), quoting=csv.QUOTE_ALL\n", - " )\n", - "\n", - "# Check out the first and last file name created\n", - "input_files = os.listdir(batch_inputs_dir)\n", - "print(f\"{input_files[0]} to {str(i)}{batch_input_file}.\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to a batch endpoint\n", - "Batch endpoints are endpoints that are used to do batch inferencing on large volumes of data over a period of time. The endpoints receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis. For more information on batch endpoints and deployments see [What are batch endpoints?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2#what-are-batch-endpoints).\n", - "\n", - "* Create a batch endpoint.\n", - "* Create a batch deployment.\n", - "* Set the deployment as default; doing so allows invoking the endpoint without specifying the deployment's name.\n", - "\n", - "#### Create the endpoint." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "endpoint_name = \"translation-\" + str(timestamp)\n", - "\n", - "endpoint = BatchEndpoint(\n", - " name=endpoint_name,\n", - " description=\"Batch endpoint for \"\n", - " + foundation_model.name\n", - " + \", for translation task\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create the deployment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "deployment_name = \"demo\"\n", - "\n", - "deployment = BatchDeployment(\n", - " name=deployment_name,\n", - " endpoint_name=endpoint_name,\n", - " model=foundation_model.id,\n", - " compute=compute_name,\n", - " error_threshold=0,\n", - " instance_count=1,\n", - " logging_level=\"info\",\n", - " max_concurrency_per_instance=1,\n", - " mini_batch_size=10,\n", - " output_file_name=\"predictions.csv\",\n", - " retry_settings=BatchRetrySettings(max_retries=3, timeout=300),\n", - ")\n", - "workspace_ml_client.begin_create_or_update(deployment).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Set the deployment as default." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "endpoint.defaults.deployment_name = deployment_name\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()\n", - "\n", - "endpoint = workspace_ml_client.batch_endpoints.get(endpoint_name)\n", - "print(f\"The default deployment is {endpoint.defaults.deployment_name}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Run a batch inference job.\n", - "Invoke the batch endpoint with the input parameter pointing to the folder containing the batch inference input. This creates a pipeline job using the default deployment in the endpoint. Wait for the job to complete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input = Input(path=batch_inputs_dir, type=AssetTypes.URI_FOLDER)\n", - "\n", - "job = workspace_ml_client.batch_endpoints.invoke(\n", - " endpoint_name=endpoint.name, input=input\n", - ")\n", - "\n", - "workspace_ml_client.jobs.stream(job.name)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Review inference predictions. \n", - "Download the predictions from the job output and review the predictions using a dataframe." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scoring_job = list(workspace_ml_client.jobs.list(parent_job_name=job.name))[0]\n", - "\n", - "workspace_ml_client.jobs.download(\n", - " name=scoring_job.name, download_path=batch_dir, output_name=\"score\"\n", - ")\n", - "\n", - "predictions_file = os.path.join(batch_dir, \"named-outputs\", \"score\", \"predictions.csv\")\n", - "\n", - "# Load the batch predictions file with no headers into a dataframe and set your column names\n", - "score_df = pd.read_csv(\n", - " predictions_file,\n", - " header=None,\n", - " names=[\"row_number_per_file\", \"prediction\", \"batch_input_file_name\"],\n", - ")\n", - "score_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Record the input file name and set the original index value in the `'index'` column for each input file. Join the `train_df` with ground truth into the input dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_df = []\n", - "for file in input_files:\n", - " input = pd.read_csv(os.path.join(batch_inputs_dir, file), index_col=0)\n", - " input.reset_index(inplace=True)\n", - " input[\"batch_input_file_name\"] = file\n", - " input.reset_index(names=[\"row_number_per_file\"], inplace=True)\n", - " input_df.append(input)\n", - "input_df = pd.concat(input_df)\n", - "input_df.set_index(\"index\", inplace=True)\n", - "input_df = input_df.join(train_df).drop(columns=[\"input_string\"])\n", - "\n", - "input_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Join the predictions with input data to compare them to ground truth." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.merge(\n", - " input_df, score_df, how=\"inner\", on=[\"row_number_per_file\", \"batch_input_file_name\"]\n", - ")\n", - "\n", - "# Show the first few rows of the results\n", - "df.head(20)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Clean up resources\n", - "Batch endpoints use compute resources only when jobs are submitted. You can keep the batch endpoint for your reference without worrying about compute bills, or choose to delete the endpoint. If you created your compute cluster to have zero minimum instances and scale down soon after being idle, you won't be charged for an unused compute." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.batch_endpoints.begin_delete(name=endpoint_name).result()\n", - "workspace_ml_client.compute.begin_delete(name=compute_name).result()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/translation/translation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/translation/translation-online-endpoint.ipynb deleted file mode 100644 index 1e7c59c31f..0000000000 --- a/sdk/python/foundation-models/system/inference/translation/translation-online-endpoint.ipynb +++ /dev/null @@ -1,295 +0,0 @@ -{ - "cells": [ - { 
- "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Translation Inference using Online Endpoints\n", - "\n", - "This sample shows how to deploy `translation` type models to an online endpoint for inference.\n", - "\n", - "### Task\n", - "`translation` converts a sequence of text from one language to another. It is one of several tasks you can formulate as a sequence-to-sequence problem, a powerful framework for returning some output from an input, like translation or summarization. `translation` systems are commonly used for translation between different language texts, but it can also be used for speech or some combination in between like text-to-speech or speech-to-text.\n", - "\n", - "### Model\n", - "Models that can perform the `translation` task are tagged with `task: translation`. We will use the `t5-small` model in this notebook. If you opened this notebook from a specific model card, remember to replace the specific model name. If you don't find a model that suits your scenario or domain, you can discover and [import models from HuggingFace hub](../../import/import-model-from-huggingface.ipynb) and then use them for inference. \n", - "\n", - "### Inference data\n", - "We will use the [wmt16 (ro-en)](https://huggingface.co/datasets/wmt16) dataset. A copy of this dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder. \n", - "\n", - "### Outline\n", - "* Set up pre-requisites.\n", - "* Pick a model to deploy.\n", - "* Prepare data for inference. \n", - "* Deploy the model for real time inference.\n", - "* Test the endpoint\n", - "* Clean up resources." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Set up pre-requisites\n", - "* Install dependencies\n", - "* Connect to AzureML Workspace. Learn more at [set up SDK authentication](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?tabs=sdk). 
Replace ``, `` and `` below.\n", - "* Connect to `azureml` system registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azure.ai.ml import MLClient\n", - "from azure.identity import (\n", - " DefaultAzureCredential,\n", - " InteractiveBrowserCredential,\n", - " ClientSecretCredential,\n", - ")\n", - "from azure.ai.ml.entities import AmlCompute\n", - "import time\n", - "\n", - "try:\n", - " credential = DefaultAzureCredential()\n", - " credential.get_token(\"https://management.azure.com/.default\")\n", - "except Exception as ex:\n", - " credential = InteractiveBrowserCredential()\n", - "\n", - "workspace_ml_client = MLClient(\n", - " credential,\n", - " subscription_id=\"\",\n", - " resource_group_name=\"\",\n", - " workspace_name=\"\",\n", - ")\n", - "# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n", - "registry_ml_client = MLClient(credential, registry_name=\"azureml-preview\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Pick a model to deploy\n", - "\n", - "Browse models in the Model Catalog in the AzureML Studio, filtering by the `translation` task. In this example, we use the `t5-small` model. If you have opened this notebook for a different model, replace the model name and version accordingly. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"t5-small\"\n", - "model_version = \"4\"\n", - "foundation_model = registry_ml_client.models.get(model_name, model_version)\n", - "print(\n", - " \"\\n\\nUsing model name: {0}, version: {1}, id: {2} for inferencing\".format(\n", - " foundation_model.name, foundation_model.version, foundation_model.id\n", - " )\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. 
Prepare data for inference.\n", - "\n", - "A copy of the wmt16-en-ro dataset is available in the [wmt16-en-ro-dataset](./wmt16-en-ro-dataset/) folder. The next few cells show basic data preparation:\n", - "* Visualize some data rows\n", - "* Save few samples in the format that can be passed as input to the online-inference endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load the ./wmt16-en-ro-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n", - "import pandas as pd\n", - "\n", - "pd.set_option(\n", - " \"display.max_colwidth\", 0\n", - ") # set the max column width to 0 to display the full text\n", - "train_df = pd.read_json(\"./wmt16-en-ro-dataset/train.jsonl\", lines=True)\n", - "train_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Deploy the model to an online endpoint\n", - "Online endpoints give a durable REST API that can be used to integrate with applications that need to use the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time, sys\n", - "from azure.ai.ml.entities import (\n", - " ManagedOnlineEndpoint,\n", - " ManagedOnlineDeployment,\n", - " OnlineRequestSettings,\n", - ")\n", - "\n", - "# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name\n", - "timestamp = int(time.time())\n", - "online_endpoint_name = \"translation-\" + str(timestamp)\n", - "# create an online endpoint\n", - "endpoint = ManagedOnlineEndpoint(\n", - " name=online_endpoint_name,\n", - " description=\"Online endpoint for \"\n", - " + foundation_model.name\n", - " + \", for translation task\",\n", - " auth_mode=\"key\",\n", - ")\n", - "workspace_ml_client.begin_create_or_update(endpoint).wait()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a deployment\n", - "demo_deployment = ManagedOnlineDeployment(\n", - " name=\"demo\",\n", - " endpoint_name=online_endpoint_name,\n", - " model=foundation_model.id,\n", - " instance_type=\"Standard_DS2_v2\",\n", - " instance_count=1,\n", - " request_settings=OnlineRequestSettings(\n", - " request_timeout_ms=60000,\n", - " ),\n", - ")\n", - "workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()\n", - "endpoint.traffic = {\"demo\": 100}\n", - "workspace_ml_client.begin_create_or_update(endpoint).result()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Test the endpoint with sample data\n", - "\n", - "We will fetch some sample data from the test dataset and submit to online endpoint for inference. 
We will then show the display the scored labels alongside the ground truth labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "\n", - "# read the ./wmt16-en-ro-dataset/train.jsonl file into a pandas dataframe\n", - "df = pd.read_json(\"./wmt16-en-ro-dataset/train.jsonl\", lines=True)\n", - "# escape single and double quotes in the text column\n", - "df[\"en\"] = df[\"en\"].str.replace(\"'\", \"\\\\'\").str.replace('\"', '\\\\\"')\n", - "# pick 1 random row\n", - "sample_df = df.sample(1)\n", - "# create a json object with the key as \"inputs\" and value as a list of values from the en column of the sample_df dataframe\n", - "test_json = {\n", - " \"inputs\": {\"input_string\": sample_df[\"en\"].tolist()},\n", - " \"parameters\": {\"task_type\": \"translation_en_to_ro\"},\n", - "}\n", - "# save the json object to a file named sample_score.json in the ./wmt16-en-ro-dataset folder\n", - "with open(os.path.join(\".\", \"wmt16-en-ro-dataset\", \"sample_score.json\"), \"w\") as f:\n", - " json.dump(test_json, f)\n", - "sample_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method\n", - "response = workspace_ml_client.online_endpoints.invoke(\n", - " endpoint_name=online_endpoint_name,\n", - " deployment_name=\"demo\",\n", - " request_file=\"./wmt16-en-ro-dataset/sample_score.json\",\n", - ")\n", - "print(\"raw response: \\n\", response, \"\\n\")\n", - "# convert the json response to a pandas dataframe\n", - "response_df = pd.read_json(response)\n", - "response_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# compare the predicted translation with the ground truth translation\n", - 
"response_df.rename(columns={\"translation_text\": \"predicted_translation\"}, inplace=True)\n", - "response_df[\"ground_truth_translation\"] = sample_df[\"ro\"].tolist()\n", - "response_df.head()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Delete the online endpoint\n", - "Don't forget to delete the online endpoint, else you will leave the billing meter running for the compute used by the endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "workspace_ml_client.online_endpoints.begin_delete(name=online_endpoint_name).wait()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "2f394aca7ca06fed1e6064aef884364492d7cdda3614a461e02e6407fc40ba69" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/download-dataset.py b/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/download-dataset.py deleted file mode 100644 index d945ea740e..0000000000 --- a/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/download-dataset.py +++ /dev/null @@ -1,45 +0,0 @@ -# import library to parse command line arguments -import argparse, os - -parser = argparse.ArgumentParser() -# add an argument to specify a dataset name to download -parser.add_argument("--dataset", type=str, default="wmt16", help="dataset name") -# add an argument to specify a dataset name to download -parser.add_argument( - "--dataset_subset", type=str, default="ro-en", help="dataset 
subset name" -) -# argument to save a fraction of the dataset -parser.add_argument( - "--fraction", type=float, default=0.05, help="fraction of the dataset to save" -) -# add an argument to specify the directory to download the dataset to -parser.add_argument( - "--download_dir", - type=str, - default="data", - help="directory to download the dataset to", -) -args = parser.parse_args() - -# create the download directory if it does not exist -if not os.path.exists(args.download_dir): - os.makedirs(args.download_dir) - - -def format_translation(example): - for key in example["translation"]: - example[key] = example["translation"][key] - return example - - -# import hugging face datasets library -from datasets import load_dataset, get_dataset_split_names - -for split in get_dataset_split_names(args.dataset, args.dataset_subset): - # load the split of the dataset - dataset = load_dataset(args.dataset, args.dataset_subset, split=split) - dataset = dataset.map(format_translation, remove_columns=["translation"]) - # save the split of the dataset to the download directory as json lines file - dataset.select(range(int(dataset.num_rows * args.fraction))).to_json( - os.path.join(args.download_dir, f"{split}.jsonl") - ) diff --git a/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/sample_score.json b/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/sample_score.json deleted file mode 100644 index 83e42a0903..0000000000 --- a/sdk/python/foundation-models/system/inference/translation/wmt16-en-ro-dataset/sample_score.json +++ /dev/null @@ -1 +0,0 @@ -{"inputs": {"input_string": ["European and international systems for monitoring production and the market as an early warning system for identifying production trends;"]}, "parameters": {"task_type": "translation_en_to_ro"}} \ No newline at end of file diff --git 
a/sdk/python/jobs/automl-standalone-jobs/automl-nlp-text-named-entity-recognition-task-distributed-sweeping/automl-nlp-text-ner-task-distributed-with-sweeping.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-nlp-text-named-entity-recognition-task-distributed-sweeping/automl-nlp-text-ner-task-distributed-with-sweeping.ipynb index 8b20f2469c..b2acfa109d 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-nlp-text-named-entity-recognition-task-distributed-sweeping/automl-nlp-text-ner-task-distributed-with-sweeping.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-nlp-text-named-entity-recognition-task-distributed-sweeping/automl-nlp-text-ner-task-distributed-with-sweeping.ipynb @@ -54,7 +54,7 @@ "from azure.ai.ml import MLClient\n", "\n", "from azure.ai.ml import Input\n", - "from azure.ai.ml.constants import AssetTypes, NlpModels, NlpLearningRateScheduler\n", + "from azure.ai.ml.constants import AssetTypes, NlpLearningRateScheduler\n", "from azure.ai.ml.automl import SearchSpace\n", "from azure.ai.ml.sweep import Choice, Uniform, BanditPolicy\n", "\n", @@ -302,7 +302,7 @@ "\n", "# Pass the fixed parameters\n", "text_ner_job.set_training_parameters(\n", - " model_name=NlpModels.ROBERTA_BASE,\n", + " model_name=\"roberta-base\",\n", " learning_rate_scheduler=NlpLearningRateScheduler.LINEAR,\n", " warmup_ratio=0.1,\n", ")" @@ -402,10 +402,10 @@ "text_ner_job.extend_search_space(\n", " [\n", " SearchSpace(\n", - " model_name=Choice([NlpModels.BERT_BASE_CASED, NlpModels.ROBERTA_BASE]),\n", + " model_name=Choice([\"bert-base-cased\", \"roberta-base\"]),\n", " ),\n", " SearchSpace(\n", - " model_name=Choice([NlpModels.DISTILROBERTA_BASE]),\n", + " model_name=Choice([\"distilroberta-base\"]),\n", " weight_decay=Uniform(0.01, 0.1),\n", " ),\n", " ]\n", diff --git a/sdk/python/jobs/pipelines/1d_pipeline_with_non_python_components/pipeline_with_non_python_components.ipynb 
b/sdk/python/jobs/pipelines/1d_pipeline_with_non_python_components/pipeline_with_non_python_components.ipynb index 566dc7b3e3..fae1da4ddb 100644 --- a/sdk/python/jobs/pipelines/1d_pipeline_with_non_python_components/pipeline_with_non_python_components.ipynb +++ b/sdk/python/jobs/pipelines/1d_pipeline_with_non_python_components/pipeline_with_non_python_components.ipynb @@ -144,10 +144,10 @@ " ),\n", " outputs={},\n", " services={\n", - " \"Jupyterlab endpoint\": JobService(job_service_type=\"jupyter_lab\"),\n", - " \"Vscode endpoint\": JobService(job_service_type=\"vs_code\"),\n", + " \"Jupyterlab endpoint\": JobService(type=\"jupyter_lab\"),\n", + " \"Vscode endpoint\": JobService(type=\"vs_code\"),\n", " # \"My_ssh\": JobService(\n", - " # job_service_type = \"ssh\",\n", + " # type = \"ssh\",\n", " # properties={\n", " # \"sshPublicKeys\":\"\"\n", " # }\n", diff --git a/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/prep/conda.yaml b/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/prep/conda.yaml index 5672addc92..67160b33a9 100644 --- a/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/prep/conda.yaml +++ b/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/prep/conda.yaml @@ -5,4 +5,4 @@ dependencies: - python=3.7.11 - pip=20.0 - pip: - - mldesigner==0.1.0b4 + - mldesigner==0.1.0b12 diff --git a/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/score/conda.yaml b/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/score/conda.yaml index b45afea657..3558e295c3 100644 --- a/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/score/conda.yaml +++ b/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/score/conda.yaml @@ -5,7 +5,7 @@ dependencies: - python=3.7.11 - pip=20.0 - pip: - - azureml-mlflow==1.42.0 + - azureml-mlflow==1.50.0 - tensorflow==2.7.0 - numpy==1.21.4 - scikit-learn==1.0.1 diff --git 
a/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/train/conda.yaml b/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/train/conda.yaml index 58c1b872c5..cd45bab662 100644 --- a/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/train/conda.yaml +++ b/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet/train/conda.yaml @@ -3,13 +3,13 @@ channels: - defaults dependencies: - python=3.7.11 - - pip=20.0 + - pip=20.2 - pip: - - mldesigner==0.1.0b4 - - azureml-mlflow==1.42.0 + - mldesigner==0.1.0b12 + - azureml-mlflow==1.50.0 - tensorflow==2.7.0 - numpy==1.21.4 - scikit-learn==1.0.1 - pandas==1.3.4 - matplotlib==3.2.2 - - protobuf==3.20.0 + - protobuf==3.20.0 \ No newline at end of file diff --git a/sdk/python/jobs/single-step/scikit-learn/iris/iris-scikit-learn.ipynb b/sdk/python/jobs/single-step/scikit-learn/iris/iris-scikit-learn.ipynb index 57f68d312f..375c03306d 100644 --- a/sdk/python/jobs/single-step/scikit-learn/iris/iris-scikit-learn.ipynb +++ b/sdk/python/jobs/single-step/scikit-learn/iris/iris-scikit-learn.ipynb @@ -168,10 +168,10 @@ " \"coef0\": 0.1,\n", " },\n", " services={\n", - " \"Jupyterlab endpoint\": JobService(job_service_type=\"jupyter_lab\"),\n", - " \"Vscode endpoint\": JobService(job_service_type=\"vs_code\"),\n", + " \"Jupyterlab endpoint\": JobService(type=\"jupyter_lab\"),\n", + " \"Vscode endpoint\": JobService(type=\"vs_code\"),\n", " # \"My_ssh\": JobService(\n", - " # job_service_type = \"ssh\",\n", + " # type = \"ssh\",\n", " # properties={\n", " # \"sshPublicKeys\":\"\"\n", " # }\n", diff --git a/sdk/python/using-mlflow/readme.md b/sdk/python/using-mlflow/readme.md index f4625fe7c7..1573d18d08 100755 --- a/sdk/python/using-mlflow/readme.md +++ b/sdk/python/using-mlflow/readme.md @@ -14,7 +14,7 @@ notebooks|description [Training and tracking a XGBoost classifier with MLflow using Service Principal 
authentication](train-and-log/xgboost_service_principal.ipynb)|*Demonstrate how to track experiments using MLflow from compute that is running outside Azure ML and how to authenticate against Azure ML services using a Service Principal.* [Hyper-parameters optimization using child runs with MLflow and HyperOpt optimizer](train-and-log/xgboost_nested_runs.ipynb)|*Demonstrate how to use child runs in MLflow to do hyper-parameter optimization for models using the popular library HyperOpt. It shows how to transfer metrics, params and artifacts from child runs to parent runs.* [Migrating tracking from Azure ML SDK v1 to MLflow](train-and-log/mlflow-v1-comparison.ipynb)|*A comprehensive guideline for moving from Azure ML SDK v1 to use MLflow for tracking experiments in jobs and notebooks.* -[Logging models instead of assets with MLflow](logging-models/logging_and_customizing_models.ipynb)|*Demonstrates how to log models and artifacts with MLflow, including custom models.* +[Logging models instead of assets with MLflow](train-and-log/logging_and_customizing_models.ipynb)|*Demonstrates how to log models and artifacts with MLflow, including custom models.* ### Management with MLflow diff --git a/v1/python-sdk/README.md b/v1/python-sdk/README.md index 8ead142881..ce102de00d 100644 --- a/v1/python-sdk/README.md +++ b/v1/python-sdk/README.md @@ -54,7 +54,7 @@ These concepts are sufficient to understand all examples in this repository, whi path|status|notebooks|description -|-|-|- -[automl-with-azureml](tutorials/automl-with-azureml)|[![automl-nlp-text-classification-multiclass](https://github.com/Azure/azureml-examples/workflows/automl-nlp-text-classification-multiclass/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-automl-nlp-text-classification-multiclass.yml)
[![automl-nlp-text-classification-multilabel](https://github.com/Azure/azureml-examples/workflows/automl-nlp-text-classification-multilabel/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-automl-nlp-text-classification-multilabel.yml)
[![automl-nlp-ner](https://github.com/Azure/azureml-examples/workflows/automl-nlp-ner/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-automl-nlp-ner.yml)
[![auto-ml-classification-bank-marketing-all-features](https://github.com/Azure/azureml-examples/workflows/auto-ml-classification-bank-marketing-all-features/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-classification-bank-marketing-all-features.yml)
[![auto-ml-classification-credit-card-fraud](https://github.com/Azure/azureml-examples/workflows/auto-ml-classification-credit-card-fraud/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-classification-credit-card-fraud.yml)
[![auto-ml-classification-text-dnn](https://github.com/Azure/azureml-examples/workflows/auto-ml-classification-text-dnn/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-classification-text-dnn.yml)
[![auto-ml-continuous-retraining](https://github.com/Azure/azureml-examples/workflows/auto-ml-continuous-retraining/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-continuous-retraining.yml)
[![auto-ml-forecasting-backtest-many-models](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-backtest-many-models/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-backtest-many-models.yml)
[![auto-ml-forecasting-backtest-single-model](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-backtest-single-model/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-backtest-single-model.yml)
[![auto-ml-forecasting-bike-share](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-bike-share/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-bike-share.yml)
[![auto-ml-forecasting-data-preparation](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-data-preparation/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-data-preparation.yml)
[![auto-ml-forecasting-demand-tcn](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-demand-tcn/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-demand-tcn.yml)
[![auto-ml-forecasting-energy-demand](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-energy-demand/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-energy-demand.yml)
[![auto-ml-forecasting-function](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-function/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-function.yml)
[![auto-ml-forecasting-github-dau](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-github-dau/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-github-dau.yml)
[![auto-ml-forecasting-hierarchical-timeseries](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-hierarchical-timeseries/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-hierarchical-timeseries.yml)
[![auto-ml-forecasting-many-models](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-many-models/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-many-models.yml)
[![auto-ml-forecasting-orange-juice-sales](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-orange-juice-sales/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-orange-juice-sales.yml)
[![auto-ml-forecasting-pipelines](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-pipelines/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-pipelines.yml)
[![auto-ml-forecasting-univariate-recipe-experiment-settings](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-univariate-recipe-experiment-settings/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-univariate-recipe-experiment-settings.yml)
[![auto-ml-forecasting-univariate-recipe-run-experiment](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-univariate-recipe-run-experiment/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-univariate-recipe-run-experiment.yml)
[![auto-ml-image-classification-multiclass-batch-scoring](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-classification-multiclass-batch-scoring/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-classification-multiclass-batch-scoring.yml)
[![auto-ml-image-classification-multiclass](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-classification-multiclass/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-classification-multiclass.yml)
[![auto-ml-image-classification-multilabel](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-classification-multilabel/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-classification-multilabel.yml)
[![auto-ml-image-instance-segmentation](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-instance-segmentation/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-instance-segmentation.yml)
[![auto-ml-image-object-detection](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-object-detection/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-object-detection.yml)
[![auto-ml-classification-credit-card-fraud-local](https://github.com/Azure/azureml-examples/workflows/auto-ml-classification-credit-card-fraud-local/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-classification-credit-card-fraud-local.yml)
[![binary-classification-metric-and-confidence-interval](https://github.com/Azure/azureml-examples/workflows/binary-classification-metric-and-confidence-interval/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-binary-classification-metric-and-confidence-interval.yml)
[![auto-ml-regression-explanation-featurization](https://github.com/Azure/azureml-examples/workflows/auto-ml-regression-explanation-featurization/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-regression-explanation-featurization.yml)
[![auto-ml-regression](https://github.com/Azure/azureml-examples/workflows/auto-ml-regression/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-regression.yml)|[automl-nlp-text-classification-multiclass.ipynb](tutorials/automl-with-azureml/automl-nlp-multiclass/automl-nlp-text-classification-multiclass.ipynb)
[automl-nlp-text-classification-multilabel.ipynb](tutorials/automl-with-azureml/automl-nlp-multilabel/automl-nlp-text-classification-multilabel.ipynb)
[automl-nlp-ner.ipynb](tutorials/automl-with-azureml/automl-nlp-ner/automl-nlp-ner.ipynb)
[auto-ml-classification-bank-marketing-all-features.ipynb](tutorials/automl-with-azureml/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb)
[auto-ml-classification-credit-card-fraud.ipynb](tutorials/automl-with-azureml/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb)
[auto-ml-classification-text-dnn.ipynb](tutorials/automl-with-azureml/classification-text-dnn/auto-ml-classification-text-dnn.ipynb)
[auto-ml-continuous-retraining.ipynb](tutorials/automl-with-azureml/continuous-retraining/auto-ml-continuous-retraining.ipynb)
[auto-ml-forecasting-backtest-many-models.ipynb](tutorials/automl-with-azureml/forecasting-backtest-many-models/auto-ml-forecasting-backtest-many-models.ipynb)
[auto-ml-forecasting-backtest-single-model.ipynb](tutorials/automl-with-azureml/forecasting-backtest-single-model/auto-ml-forecasting-backtest-single-model.ipynb)
[auto-ml-forecasting-bike-share.ipynb](tutorials/automl-with-azureml/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb)
[auto-ml-forecasting-data-preparation.ipynb](tutorials/automl-with-azureml/forecasting-data-preparation/auto-ml-forecasting-data-preparation.ipynb)
[auto-ml-forecasting-demand-tcn.ipynb](tutorials/automl-with-azureml/forecasting-demand-forecasting-tcn/auto-ml-forecasting-demand-tcn.ipynb)
[auto-ml-forecasting-energy-demand.ipynb](tutorials/automl-with-azureml/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb)
[auto-ml-forecasting-function.ipynb](tutorials/automl-with-azureml/forecasting-forecast-function/auto-ml-forecasting-function.ipynb)
[auto-ml-forecasting-github-dau.ipynb](tutorials/automl-with-azureml/forecasting-github-dau/auto-ml-forecasting-github-dau.ipynb)
[auto-ml-forecasting-hierarchical-timeseries.ipynb](tutorials/automl-with-azureml/forecasting-hierarchical-timeseries/auto-ml-forecasting-hierarchical-timeseries.ipynb)
[auto-ml-forecasting-many-models.ipynb](tutorials/automl-with-azureml/forecasting-many-models/auto-ml-forecasting-many-models.ipynb)
[auto-ml-forecasting-orange-juice-sales.ipynb](tutorials/automl-with-azureml/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb)
[auto-ml-forecasting-pipelines.ipynb](tutorials/automl-with-azureml/forecasting-pipelines/auto-ml-forecasting-pipelines.ipynb)
[auto-ml-forecasting-univariate-recipe-experiment-settings.ipynb](tutorials/automl-with-azureml/forecasting-recipes-univariate/auto-ml-forecasting-univariate-recipe-experiment-settings.ipynb)
[auto-ml-forecasting-univariate-recipe-run-experiment.ipynb](tutorials/automl-with-azureml/forecasting-recipes-univariate/auto-ml-forecasting-univariate-recipe-run-experiment.ipynb)
[auto-ml-image-classification-multiclass-batch-scoring.ipynb](tutorials/automl-with-azureml/image-classification-multiclass-batch-scoring/auto-ml-image-classification-multiclass-batch-scoring.ipynb)
[auto-ml-image-classification-multiclass.ipynb](tutorials/automl-with-azureml/image-classification-multiclass/auto-ml-image-classification-multiclass.ipynb)
[auto-ml-image-classification-multilabel.ipynb](tutorials/automl-with-azureml/image-classification-multilabel/auto-ml-image-classification-multilabel.ipynb)
[auto-ml-image-instance-segmentation.ipynb](tutorials/automl-with-azureml/image-instance-segmentation/auto-ml-image-instance-segmentation.ipynb)
[auto-ml-image-object-detection.ipynb](tutorials/automl-with-azureml/image-object-detection/auto-ml-image-object-detection.ipynb)
[auto-ml-classification-credit-card-fraud-local.ipynb](tutorials/automl-with-azureml/local-run-classification-credit-card-fraud/auto-ml-classification-credit-card-fraud-local.ipynb)
[binary-classification-metric-and-confidence-interval.ipynb](tutorials/automl-with-azureml/metrics/binary-classification-metric-and-confidence-interval.ipynb)
[auto-ml-regression-explanation-featurization.ipynb](tutorials/automl-with-azureml/regression-explanation-featurization/auto-ml-regression-explanation-featurization.ipynb)
[auto-ml-regression.ipynb](tutorials/automl-with-azureml/regression/auto-ml-regression.ipynb)|Tutorials showing how to build high quality machine learning models using Azure Automated Machine Learning. +[automl-with-azureml](tutorials/automl-with-azureml)|[![automl-nlp-text-classification-multiclass](https://github.com/Azure/azureml-examples/workflows/automl-nlp-text-classification-multiclass/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-automl-nlp-text-classification-multiclass.yml)
[![automl-nlp-text-classification-multilabel](https://github.com/Azure/azureml-examples/workflows/automl-nlp-text-classification-multilabel/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-automl-nlp-text-classification-multilabel.yml)
[![automl-nlp-ner](https://github.com/Azure/azureml-examples/workflows/automl-nlp-ner/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-automl-nlp-ner.yml)
[![auto-ml-classification-bank-marketing-all-features](https://github.com/Azure/azureml-examples/workflows/auto-ml-classification-bank-marketing-all-features/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-classification-bank-marketing-all-features.yml)
[![auto-ml-classification-credit-card-fraud](https://github.com/Azure/azureml-examples/workflows/auto-ml-classification-credit-card-fraud/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-classification-credit-card-fraud.yml)
[![auto-ml-classification-text-dnn](https://github.com/Azure/azureml-examples/workflows/auto-ml-classification-text-dnn/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-classification-text-dnn.yml)
[![auto-ml-continuous-retraining](https://github.com/Azure/azureml-examples/workflows/auto-ml-continuous-retraining/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-continuous-retraining.yml)
[![auto-ml-forecasting-backtest-many-models](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-backtest-many-models/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-backtest-many-models.yml)
[![auto-ml-forecasting-backtest-single-model](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-backtest-single-model/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-backtest-single-model.yml)
[![auto-ml-forecasting-bike-share](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-bike-share/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-bike-share.yml)
[![auto-ml-forecasting-data-preparation](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-data-preparation/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-data-preparation.yml)
[![auto-ml-forecasting-demand-forecasting-many-models](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-demand-forecasting-many-models/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-demand-forecasting-many-models.yml)
[![auto-ml-forecasting-demand-tcn](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-demand-tcn/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-demand-tcn.yml)
[![auto-ml-forecasting-energy-demand](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-energy-demand/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-energy-demand.yml)
[![auto-ml-forecasting-function](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-function/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-function.yml)
[![auto-ml-forecasting-github-dau](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-github-dau/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-github-dau.yml)
[![auto-ml-forecasting-hierarchical-timeseries](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-hierarchical-timeseries/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-hierarchical-timeseries.yml)
[![auto-ml-forecasting-many-models](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-many-models/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-many-models.yml)
[![auto-ml-forecasting-orange-juice-sales](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-orange-juice-sales/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-orange-juice-sales.yml)
[![auto-ml-forecasting-pipelines](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-pipelines/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-pipelines.yml)
[![auto-ml-forecasting-univariate-recipe-experiment-settings](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-univariate-recipe-experiment-settings/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-univariate-recipe-experiment-settings.yml)
[![auto-ml-forecasting-univariate-recipe-run-experiment](https://github.com/Azure/azureml-examples/workflows/auto-ml-forecasting-univariate-recipe-run-experiment/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-forecasting-univariate-recipe-run-experiment.yml)
[![auto-ml-image-classification-multiclass-batch-scoring](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-classification-multiclass-batch-scoring/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-classification-multiclass-batch-scoring.yml)
[![auto-ml-image-classification-multiclass](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-classification-multiclass/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-classification-multiclass.yml)
[![auto-ml-image-classification-multilabel](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-classification-multilabel/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-classification-multilabel.yml)
[![auto-ml-image-instance-segmentation](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-instance-segmentation/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-instance-segmentation.yml)
[![auto-ml-image-object-detection](https://github.com/Azure/azureml-examples/workflows/auto-ml-image-object-detection/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-image-object-detection.yml)
[![auto-ml-classification-credit-card-fraud-local](https://github.com/Azure/azureml-examples/workflows/auto-ml-classification-credit-card-fraud-local/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-classification-credit-card-fraud-local.yml)
[![binary-classification-metric-and-confidence-interval](https://github.com/Azure/azureml-examples/workflows/binary-classification-metric-and-confidence-interval/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-binary-classification-metric-and-confidence-interval.yml)
[![auto-ml-regression-explanation-featurization](https://github.com/Azure/azureml-examples/workflows/auto-ml-regression-explanation-featurization/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-regression-explanation-featurization.yml)
[![auto-ml-regression](https://github.com/Azure/azureml-examples/workflows/auto-ml-regression/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-auto-ml-regression.yml)|[automl-nlp-text-classification-multiclass.ipynb](tutorials/automl-with-azureml/automl-nlp-multiclass/automl-nlp-text-classification-multiclass.ipynb)
[automl-nlp-text-classification-multilabel.ipynb](tutorials/automl-with-azureml/automl-nlp-multilabel/automl-nlp-text-classification-multilabel.ipynb)
[automl-nlp-ner.ipynb](tutorials/automl-with-azureml/automl-nlp-ner/automl-nlp-ner.ipynb)
[auto-ml-classification-bank-marketing-all-features.ipynb](tutorials/automl-with-azureml/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb)
[auto-ml-classification-credit-card-fraud.ipynb](tutorials/automl-with-azureml/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb)
[auto-ml-classification-text-dnn.ipynb](tutorials/automl-with-azureml/classification-text-dnn/auto-ml-classification-text-dnn.ipynb)
[auto-ml-continuous-retraining.ipynb](tutorials/automl-with-azureml/continuous-retraining/auto-ml-continuous-retraining.ipynb)
[auto-ml-forecasting-backtest-many-models.ipynb](tutorials/automl-with-azureml/forecasting-backtest-many-models/auto-ml-forecasting-backtest-many-models.ipynb)
[auto-ml-forecasting-backtest-single-model.ipynb](tutorials/automl-with-azureml/forecasting-backtest-single-model/auto-ml-forecasting-backtest-single-model.ipynb)
[auto-ml-forecasting-bike-share.ipynb](tutorials/automl-with-azureml/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb)
[auto-ml-forecasting-data-preparation.ipynb](tutorials/automl-with-azureml/forecasting-data-preparation/auto-ml-forecasting-data-preparation.ipynb)
[auto-ml-forecasting-demand-forecasting-many-models.ipynb](tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/auto-ml-forecasting-demand-forecasting-many-models.ipynb)
[auto-ml-forecasting-demand-tcn.ipynb](tutorials/automl-with-azureml/forecasting-demand-forecasting-tcn/auto-ml-forecasting-demand-tcn.ipynb)
[auto-ml-forecasting-energy-demand.ipynb](tutorials/automl-with-azureml/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb)
[auto-ml-forecasting-function.ipynb](tutorials/automl-with-azureml/forecasting-forecast-function/auto-ml-forecasting-function.ipynb)
[auto-ml-forecasting-github-dau.ipynb](tutorials/automl-with-azureml/forecasting-github-dau/auto-ml-forecasting-github-dau.ipynb)
[auto-ml-forecasting-hierarchical-timeseries.ipynb](tutorials/automl-with-azureml/forecasting-hierarchical-timeseries/auto-ml-forecasting-hierarchical-timeseries.ipynb)
[auto-ml-forecasting-many-models.ipynb](tutorials/automl-with-azureml/forecasting-many-models/auto-ml-forecasting-many-models.ipynb)
[auto-ml-forecasting-orange-juice-sales.ipynb](tutorials/automl-with-azureml/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb)
[auto-ml-forecasting-pipelines.ipynb](tutorials/automl-with-azureml/forecasting-pipelines/auto-ml-forecasting-pipelines.ipynb)
[auto-ml-forecasting-univariate-recipe-experiment-settings.ipynb](tutorials/automl-with-azureml/forecasting-recipes-univariate/auto-ml-forecasting-univariate-recipe-experiment-settings.ipynb)
[auto-ml-forecasting-univariate-recipe-run-experiment.ipynb](tutorials/automl-with-azureml/forecasting-recipes-univariate/auto-ml-forecasting-univariate-recipe-run-experiment.ipynb)
[auto-ml-image-classification-multiclass-batch-scoring.ipynb](tutorials/automl-with-azureml/image-classification-multiclass-batch-scoring/auto-ml-image-classification-multiclass-batch-scoring.ipynb)
[auto-ml-image-classification-multiclass.ipynb](tutorials/automl-with-azureml/image-classification-multiclass/auto-ml-image-classification-multiclass.ipynb)
[auto-ml-image-classification-multilabel.ipynb](tutorials/automl-with-azureml/image-classification-multilabel/auto-ml-image-classification-multilabel.ipynb)
[auto-ml-image-instance-segmentation.ipynb](tutorials/automl-with-azureml/image-instance-segmentation/auto-ml-image-instance-segmentation.ipynb)
[auto-ml-image-object-detection.ipynb](tutorials/automl-with-azureml/image-object-detection/auto-ml-image-object-detection.ipynb)
[auto-ml-classification-credit-card-fraud-local.ipynb](tutorials/automl-with-azureml/local-run-classification-credit-card-fraud/auto-ml-classification-credit-card-fraud-local.ipynb)
[binary-classification-metric-and-confidence-interval.ipynb](tutorials/automl-with-azureml/metrics/binary-classification-metric-and-confidence-interval.ipynb)
[auto-ml-regression-explanation-featurization.ipynb](tutorials/automl-with-azureml/regression-explanation-featurization/auto-ml-regression-explanation-featurization.ipynb)
[auto-ml-regression.ipynb](tutorials/automl-with-azureml/regression/auto-ml-regression.ipynb)|Tutorials showing how to build high quality machine learning models using Azure Automated Machine Learning. [automl-with-databricks](tutorials/automl-with-databricks)|[![automl-databricks-local-01](https://github.com/Azure/azureml-examples/workflows/automl-databricks-local-01/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-automl-databricks-local-01.yml)
[![automl-databricks-local-with-deployment](https://github.com/Azure/azureml-examples/workflows/automl-databricks-local-with-deployment/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-automl-databricks-local-with-deployment.yml)|[automl-databricks-local-01.ipynb](tutorials/automl-with-databricks/automl-databricks-local-01.ipynb)
[automl-databricks-local-with-deployment.ipynb](tutorials/automl-with-databricks/automl-databricks-local-with-deployment.ipynb)|*no description* [dataset-uploads](tutorials/dataset-uploads)|[![dataset-uploads](https://github.com/Azure/azureml-examples/workflows/python-sdk-tutorial-dataset-uploads/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-dataset-uploads.yml)|[upload_dataframe_register_as_dataset.ipynb](tutorials/dataset-uploads/upload_dataframe_register_as_dataset.ipynb)
[upload_directory_create_file_dataset.ipynb](tutorials/dataset-uploads/upload_directory_create_file_dataset.ipynb)|*no description* [deploy-local](tutorials/deploy-local)|[![deploy-local](https://github.com/Azure/azureml-examples/workflows/python-sdk-tutorial-deploy-local/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/python-sdk-tutorial-deploy-local.yml)|[1.deploy-local.ipynb](tutorials/deploy-local/1.deploy-local.ipynb)
[2.deploy-local-cli.ipynb](tutorials/deploy-local/2.deploy-local-cli.ipynb)|*no description* diff --git a/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/auto-ml-forecasting-demand-forecasting-many-models.ipynb b/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/auto-ml-forecasting-demand-forecasting-many-models.ipynb new file mode 100644 index 0000000000..fe60ab116c --- /dev/null +++ b/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/auto-ml-forecasting-demand-forecasting-many-models.ipynb @@ -0,0 +1,1599 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Automated Machine Learning\n", + "**Demand Forecasting Using Many Models**\n", + "\n", + "## Contents\n", + "1. [Introduction](#Introduction)\n", + "1. [Setup](#Setup)\n", + "1. [Data](#Data)\n", + "1. [Train Many Models](#TrainManyModels)\n", + "1. [Train Baseline](#TrainBaseline)\n", + "1. [Test Set Inference](#TestSetInference)\n", + "1. [Test Set Evaluation](#TestSetEvaluation)\n", + "1. [Generate Forecast](#GenerateForecast)\n", + "1. [Schedule Inference Pipelines](#ScheduleInference)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## 1. Introduction\n", + "\n", + "The objective of this notebook is to illustrate how to use the AutoML many models solution accelertor for demand forecasting tasks. 
It walks you through all stages of the model evaluation and production process starting with data ingestion and concluding with scheduling inference runs.\n", + "\n", + "We use a subset of UCI electricity data ([link](https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014#)) with the objective of predicting electricity demand per consumer 24 hours ahead. The data was preprocessed using the [data prep notebook](https://github.com/Azure/azureml-examples/blob/main/v1/python-sdk/tutorials/automl-with-azureml/forecasting-data-preparation/auto-ml-forecasting-data-preparation.ipynb). Please refer to it for illustration on how to download the data from the source, aggregate to an hourly frequency, convert from wide to long format and upload to the Datastore. Here, we will work with the already uploaded data. \n", + "\n", + "Having a problem description such as to generate accurate forecasts 24 hours ahead sounds like a relatively straightforward task. However, there are quite a few steps a user needs to take before the model is put in production. A user needs to prepare the data, partition it into appropriate sets, select the best model, evaluate it against a baseline, and monitor the model in real life to collect enough observations on how it would perform had it been put in production. Some of these steps are time consuming, some require certain expertise in writing code. The steps shown in this notebook follow a typical thought process one follows before the model is put in production.\n", + "\n", + "Make sure you have executed the [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) before running this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from azureml.pipeline.core import StepRun\n", + "except ModuleNotFoundError:\n", + " print(\n", + " \"Please install azureml-pipeline-core with the command: \\n! pip install azureml-pipeline-core\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import logging\n", + "import os\n", + "\n", + "from matplotlib import pyplot as plt\n", + "import pandas as pd\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This sample notebook may use features that are not available in previous versions of the Azure ML SDK." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"This notebook was created using version 1.47.0 of the Azure ML SDK\")\n", + "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Accessing the Azure ML workspace requires authentication with Azure.\n", + "\n", + "The default authentication is interactive authentication using the default tenant. 
Executing the ws = Workspace.from_config() line in the cell below will prompt for authentication the first time that it is run.\n", + "\n", + "If you have multiple Azure tenants, you can specify the tenant by replacing the ws = Workspace.from_config() line in the cell below with the following:\n", + "```\n", + "from azureml.core.authentication import InteractiveLoginAuthentication\n", + "auth = InteractiveLoginAuthentication(tenant_id = 'mytenantid')\n", + "ws = Workspace.from_config(auth = auth)\n", + "```\n", + "If you need to run in an environment where interactive login is not possible, you can use Service Principal authentication by replacing the ws = Workspace.from_config() line in the cell below with the following:\n", + "```\n", + "from azureml.core.authentication import ServicePrincipalAuthentication\n", + "auth = ServicePrincipalAuthentication('mytenantid', 'myappid', 'mypassword')\n", + "ws = Workspace.from_config(auth = auth)\n", + "```\n", + "For more details, see [aka.ms/aml-notebook-auth](https://aka.ms/aml-notebook-auth)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import uuid\n", + "\n", + "ws = Workspace.from_config()\n", + "datastore = ws.get_default_datastore()\n", + "\n", + "# Choose a name for the run history container in the workspace.\n", + "experiment_name = \"forecasting-many-models-\" + datetime.datetime.now().strftime(\n", + "    \"%Y%m%d\"\n", + ")\n", + "experiment = Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output[\"Subscription ID\"] = ws.subscription_id\n", + "output[\"Workspace\"] = ws.name\n", + "output[\"Resource Group\"] = ws.resource_group\n", + "output[\"Location\"] = ws.location\n", + "output[\"Run History Name\"] = experiment_name\n", + "pd.set_option(\"display.max_colwidth\", None)\n", + "outputDf = pd.DataFrame(data=output, index=[\"\"])\n", + "outputDf.T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.1. Compute \n", + "\n", + "#### Create or Attach existing AmlCompute\n", + "\n", + "You will need to create a compute target for your AutoML run. In this tutorial, you will create AmlCompute as your training compute resource.\n", + "\n", + "> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n", + "\n", + "\n", + "To run deep learning models we recommend using GPU compute. Here, we use a 5 node cluster of the `STANDARD_DS12_V2` series for illustration purposes. You will need to adjust the compute type and the number of nodes based on your needs which can be driven by the speed needed for model selection, data size, etc. \n", + "\n", + "#### Creation of AmlCompute takes approximately 5 minutes. 
\n", + "If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n", + "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# Choose a name for your CPU cluster\n", + "amlcompute_cluster_name = \"demand-fcst-mm-cluster\"\n", + "\n", + "# Verify that cluster does not exist already\n", + "try:\n", + " compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n", + " print(\"Found existing cluster, use it.\")\n", + "except ComputeTargetException:\n", + " compute_config = AmlCompute.provisioning_configuration(\n", + " vm_size=\"STANDARD_DS12_V2\", max_nodes=5\n", + " )\n", + " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n", + "compute_target.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Besides the compute cluster, we also need to define the python environment, which will be used for the experiment. We can either use the curated environment, provided by AutoML (recommended), or we can create a new environment from scratch. To create a custom environment, please set the `USE_CURATED_ENV` parameter to `False`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "from azureml.core import Environment\n", + "\n", + "aml_run_config = RunConfiguration()\n", + "aml_run_config.target = compute_target\n", + "\n", + "USE_CURATED_ENV = True\n", + "if USE_CURATED_ENV:\n", + " curated_environment = Environment.get(\n", + " workspace=ws, name=\"AzureML-sklearn-0.24-ubuntu18.04-py37-cpu\"\n", + " )\n", + " aml_run_config.environment = curated_environment\n", + "else:\n", + " aml_run_config.environment.python.user_managed_dependencies = False\n", + "\n", + " # Add some packages relied on by data prep step\n", + " aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n", + " conda_packages=[\"pandas\", \"scikit-learn\"],\n", + " pip_packages=[\"azureml-sdk\", \"azureml-dataset-runtime[fuse,pandas]\"],\n", + " pin_sdk_version=False,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## 3. Data\n", + "If you ran the data [preparation notebook](https://github.com/Azure/azureml-examples/blob/main/v1/python-sdk/tutorials/automl-with-azureml/forecasting-data-preparation/auto-ml-forecasting-data-preparation.ipynb) and want to use the registered data, skip section 3.1 and, instead, uncomment and execute the code in section 3.2. If, on the other hand, you did not run the notebook and want to use the data that we pre-processed and saved in the public blob, execute the code in section 3.1." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1 Loading and registering the data from public blob store\n", + "\n", + "Run the code in this section only if you want to use the data that is already available in the blobstore. 
If you want to use your own data that is already registered in your workspace, skip this section and proceed to run the commented out code in section 3.2." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following code registers a datastore `autom_fcst_many_models` in your workspace and links the data from the container `automl-sample-notebook-data`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Datastore\n", + "\n", + "# Please change the following to point to your own blob container and pass in account_key\n", + "blob_datastore_name = \"autom_fcst_many_models\"\n", + "container_name = \"automl-sample-notebook-data\"\n", + "account_name = \"automlsamplenotebookdata\"\n", + "\n", + "print(f'Creating datastore \"{blob_datastore_name}\" in your workspace ...\\n---')\n", + "demand_mm_datastore = Datastore.register_azure_blob_container(\n", + "    workspace=ws,\n", + "    datastore_name=blob_datastore_name,\n", + "    container_name=container_name,\n", + "    account_name=account_name,\n", + "    create_if_not_exists=True,\n", + ")" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Dataset\n", + "\n", + "print(\"Registering datasets in your workspace ...\\n---\")\n", + "\n", + "FOLDER_PREFIX_NAME = \"uci_electro_small_public_mm\"\n", + "\n", + "target_path_train = f\"{FOLDER_PREFIX_NAME}_train\"\n", + "target_path_test = f\"{FOLDER_PREFIX_NAME}_test\"\n", + "target_path_inference = f\"{FOLDER_PREFIX_NAME}_infer\"\n", + "\n", + "train_dataset = Dataset.Tabular.from_delimited_files(\n", + " path=demand_mm_datastore.path(target_path_train + \"/\"),\n", + " validate=False,\n", + " infer_column_types=True,\n", + ").register(workspace=ws, name=target_path_train, create_new_version=True)\n", + "\n", + "test_dataset = Dataset.Tabular.from_delimited_files(\n", + " path=demand_mm_datastore.path(target_path_test + \"/\"),\n", + " validate=False,\n", + " infer_column_types=True,\n", + ").register(workspace=ws, name=target_path_test, create_new_version=True)\n", + "\n", + "inference_dataset = Dataset.Tabular.from_delimited_files(\n", + " path=demand_mm_datastore.path(target_path_inference + \"/\"),\n", + " validate=False,\n", + " infer_column_types=True,\n", + ").register(workspace=ws, name=target_path_inference, create_new_version=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.2 Using data that is registered in your workspace\n", + "\n", + "If you ran the [data prep notebook](https://github.com/Azure/azureml-examples/blob/main/v1/python-sdk/tutorials/automl-with-azureml/forecasting-data-preparation/auto-ml-forecasting-data-preparation.ipynb) notebook, the train, test and inference sets are already uploaded and registered in your workspace. Uncomment the following code and change the `DATASET_PREFIX_NAME`, to match the value in the data preparation notebook, and run the code." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from azureml.core.dataset import Dataset\n", + "\n", + "# DATASET_PREFIX_NAME = \"uci_electro_small_mm\"\n", + "# print(f'Dataset prefix name: {DATASET_PREFIX_NAME}\\n---\\nLoading train, validation, test and inference sets ...\\n---')\n", + "\n", + "# target_path_train = f\"{DATASET_PREFIX_NAME}_train\"\n", + "# target_path_test = f\"{DATASET_PREFIX_NAME}_test\"\n", + "# target_path_inference = f\"{DATASET_PREFIX_NAME}_inference\"\n", + "\n", + "# train_dataset = Dataset.get_by_name(ws, name=target_path_train)\n", + "# test_dataset = Dataset.get_by_name(ws, name=target_path_test)\n", + "# inference_dataset = Dataset.get_by_name(ws, name=target_path_inference)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.3 Test and inference sets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we have *test* and *inference* sets. The difference between the two is the presence of the target column. The test set contains the target column and is used to evaluate model performance using [rolling forecast](https://learn.microsoft.com/en-us/azure/machine-learning/v1/how-to-auto-train-forecast-v1#evaluating-model-accuracy-with-a-rolling-forecast). On the other hand, the target column is not present in the inference set to illustrate how to generate an actual forecast." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The first few rows of the test set ...\\n---\")\n", + "print(test_dataset.take(5).to_pandas_dataframe())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"The first few rows of the inference set ...\\n---\")\n", + "print(inference_dataset.take(5).to_pandas_dataframe())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's set up what we know about the dataset.\n", + "\n", + "- **Target column** is what we want to forecast. In our case it is electricity consumption per customer measured in kilowatt hours (kWh).\n", + "- **Time column** is the time axis along which to predict.\n", + "- **Time series identifier columns** are identified by values of the columns listed `time_series_id_column_names`. In our case all unique time series are identified by a single column `customer_id`. However, it is quite common to have multiple columns identifying unique time series. See the [link](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-auto-train-forecast#configuration-settings) for a more detailed explanation on this topic." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "target_column_name = \"usage\"\n", + "time_column_name = \"datetime\"\n", + "GRAIN_COL = \"customer_id\"\n", + "time_series_id_column_names = [GRAIN_COL]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we download training data from the Datastore to make sure it looks as expected. If your dataset is large, there is no need to store it in the memory. In this case, skip the next block of code." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_df = train_dataset.to_pandas_dataframe()\n", + "\n", + "nseries = train_df.groupby(time_series_id_column_names).ngroups\n", + "print(\n", + " f\"Data contains {nseries} individual time-series:\\n{list(train_df[GRAIN_COL].unique())}\\n---\"\n", + ")\n", + "print(\"Printing the first few rows of the training data ...\\n---\")\n", + "train_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Train Many Models\n", + "\n", + "In this section we will train many models as well as the baseline model. The baseline model will be used as a reference point to understand many models' accuracy performance. The goal of forecasting is to have the most accurate predictions measured by some accuracy metric. What is considered an accurate prediction is fairly subjective. Take, for example, the MAPE (mean absolute percentage error) metric. A perfect forecast will result in the MAPE value of zero, which is not achievable using business data. For this reason it is imperative to have a baseline model to compare AutoML results against. Doing this adds objectivity to the model acceptance criteria. \n", + "\n", + "The baseline model can be the model that is currently in production. Oftentimes, the baseline is set to be a Naive forecast, which we will use in this notebook. The choice of the baseline is also specific to the data. For example, if there is a clear trend in the data one may not want to use a Naive model. Instead, one can use an ARIMA model. Please see this [document](https://learn.microsoft.com/en-us/azure/machine-learning/v1/how-to-configure-auto-train-v1#supported-models) for a list of AutoML models one can chose from to use as a baseline model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following 2 parameters allow us to re-use training runs for the many-models and baseline models, respectively. This can be helpful if you need to experiment with the post model training steps thus avoiding the need to kick off a new training run which can be computationally expensive." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "IS_MANY_MODELS_TRAINED = False\n", + "IS_BASE_MODEL_TRAINED = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.1 Train AutoML model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4.1.1 Set up training parameters\n", + "We need to provide the `ForecastingParameters`, `AutoMLConfig` and `ManyModelsTrainParameters` objects. For the forecasting task we also need to define several settings including the name of the time column, the maximum forecast horizon, and the partition column name(s) definition.\n", + "\n", + "#### Forecasting Parameters\n", + "To define forecasting parameters for your experiment training, you can leverage the `ForecastingParameters` class. The table below details the forecasting parameters we will be passing into our experiment.\n", + "\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**time_column_name**|The name of the time column in the data.|\n", + "|**forecast_horizon**|The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly).|\n", + "|**time_series_id_column_names**|The column names used to uniquely identify the time series in data that has multiple rows with the same timestamp. If the time series identifiers are not defined, the data set is assumed to be one time series.|\n", + "| **cv_step_size**| Number of periods between two consecutive cross-validation folds. 
The default value is \"auto\", in which case AutoMl determines the cross-validation step size automatically, if a validation set is not provided. Or users could specify an integer value. |\n", + "|**freq**|Forecast frequency. This optional parameter represents the period for which the forecast is desired, for example, daily, weekly, yearly, etc. Use this parameter for the correction of time series containing irregular data points or for padding of short time series. The frequency needs to be a pandas offset alias. Please refer to [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) for more information.\n", + "\n", + "\n", + "#### AutoMLConfig arguments\n", + "|Property|Description|\n", + "|-|-|\n", + "| **task** | forecasting |\n", + "| **primary_metric** | This is the metric that you want to optimize. Forecasting supports the following primary metrics
  • `normalized_root_mean_squared_error`
  • `normalized_mean_absolute_error`
  • `spearman_correlation`
  • `r2_score`
We recommend using either the normalized root mean squared error or normalized mean absolute error as a primary metric because they measure forecast accuracy. See the [link](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-automl-forecasting-faq#how-do-i-choose-the-primary-metric) for a more detailed discussion on this topic. |\n", + "| **experiment_timeout_hours** | Maximum amount of time in hours that each experiment can take before it terminates. This is optional but provides customers with greater control on exit criteria. When setting this criteria we advise to take into account the number of desired iterations parameter and set experiment timeout setting such that the desired number of iterations will be completed.|\n", + "| **iterations** | Number of models to train. This is optional but provides customers with greater control on exit criteria. |\n", + "| **label_column_name** | The name of the target column we are trying to predict. |\n", + "| **n_cross_validations** | Number of cross validation splits. The default value is \"auto\", in which case AutoMl determines the number of cross-validations automatically. Or users could specify an integer value. Rolling Origin Validation is used to split time-series in a temporally consistent way.|\n", + "| **enable_early_stopping** | Flag to enable early termination if the primary metric is no longer improving. |\n", + "| **blocked_models** | List of models we want to block. For illustration purposes and to reduce the runtime, we block all time series specific models. The default value is None or an empty list.|\n", + "\n", + "#### ManyModelsTrainParameters arguments\n", + "|Property|Description|\n", + "|-|-|\n", + "| **automl_settings** | The `AutoMLConfig` object defined above. |\n", + "| **partition_column_names** | The names of columns used to group your models. For timeseries, the groups must not split up individual time-series. That is, each group must contain one or more whole time-series. 
|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note, that the `time_series_id_column_names` and `partition_column_names` do not have to be the same. In our scenario, they are the same since we are interested in training one model per customer. We have 10 customers in our dataset and there will be 10 models trained. Say, you decide to cluster customers into groups, for example, each group has 2 customers and you want to train one model per group of customers. In such scenario, you will partition the data by groups, and the `time_series_id_column_names` will be different from the `partition_column_names`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.automl.core.forecasting_parameters import ForecastingParameters\n", + "from azureml.train.automl.automlconfig import AutoMLConfig\n", + "from azureml.train.automl.runtime._many_models.many_models_parameters import (\n", + " ManyModelsTrainParameters,\n", + ")\n", + "\n", + "forecast_horizon = 24\n", + "partition_column_names = time_series_id_column_names\n", + "\n", + "BLOCKED_MODELS = [\n", + " \"Naive\",\n", + " \"SeasonalNaive\",\n", + " \"Average\",\n", + " \"SeasonalAverage\",\n", + " \"Prophet\",\n", + " \"ExponentialSmoothing\",\n", + " \"ExtremeRandomTrees\",\n", + " \"AutoArima\",\n", + " \"Arimax\",\n", + "]\n", + "EXPERIMENT_TIMEOUT_HOURS = 1\n", + "\n", + "forecasting_parameters = ForecastingParameters(\n", + " time_column_name=time_column_name,\n", + " forecast_horizon=forecast_horizon,\n", + " time_series_id_column_names=time_series_id_column_names,\n", + " cv_step_size=\"auto\",\n", + " freq=\"H\",\n", + ")\n", + "\n", + "automl_settings = AutoMLConfig(\n", + " task=\"forecasting\",\n", + " primary_metric=\"normalized_root_mean_squared_error\",\n", + " iteration_timeout_minutes=20,\n", + " iterations=25,\n", + " experiment_timeout_hours=EXPERIMENT_TIMEOUT_HOURS,\n", + " 
label_column_name=target_column_name,\n", + " n_cross_validations=\"auto\", # Feel free to set to a small integer (>=2) if runtime is an issue.\n", + " blocked_models=BLOCKED_MODELS,\n", + " track_child_runs=False,\n", + " forecasting_parameters=forecasting_parameters,\n", + ")\n", + "\n", + "mm_paramters = ManyModelsTrainParameters(\n", + " automl_settings=automl_settings, partition_column_names=partition_column_names\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up many models pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Parallel run step is leveraged to train multiple models at once. To configure the ParallelRunConfig you will need to determine the appropriate number of workers and nodes for your use case. The `process_count_per_node` is based off the number of cores of the compute VM. The node_count will determine the number of master nodes to use, increasing the node count will speed up the training process.\n", + "\n", + "| Property | Description|\n", + "|-|-|\n", + "| **experiment** | The experiment used for training. |\n", + "| **train_data** | The file dataset to be used as input to the training run. |\n", + "| **node_count** | The number of compute nodes to be used for running the user script. We recommend to start with 3 and increase the node_count if the training time is taking too long. |\n", + "| **process_count_per_node** | Process count per node, we recommend 2:1 ratio for number of cores: number of processes per node. eg. If node has 16 cores then configure 8 or less process count per node for optimal performance. |\n", + "| **train_pipeline_parameters** | The set of configuration parameters defined in the previous section. |\n", + "| **run_invocation_timeout** | Maximum amount of time in seconds that the `ParallelRunStep` class is allowed. This is optional but provides customers with greater control on exit criteria. 
This must be greater than `experiment_timeout_hours` by at least 300 seconds. Here, we add a buffer of 1000 seconds. |\n", + "| **arguments** | Arguments to be passed to training script. Here, we pass the parameter `retrain_failed_models` and set it to True. If training a model for any partition fails, AutoML will kick off a new child run for that partition.|\n", + "\n", + "**Note**: Total time it takes for the **training step** in the pipeline to complete equals to \n", + "\n", + "$$\n", + "\\left( \\frac{t}{ p \\times n } \\right) \\times k\n", + "$$\n", + "\n", + "where\n", + "- $ t $ is time it takes to train one partition (can be viewed in the training logs)\n", + "- $ p $ is the process count per node\n", + "- $ n $ is the node count\n", + "- $ k $ is total number of partitions in time series based on `partition_column_names`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.automl.pipeline.steps import AutoMLPipelineBuilder\n", + "\n", + "\n", + "training_pipeline_steps = AutoMLPipelineBuilder.get_many_models_train_steps(\n", + " experiment=experiment,\n", + " train_data=train_dataset,\n", + " compute_target=compute_target,\n", + " node_count=5,\n", + " process_count_per_node=2,\n", + " run_invocation_timeout=(EXPERIMENT_TIMEOUT_HOURS * 3600 + 1000),\n", + " train_pipeline_parameters=mm_paramters,\n", + " arguments=[\"--retrain_failed_models\", \"True\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note, the output of the previous cell prints out the name of the partitioned dataset. This allows you to run a new experiment on the already partitioned dataset. What this does is it skips a data partitioning step, and reduces the runtime. To use an already partitioned dataset, uncomment and execute the following code _**before**_ building the `training_pipeline_steps`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from azureml.core.dataset import Dataset\n", + "\n", + "# PARTITIONED_TRAIN_DATASET_NAME = \"\"\n", + "# train_dataset = Dataset.get_by_name(ws, name=PARTITIONED_TRAIN_DATASET_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the pipeline to run\n", + "Next we submit our pipeline to run. The whole training pipeline takes about 20 minutes on a 5 node STANDARD_DS15_V2 cluster." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core import Pipeline\n", + "\n", + "training_pipeline = Pipeline(ws, steps=training_pipeline_steps)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Are many models trained? {IS_MANY_MODELS_TRAINED}\\n---\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if not IS_MANY_MODELS_TRAINED:\n", + " print(\"Training new AutoML model ...\\n---\")\n", + " training_run = experiment.submit(training_pipeline)\n", + " training_run.wait_for_completion(show_output=False)\n", + " IS_MODEL_TRAINED = True\n", + "else:\n", + " from azureml.train.automl.run import AutoMLRun\n", + " from azureml.pipeline.core.run import PipelineRun\n", + "\n", + " PIPELINE_RUN_ID = \"\" # Copy the output of Submitted PipelineRun to re-use trained models\n", + " training_run = PipelineRun(experiment=experiment, run_id=PIPELINE_RUN_ID)\n", + " print(f\"Using previously trained model. Pipeline run ID: {PIPELINE_RUN_ID}\\n---\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Train the baseline model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will use Naive model as our baseline. 
To train it, we kick off another automl experiment with the following settings. Please note that we added the Naive model to the allowed models list. To reduce the training time, we set the number of cross validations to 2. Read the following [document](https://learn.microsoft.com/en-us/azure/machine-learning/v1/how-to-auto-train-forecast-v1#training-and-validation-data) for more information on this topic.\n", + "\n", + "The only `AutoMLConfig` settings you might consider changing are the `experiment_timeout_hours` and `allowed_models`. You might want to increase the experiment timeout if your data has lots of unique time series. The allowed model list can be modified to reflect a different choice of the baseline model and can be selected from the supported [forecasting models](https://learn.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels.forecasting) and [regression models](https://learn.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels.regression)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.automl.core.forecasting_parameters import ForecastingParameters\n", + "\n", + "forecasting_parameters = ForecastingParameters(\n", + " time_column_name=time_column_name,\n", + " forecast_horizon=forecast_horizon,\n", + " time_series_id_column_names=time_series_id_column_names,\n", + " cv_step_size=1,\n", + " freq=\"H\",\n", + ")\n", + "\n", + "automl_config = AutoMLConfig(\n", + " task=\"forecasting\",\n", + " debug_log=\"baseline.log\",\n", + " primary_metric=\"normalized_root_mean_squared_error\",\n", + " experiment_timeout_hours=1,\n", + " training_data=train_dataset,\n", + " label_column_name=target_column_name,\n", + " compute_target=compute_target,\n", + " enable_early_stopping=True,\n", + " n_cross_validations=2,\n", + " verbosity=logging.INFO,\n", + " max_cores_per_iteration=-1,\n", + " enable_dnn=False,\n", + " allowed_models=[\"Naive\"],\n", + " forecasting_parameters=forecasting_parameters,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Is the baseline model trained? {IS_BASE_MODEL_TRAINED}\\n---\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "if not IS_BASE_MODEL_TRAINED:\n", + " remote_base_run = experiment.submit(automl_config, show_output=False)\n", + " remote_base_run.wait_for_completion(show_output=False)\n", + " IS_BASE_MODEL_TRAINED = True\n", + "else:\n", + " from azureml.train.automl.run import AutoMLRun\n", + "\n", + " BASE_RUN_ID = \"\"\n", + " # during the initial training run copy-paste the run id to be utilized later if needed.\n", + " remote_base_run = AutoMLRun(experiment=experiment, run_id=BASE_RUN_ID)\n", + " print(f\"Using previously trained model. 
Run ID: {BASE_RUN_ID}\\n---\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Test set inference " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6.1 Many models inferences\n", + "\n", + "We create an output folder which will be used to save the output of our experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create an output folder\n", + "OUTPUT_DIR = \"forecast_output\"\n", + "os.makedirs(OUTPUT_DIR, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_experiment = Experiment(ws, experiment_name + \"_inference\")\n", + "test_experiment_base = Experiment(ws, experiment_name + \"_inference_base\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 6.1.1 Set up output dataset for inference data\n", + "Output of inference can be represented as [OutputFileDatasetConfig](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.output_dataset_config.outputdatasetconfig?view=azure-ml-py) object and OutputFileDatasetConfig can be registered as a dataset. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "from azureml.data import OutputFileDatasetConfig\n", + "\n", + "output_test_data_ds = OutputFileDatasetConfig(\n", + " name=\"many_models_inference_output\",\n", + " destination=(datastore, \"uci_electro_small/test_set_output/\"),\n", + ").register_on_complete(name=\"uci_electro_small_test_data_ds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For many models we need to provide the ManyModelsInferenceParameters object.\n", + "\n", + "#### `ManyModelsInferenceParameters` arguments\n", + "| Property | Description|\n", + "| :--------------- | :------------------- |\n", + "| **partition_column_names** | List of column names that identifies groups. |\n", + "| **target_column_name** | \\[Optional\\] Column name only if the inference dataset has the target. |\n", + "| **time_column_name** | \\[Optional\\] Time column name only if it is timeseries. |\n", + "| **inference_type** | \\[Optional\\] Which inference method to use on the model. For the forecasting tasks set this value to 'forecast'. |\n", + "| **forecast_mode** | \\[Optional\\] The type of forecast to be used, either 'rolling' or 'recursive'; defaults to 'recursive'. |\n", + "| **step** | \\[Optional\\] Number of periods to advance the forecasting window in each iteration **(for rolling forecast only)**; defaults to 1. |\n", + "\n", + "#### `get_many_models_batch_inference_steps` arguments\n", + "| Property | Description|\n", + "| :--------------- | :------------------- |\n", + "| **experiment** | The experiment used for inference run. |\n", + "| **inference_data** | The data to use for inferencing. It should be the same schema as used for training.\n", + "| **compute_target** | The compute target that runs the inference pipeline. |\n", + "| **node_count** | The number of compute nodes to be used for running the user script. 
We recommend to start with the number of cores per node (varies by compute sku). |\n", + "| **process_count_per_node** | \\[Optional\\] The number of processes per node. By default it's 2 (should be at most half of the number of cores in a single node of the compute cluster that will be used for the experiment).\n", + "| **inference_pipeline_parameters** | \\[Optional\\] The `ManyModelsInferenceParameters` object defined above. |\n", + "| **append_row_file_name** | \\[Optional\\] The name of the output file (optional, default value is 'parallel_run_step.txt'). Supports 'txt' and 'csv' file extension. A 'txt' file extension generates the output in 'txt' format with space as separator without column names. A 'csv' file extension generates the output in 'csv' format with comma as separator and with column names. |\n", + "| **train_run_id** | \\[Optional\\] The run id of the **training pipeline**. By default it is the latest successful training pipeline run in the experiment. |\n", + "| **train_experiment_name** | \\[Optional\\] The train experiment that contains the train pipeline. This one is only needed when the train pipeline is not in the same experiement as the inference pipeline. |\n", + "| **run_invocation_timeout** | \\[Optional\\] Maximum amount of time in seconds that the `ParallelRunStep` class is allowed. This is optional but provides customers with greater control on exit criteria. |\n", + "| **output_datastore** | \\[Optional\\] The `Datastore` or `OutputDatasetConfig` to be used for output. If specified any pipeline output will be written to that location. If unspecified the default datastore will be used. |\n", + "| **arguments** | \\[Optional\\] Arguments to be passed to inference script. Possible argument is '--forecast_quantiles' followed by quantile values. 
|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.automl.pipeline.steps import AutoMLPipelineBuilder\n", + "from azureml.train.automl.runtime._many_models.many_models_parameters import (\n", + " ManyModelsInferenceParameters,\n", + ")\n", + "\n", + "output_file_name = \"parallel_run_step.csv\"\n", + "\n", + "mm_parameters = ManyModelsInferenceParameters(\n", + " partition_column_names=time_series_id_column_names,\n", + " time_column_name=time_column_name,\n", + " target_column_name=target_column_name,\n", + ")\n", + "\n", + "inference_steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(\n", + " experiment=experiment,\n", + " inference_data=test_dataset,\n", + " node_count=2,\n", + " process_count_per_node=8,\n", + " compute_target=compute_target,\n", + " run_invocation_timeout=300,\n", + " output_datastore=output_test_data_ds,\n", + " train_run_id=training_run.id,\n", + " train_experiment_name=training_run.experiment.name,\n", + " inference_pipeline_parameters=mm_parameters,\n", + " append_row_file_name=output_file_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core import Pipeline\n", + "\n", + "inference_pipeline = Pipeline(ws, steps=inference_steps)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 6.1.2 Run the pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "inference_run = experiment.submit(inference_pipeline)\n", + "inference_run.wait_for_completion(show_output=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6.2 Retrieve the test set predictions from the many models\n", + "\n", + "The forecasting pipeline forecasts the electricity usage for each customer. 
The pipeline returns one file with the predictions for each store and outputs the result to the forecasting_output Blob container. The details of the blob container is listed in 'forecasting_output.txt' under Outputs+logs. \n", + "\n", + "The following code snippet:\n", + "1. Downloads the contents of the output folder that is passed in the parallel run step \n", + "2. Reads the output file that has the predictions as pandas dataframe \n", + "3. Displays the top 5 rows of the predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from azureml.pipeline.core import StepRun\n", + "# ! pip show azureml-pipeline-core" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.automl.pipeline.steps.utilities import get_output_from_mm_pipeline\n", + "\n", + "forecasting_results_name = \"forecasting_results\"\n", + "forecasting_output_name = \"many_models_inference_output\"\n", + "forecast_file = get_output_from_mm_pipeline(\n", + " inference_run, forecasting_results_name, forecasting_output_name, output_file_name\n", + ")\n", + "df = pd.read_csv(forecast_file)\n", + "print(\n", + " \"Prediction has \", df.shape[0], \" rows. Here the first 5 rows are being displayed.\"\n", + ")\n", + "df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.rename(columns={\"Predictions\": \"predicted\"}, inplace=True)\n", + "df.to_csv(os.path.join(OUTPUT_DIR, \"test-set-predictions-many-models.csv\"), index=False)\n", + "df.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6.3 Inference the baseline model\n", + "\n", + "Next, we perform a rolling evaluation on the test set for the baseline model. 
To do this, we use the `run_remote_inference` method which downloads the pickle file of the model into the temporary folder `forecast_naive` and copies the `inference_script_naive.py` file to it. This folder is then uploaded on the compute cluster where inference is performed. The `inference_script_naive.py` script performs a rolling evaluation on the test set, similarly to what we have done in section 6.2. Upon completion of this step, we delete the newly created `forecast_naive` folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "baseline_run = remote_base_run.get_best_child()\n", + "baseline_model_name = baseline_run.properties[\"model_name\"]\n", + "baseline_run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import shutil\n", + "from scripts.helper_scripts import run_remote_inference_naive\n", + "\n", + "remote_base_run_test = run_remote_inference_naive(\n", + " test_experiment=test_experiment_base,\n", + " compute_target=compute_target,\n", + " train_run=baseline_run,\n", + " test_dataset=test_dataset,\n", + " target_column_name=target_column_name,\n", + " rolling_evaluation_step_size=forecast_horizon,\n", + " inference_folder=\"./forecast_naive\",\n", + ")\n", + "remote_base_run_test.wait_for_completion(show_output=False)\n", + "\n", + "# download the forecast file to the local machine\n", + "print(\"Downloading test data with prediction ...\\n---\")\n", + "remote_base_run_test.download_file(\n", + " \"outputs/predictions.csv\",\n", + " os.path.join(OUTPUT_DIR, \"test-set-predictions-base.csv\"),\n", + ")\n", + "\n", + "# delete downloaded scripts\n", + "print(\"Removing auxiliary files ...\\n---\")\n", + "shutil.rmtree(\"./forecast_naive\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. 
Test set model evaluation\n", + "\n", + "In this section we will evaluate the test set performance for many models and compare it with the baseline. We will generate time series plots for forecasts and actuals, calculate accuracy metrics and plot the evolution of metrics for each model over time. All output from this section will be stored in the `forecast_output` folder and can be referenced any time you need it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7.1 Load test set results\n", + "\n", + "Here, we will import test set results for both many-models and baseline experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "backtest_automl = pd.read_csv(\n", + " os.path.join(OUTPUT_DIR, \"test-set-predictions-many-models.csv\"),\n", + " parse_dates=[time_column_name],\n", + ")\n", + "backtest_base = pd.read_csv(\n", + " os.path.join(OUTPUT_DIR, \"test-set-predictions-base.csv\"),\n", + " parse_dates=[time_column_name],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we combine outputs into a single dataframe which will be used for plotting and scoring." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "backtest = backtest_automl.merge(\n", + " backtest_base.drop(target_column_name, axis=1),\n", + " on=[\"customer_id\", \"datetime\"],\n", + " how=\"inner\",\n", + " suffixes=[\"\", \"_base\"],\n", + ")\n", + "backtest.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\n", + " f\"N model: {backtest_automl.shape[0]}. N baseline: {backtest_base.shape[0]}. N merged: {backtest.shape[0]}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data we are working with has an hourly frequency and we plan to generate the forecasts every 24 hours. 
If the model were to be put in production such that the forecasts are generated and model's performance is monitored every 24 hours, we will mimic the scoring process on the test set by generating daily accuracy metrics. To do this, we create a date column (\"ymd\"). If you want to score the output at any other frequency, say, weekly, just change the frequency parameter to the desired frequency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PERIOD_COLUMN = \"ymd\"\n", + "\n", + "backtest[PERIOD_COLUMN] = backtest[time_column_name].dt.to_period(\n", + " \"D\"\n", + ") # year-month-day to be used for daily metrics computation\n", + "backtest.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7.2 Generate time series plots\n", + "\n", + "Here, we generate a forecast-versus-actuals plot for the test set for both the best many models and the baseline. Since we use rolling evaluation with the step size of 24 hours, this mimics the behavior of putting both models in production and monitoring their behavior for the duration of the test set. This step allows users to make informed decisions about model performance and saves numerous costs associated with productionalizing the model and monitoring its performance in real life. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scripts.helper_scripts import draw_one_plot\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.backends.backend_pdf import PdfPages\n", + "\n", + "plot_filename = \"forecast_vs_actual.pdf\"\n", + "\n", + "pdf = PdfPages(os.path.join(os.getcwd(), OUTPUT_DIR, plot_filename))\n", + "for _, one_forecast in backtest.groupby(GRAIN_COL):\n", + " one_forecast[time_column_name] = pd.to_datetime(one_forecast[time_column_name])\n", + " one_forecast.sort_values(time_column_name, inplace=True)\n", + " draw_one_plot(\n", + " one_forecast,\n", + " time_column_name,\n", + " target_column_name,\n", + " [GRAIN_COL],\n", + " [target_column_name, \"predicted\", \"predicted_base\"],\n", + " pdf,\n", + " plot_predictions=True,\n", + " )\n", + "pdf.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7.3 Calculate metrics\n", + "Here, we will calculate the metric of interest for each day. For illustration purposes we use root mean squared error as the metric of choice. However, the `compute_all_metrics` method calculates all primary and secondary metrics for AutoML runs. Please refer to the *Regression/forecasting metrics* section in this [document](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#regressionforecasting-metrics) for the list of available metrics. We will calculate the distribution of this metric for each time series in our dataset. Looking at the descriptive stats of such metrics can be more informative than calculating a single metric such as the mean for each time series. As an example, we are looking at the RMSE (root mean squared error) metric, but you can choose any other metric computed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scripts.helper_scripts import compute_all_metrics\n", + "\n", + "metrics_per_grain_day = compute_all_metrics(\n", + " fcst_df=backtest,\n", + " actual_col=target_column_name,\n", + " fcst_col=\"predicted\",\n", + " ts_id_colnames=[GRAIN_COL, PERIOD_COLUMN],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\n", + " f'List of available metrics: {metrics_per_grain_day[\"metric_name\"].unique()}\\n---'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DESIRED_METRIC_NAME = \"root_mean_squared_error\"\n", + "\n", + "metrics_per_grain_day = metrics_per_grain_day.query(\n", + " f'metric_name == \"{DESIRED_METRIC_NAME}\"'\n", + ")\n", + "metrics_per_grain_day[[GRAIN_COL, PERIOD_COLUMN]] = metrics_per_grain_day[\n", + " \"time_series_id\"\n", + "].str.split(\"|\", 1, expand=True)\n", + "metrics_per_grain_day.to_csv(\n", + " os.path.join(OUTPUT_DIR, \"metrics-automl.csv\"), index=False\n", + ")\n", + "metrics_per_grain_day.groupby(GRAIN_COL)[\"metric\"].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics_per_grain_day_base = compute_all_metrics(\n", + " fcst_df=backtest,\n", + " actual_col=target_column_name,\n", + " fcst_col=\"predicted_base\",\n", + " ts_id_colnames=[GRAIN_COL, PERIOD_COLUMN],\n", + ")\n", + "metrics_per_grain_day_base = metrics_per_grain_day_base.query(\n", + " f'metric_name == \"{DESIRED_METRIC_NAME}\"'\n", + ")\n", + "metrics_per_grain_day_base[[GRAIN_COL, PERIOD_COLUMN]] = metrics_per_grain_day[\n", + " \"time_series_id\"\n", + "].str.split(\"|\", 1, expand=True)\n", + "metrics_per_grain_day_base.to_csv(\n", + " os.path.join(OUTPUT_DIR, \"metrics-base.csv\"), index=False\n", + ")\n", + 
"metrics_per_grain_day_base.groupby(GRAIN_COL)[\"metric\"].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7.4 Visualize metrics\n", + "\n", + "In this section we plot metric evolution over time for the best AutoML and the baseline models." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics_df = metrics_per_grain_day.drop(\"time_series_id\", axis=1).merge(\n", + " metrics_per_grain_day_base.drop(\"time_series_id\", axis=1),\n", + " on=[\"metric_name\", GRAIN_COL, PERIOD_COLUMN],\n", + " how=\"inner\",\n", + " suffixes=[\"\", \"_base\"],\n", + ")\n", + "metrics_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Save the metrics evolution plots to pdf file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "grain = [GRAIN_COL]\n", + "plot_filename = \"metrics_plot.pdf\"\n", + "\n", + "pdf = PdfPages(os.path.join(os.getcwd(), OUTPUT_DIR, plot_filename))\n", + "for _, one_forecast in metrics_df.groupby(grain):\n", + " one_forecast[PERIOD_COLUMN] = pd.to_datetime(one_forecast[PERIOD_COLUMN])\n", + " one_forecast.sort_values(PERIOD_COLUMN, inplace=True)\n", + " draw_one_plot(\n", + " one_forecast,\n", + " PERIOD_COLUMN,\n", + " target_column_name,\n", + " grain,\n", + " [\"metric\", \"metric_base\"],\n", + " pdf,\n", + " plot_predictions=True,\n", + " )\n", + "pdf.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "\n", + "IFrame(os.path.join(\"./forecast_output/metrics_plot.pdf\"), width=800, height=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 8. Inference\n", + "\n", + "In this step, we generate an actual forecast by providing an inference set that does not contain actual values. 
This illustrates how to generate production forecasts in real life. The code in this section is pretty much identical to the one in section 6.1 with one exception, we set the `run_rolling_evaluation` argument to `False`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 8.1 Set up output dataset for inference data\n", + "\n", + "Output of inference step can be represented as [OutputFileDatasetConfig](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.output_dataset_config.outputdatasetconfig?view=azure-ml-py) object which, in turn, will be registered as a dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.data import OutputFileDatasetConfig\n", + "\n", + "output_inference_data_ds = OutputFileDatasetConfig(\n", + " name=\"many_models_inference_output\",\n", + " destination=(datastore, \"uci_electro_small/inference_output/\"),\n", + ").register_on_complete(name=\"uci_electro_small_inference_data_ds\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "inference_dataset.take(5).to_pandas_dataframe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For many models we need to provide the ManyModelsInferenceParameters object." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.automl.pipeline.steps import AutoMLPipelineBuilder\n", + "from azureml.train.automl.runtime._many_models.many_models_parameters import (\n", + " ManyModelsInferenceParameters,\n", + ")\n", + "\n", + "output_file_name = \"parallel_run_step.csv\"\n", + "inference_ds_small = inference_dataset\n", + "\n", + "mm_parameters = ManyModelsInferenceParameters(\n", + " partition_column_names=time_series_id_column_names,\n", + " time_column_name=time_column_name,\n", + ")\n", + "\n", + "inference_steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(\n", + " experiment=experiment,\n", + " inference_data=inference_ds_small,\n", + " node_count=2,\n", + " process_count_per_node=8,\n", + " compute_target=compute_target,\n", + " run_invocation_timeout=300,\n", + " output_datastore=output_inference_data_ds,\n", + " train_run_id=training_run.id,\n", + " train_experiment_name=training_run.experiment.name,\n", + " inference_pipeline_parameters=mm_parameters,\n", + " append_row_file_name=output_file_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from azureml.pipeline.core import Pipeline\n", + "\n", + "inference_pipeline = Pipeline(ws, steps=inference_steps)\n", + "inference_run = experiment.submit(inference_pipeline)\n", + "inference_run.wait_for_completion(show_output=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 8.2 Get the predicted data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.automl.pipeline.steps.utilities import get_output_from_mm_pipeline\n", + "\n", + "forecasting_results_name = \"forecasting_results\"\n", + "forecasting_output_name = \"many_models_inference_output\"\n", + "forecast_file = 
get_output_from_mm_pipeline(\n", + " inference_run, forecasting_results_name, forecasting_output_name, output_file_name\n", + ")\n", + "inference_df = pd.read_csv(forecast_file)\n", + "print(\n", + " \"Prediction has \",\n", + " inference_df.shape[0],\n", + " \" rows. Here the first 5 rows are being displayed.\",\n", + ")\n", + "inference_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "inference_df.rename(columns={\"Predictions\": \"predicted\"}, inplace=True)\n", + "inference_df.to_csv(\n", + " os.path.join(OUTPUT_DIR, \"inference-set-predictions-many-models.csv\"), index=False\n", + ")\n", + "inference_df.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9. Schedule Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This section is about how to schedule a pipeline for periodically predictions. For more info about pipeline schedule and pipeline endpoint, please follow this [notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "inference_published_pipeline = inference_pipeline.publish(\n", + " name=\"automl_forecast_many_models\",\n", + " description=\"forecast many models\",\n", + " version=\"1\",\n", + " continue_on_step_failure=False,\n", + ")\n", + "print(\"Newly published pipeline id: {}\".format(inference_published_pipeline.id))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If `inference_dataset` is going to refresh every 24 hours and we want to predict every 24 hours (forecast_horizon), we can schedule our pipeline to run every day at 11 pm to get daily inference results. 
You can refresh your test dataset (a newer version will be created) periodically when new data is available (i.e. target column in test dataset would have values in the beginning as context data, and followed by NaNs to be predicted). The inference pipeline will pick up context to further improve the forecast accuracy. See the Forecasting away from training data in this [notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-forecast-function/auto-ml-forecasting-function.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If `test_dataset` is going to refresh every 4 weeks before Friday 16:00 and we want to predict every 4 weeks (forecast_horizon), we can schedule our pipeline to run every 4 weeks at 16:00 to get daily inference results. You can refresh your test dataset (a newer version will be created) periodically when new data is available (i.e. target column in test dataset would have values in the beginning as context data, and followed by NaNs to be predicted). The inference pipeline will pick up context to further improve the forecast accuracy." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule\n", + "\n", + "recurrence = ScheduleRecurrence(\n", + " frequency=\"Day\", interval=1, hours=[23], minutes=[00] # Runs every day at 11:00 pm\n", + ")\n", + "\n", + "schedule = Schedule.create(\n", + " workspace=ws,\n", + " name=\"many_models_inference_schedule\",\n", + " pipeline_id=inference_published_pipeline.id,\n", + " experiment_name=\"schedule-run-mm-uci-electro\",\n", + " recurrence=recurrence,\n", + " wait_for_provisioning=True,\n", + " description=\"Schedule Run\",\n", + ")\n", + "\n", + "# You may want to make sure that the schedule is provisioned properly\n", + "# before making any further changes to the schedule\n", + "\n", + "print(\"Created schedule with id: {}\\n---\".format(schedule.id))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9.1 [Optional] Disable schedule" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "schedule.disable()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "jialiu" + } + ], + "category": "tutorial", + "celltoolbar": "Raw Cell Format", + "compute": [ + "Remote" + ], + "datasets": [ + "Orange Juice Sales" + ], + "deployment": [ + "Azure Container Instance" + ], + "exclude_from_index": false, + "framework": [ + "Azure ML AutoML" + ], + "friendly_name": "Forecasting orange juice sales with deployment", + "index_order": 1, + "kernelspec": { + "display_name": "Python 3.6 - AzureML", + "language": "python", + "name": "python3-azureml" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "tags": [ + "None" + ], + "task": "Forecasting", 
+ "vscode": { + "interpreter": { + "hash": "6bd77c88278e012ef31757c15997a7bea8c943977c43d6909403c00ae11d43ca" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/scripts/helper_scripts.py b/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/scripts/helper_scripts.py new file mode 100644 index 0000000000..37e037c61e --- /dev/null +++ b/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/scripts/helper_scripts.py @@ -0,0 +1,251 @@ +from typing import Any, Dict, Optional, List + +import argparse +import json +import os +import re +import shutil + +import pandas as pd + +from matplotlib import pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages + +from azureml.core import ScriptRunConfig +from azureml.automl.core.shared import constants +from azureml.automl.core.shared.types import GrainType +from azureml.automl.runtime.shared.score import scoring + + +GRAIN = "time_series_id" +BACKTEST_ITER = "backtest_iteration" +ACTUALS = "actual_level" +PREDICTIONS = "predicted_level" +ALL_GRAINS = "all_sets" + +FORECASTS_FILE = "forecast.csv" +SCORES_FILE = "scores.csv" +SCORES_FILE_GRAIN = "scores_per_grain.csv" +PLOTS_FILE = "plots_fcst_vs_actual.pdf" +PLOTS_FILE_GRAIN = "plots_fcst_vs_actual_per_grain.pdf" +RE_INVALID_SYMBOLS = re.compile("[: ]") + + +def _compute_metrics( + df: pd.DataFrame, metrics: List[str], actual_col: str, fcst_col: str +): + """ + Compute metrics for one data frame. + + :param df: The data frame which contains actual_level and predicted_level columns. + :return: The data frame with two columns - metric_name and metric. 
+ """ + scores = scoring.score_regression( + y_test=df[actual_col], y_pred=df[fcst_col], metrics=metrics + ) + metrics_df = pd.DataFrame(list(scores.items()), columns=["metric_name", "metric"]) + metrics_df.sort_values(["metric_name"], inplace=True) + metrics_df.reset_index(drop=True, inplace=True) + return metrics_df + + +def _format_grain_name(grain: GrainType) -> str: + """ + Convert grain name to string. + + :param grain: the grain name. + :return: the string representation of the given grain. + """ + if not isinstance(grain, tuple) and not isinstance(grain, list): + return str(grain) + grain = list(map(str, grain)) + return "|".join(grain) + + +def compute_all_metrics( + fcst_df: pd.DataFrame, + actual_col: str, + fcst_col: str, + ts_id_colnames: List[str], + metric_names: Optional[List[set]] = None, +): + """ + Calculate metrics per grain. + + :param fcst_df: forecast data frame. Must contain 2 columns: 'actual_level' and 'predicted_level' + :param metric_names: (optional) the list of metric names to return + :param ts_id_colnames: (optional) list of grain column names + :return: dictionary of summary table for all tests and final decision on stationary vs nonstaionary + """ + if not metric_names: + metric_names = list(constants.Metric.SCALAR_REGRESSION_SET) + + if ts_id_colnames is None: + ts_id_colnames = [] + + metrics_list = [] + if ts_id_colnames: + for grain, df in fcst_df.groupby(ts_id_colnames): + one_grain_metrics_df = _compute_metrics( + df, metric_names, actual_col, fcst_col + ) + one_grain_metrics_df[GRAIN] = _format_grain_name(grain) + metrics_list.append(one_grain_metrics_df) + + # overall metrics + one_grain_metrics_df = _compute_metrics(fcst_df, metric_names, actual_col, fcst_col) + one_grain_metrics_df[GRAIN] = ALL_GRAINS + metrics_list.append(one_grain_metrics_df) + + # collect into a data frame + return pd.concat(metrics_list) + + +def draw_one_plot( + df: pd.DataFrame, + time_column_name: str, + target_column_name: str, + 
grain_column_names: List[str], + columns_to_plot: List[str], + pdf: PdfPages, + plot_predictions=False, +) -> None: + """ + Draw the single plot. + + :param df: The data frame with the data to build plot. + :param time_column_name: The name of a time column. + :param grain_column_names: The name of grain columns. + :param pdf: The pdf backend used to render the plot. + """ + if isinstance(grain_column_names, str): + grain_column_names = [grain_column_names] + fig, _ = plt.subplots(figsize=(20, 10)) + df = df.set_index(time_column_name) + plt.plot(df[columns_to_plot]) + plt.xticks(rotation=45) + if grain_column_names: + grain_name = [df[grain].iloc[0] for grain in grain_column_names] + plt.title(f"Time series ID: {_format_grain_name(grain_name)}") + plt.legend(columns_to_plot) + plt.close(fig) + pdf.savefig(fig) + + +def calculate_scores_and_build_plots( + input_dir: str, output_dir: str, automl_settings: Dict[str, Any] +): + os.makedirs(output_dir, exist_ok=True) + grains = automl_settings.get(constants.TimeSeries.GRAIN_COLUMN_NAMES) + time_column_name = automl_settings.get(constants.TimeSeries.TIME_COLUMN_NAME) + if grains is None: + grains = [] + if isinstance(grains, str): + grains = [grains] + while BACKTEST_ITER in grains: + grains.remove(BACKTEST_ITER) + + dfs = [] + for fle in os.listdir(input_dir): + file_path = os.path.join(input_dir, fle) + if os.path.isfile(file_path) and file_path.endswith(".csv"): + df_iter = pd.read_csv(file_path, parse_dates=[time_column_name]) + for _, iteration in df_iter.groupby(BACKTEST_ITER): + dfs.append(iteration) + forecast_df = pd.concat(dfs, sort=False, ignore_index=True) + # == Per grain-iteration analysis + # To make sure plots are in order, sort the predictions by grain and iteration. 
+ ts_index = grains + [BACKTEST_ITER] + forecast_df.sort_values(by=ts_index, inplace=True) + pdf = PdfPages(os.path.join(output_dir, PLOTS_FILE)) + for _, one_forecast in forecast_df.groupby(ts_index): + _draw_one_plot(one_forecast, time_column_name, grains, pdf) + pdf.close() + forecast_df.to_csv(os.path.join(output_dir, FORECASTS_FILE), index=False) + metrics = compute_all_metrics(forecast_df, grains + [BACKTEST_ITER]) + metrics.to_csv(os.path.join(output_dir, SCORES_FILE), index=False) + + # == Per grain analysis + pdf = PdfPages(os.path.join(output_dir, PLOTS_FILE_GRAIN)) + for _, one_forecast in forecast_df.groupby(grains): + _draw_one_plot(one_forecast, time_column_name, grains, pdf) + pdf.close() + metrics = compute_all_metrics(forecast_df, grains) + metrics.to_csv(os.path.join(output_dir, SCORES_FILE_GRAIN), index=False) + + +def run_remote_inference_naive( + test_experiment, + compute_target, + train_run, + test_dataset, + target_column_name, + rolling_evaluation_step_size=1, + inference_folder="./forecast", +): + # Create local directory to copy the model.pkl and forecsting_script.py files into. + # These files will be uploaded to and executed on the compute instance. 
+ os.makedirs(inference_folder, exist_ok=True) + shutil.copy("scripts/inference_script_naive.py", inference_folder) + + # Find the extension of the model file (.pkl or .pt) + ls = train_run.get_file_names() # list artifacts + regex = re.compile("outputs/model[.](pt|pkl)") + model_path = None + for v in ls: + matcher = regex.match(v) + if matcher: + model_path = matcher[0] + break + model_name = os.path.split(model_path)[-1] + + train_run.download_file(model_path, os.path.join(inference_folder, model_name)) + + inference_env = train_run.get_environment() + print("Finished getting training environment ...\n---") + + config = ScriptRunConfig( + source_directory=inference_folder, + script="inference_script_naive.py", + arguments=[ + "--target_column_name", + target_column_name, + "--test_dataset", + test_dataset.as_named_input(test_dataset.name), + "--rolling_evaluation_step_size", + rolling_evaluation_step_size, + ], + compute_target=compute_target, + environment=inference_env, + ) + + print("Submitting experiment ...\n---") + run = test_experiment.submit( + config, + tags={ + "training_run_id": train_run.id, + "run_algorithm": train_run.properties["run_algorithm"], + "valid_score": train_run.properties["score"], + "primary_metric": train_run.properties["primary_metric"], + }, + ) + + run.log("run_algorithm", run.tags["run_algorithm"]) + return run + + +if __name__ == "__main__": + args = {"forecasts": "--forecasts", "scores_out": "--output-dir"} + parser = argparse.ArgumentParser("Parsing input arguments.") + for argname, arg in args.items(): + parser.add_argument(arg, dest=argname, required=True) + parsed_args, _ = parser.parse_known_args() + input_dir = parsed_args.forecasts + output_dir = parsed_args.scores_out + with open( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), "automl_settings.json" + ) + ) as json_file: + automl_settings = json.load(json_file) + calculate_scores_and_build_plots(input_dir, output_dir, automl_settings) diff --git 
a/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/scripts/inference_script_naive.py b/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/scripts/inference_script_naive.py new file mode 100644 index 0000000000..7297955e36 --- /dev/null +++ b/v1/python-sdk/tutorials/automl-with-azureml/forecasting-demand-forecasting-many-models/scripts/inference_script_naive.py @@ -0,0 +1,123 @@ +""" +This is the script that is executed on the compute instance. It relies +on the model.pkl file which is uploaded along with this script to the +compute instance. +""" + +import os +import argparse +from azureml.core import Dataset, Run +from sklearn.externals import joblib +from pandas.tseries.frequencies import to_offset + +try: + import torch + + _torch_present = True +except ImportError: + _torch_present = False + + +def map_location_cuda(storage, loc): + return storage.cuda() + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--target_column_name", + type=str, + dest="target_column_name", + help="Target Column Name", + ) + parser.add_argument( + "--test_dataset", type=str, dest="test_dataset", help="Test Dataset" + ) + parser.add_argument( + "--rolling_evaluation_step_size", + type=int, + default=1, + dest="rolling_evaluation_step_size", + help="Rolling evaluation step size (optional).", + ) + + # args = parser.parse_args() + args, unknown = parser.parse_known_args() + + return args + + +def load_model(): + list_artifacts = os.listdir(".") + print("All artifacts ...\n---") + print(list_artifacts) + print("---") + + if "model.pt" in list_artifacts: + assert _torch_present, "Loading DNN models needs torch to be presented." 
+ if torch.cuda.is_available(): + map_location = map_location_cuda + else: + map_location = "cpu" + with open("model.pt", "rb") as fh: + fitted_model = torch.load(fh, map_location=map_location) + else: + fitted_model = joblib.load("model.pkl") + return fitted_model + + +def get_data(run, test_dataset_id): + ws = run.experiment.workspace + + # get the input dataset by id + test_dataset = Dataset.get_by_id(ws, id=test_dataset_id) + + test_df = test_dataset.to_pandas_dataframe().reset_index(drop=True) + return test_df + + +if __name__ == "__main__": + run = Run.get_context() + args = get_args() + target_column_name = args.target_column_name + test_dataset_id = args.test_dataset + rolling_evaluation_step_size = args.rolling_evaluation_step_size + predicted_column_name = "predicted" + + print(f"Target column name: {target_column_name}\n---") + print(f"Test dataset: {test_dataset_id}\n---") + print(f"Rolling evaluation step size: {rolling_evaluation_step_size}\n---") + + # Load model + fitted_model = load_model() + + # Get data + test_df = get_data(run, test_dataset_id) + + if target_column_name in test_df: + y_test = test_df.pop(target_column_name).values + print( + "Target column is present in the test dataset ...\n---\nFirst few rows of the test dataset after remving target column ...\n---" + ) + print(test_df.head()) + print("---") + else: + y_test = np.full(test_df.shape[0], np.nan) + + print("Rolling evaluation ...\n---") + df_all = fitted_model.rolling_forecast( + test_df, y_test, step=rolling_evaluation_step_size, ignore_data_errors=True + ) + + assign_dict = { + fitted_model.forecast_origin_column_name: "forecast_origin", + fitted_model.forecast_column_name: "predicted", + fitted_model.actual_column_name: target_column_name, + } + df_all.rename(columns=assign_dict, inplace=True) + + file_name = "outputs/predictions.csv" + export_csv = df_all.to_csv(file_name, header=True, index=False) # added Index + + # Upload the predictions into artifacts + 
run.upload_file(name=file_name, path_or_stream=file_name)