## Kaggle data management API

In [None]:
# !cd /home/ec2-user/SageMaker
# !pip install kaggle # now download the api token and store it to /home/ec2-user/.kaggle/kaggle.json
# !chmod 600 /home/ec2-user/.kaggle/kaggle.json # for privacy
# !kaggle competitions download -c aptos2019-blindness-detection

## Preprocessing: authentication management

In [1]:
%%time
import boto3
import os 
import urllib.request
import re
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

# Execution role returned by the SageMaker SDK for this notebook session.
role = get_execution_role()

# `bucket` is read as a global by upload_to_s3; `region_name` is read as a
# global by upload_dir_to_s3 and download_dir when they create their S3 clients.
bucket = "dataset-retinopathy"
region_name="us-east-1"

# Image URI for the built-in 'image-classification' algorithm.
# NOTE(review): the region passed here comes from boto3.Session(), not from
# `region_name` above -- confirm the two agree. The recorded cell output also
# warns that get_image_uri has been renamed in sagemaker>=2.
training_image = get_image_uri(boto3.Session().region_name, 'image-classification')

def download(url):
    """Download ``url`` into the current working directory, skipping existing files.

    The local file name is the last ``/``-separated component of ``url``.
    If a file with that name already exists, nothing is fetched.

    The payload is first written to ``<filename>.part`` and only renamed to
    its final name once the transfer has finished, so an interrupted download
    never leaves a truncated file that later calls would mistake for a
    complete one.

    Args:
        url: Address of the file to fetch (any scheme ``urllib`` supports).

    Raises:
        ValueError: If ``url`` ends with ``/`` and therefore yields no file name.
        urllib.error.URLError: If the transfer fails (propagated from urllib).
    """
    filename = url.split("/")[-1]
    if not filename:
        raise ValueError("Cannot derive a file name from URL: {!r}".format(url))
    if os.path.exists(filename):
        return

    partial = filename + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        # Atomic rename: the final name only ever refers to a complete file.
        os.replace(partial, filename)
    finally:
        # Only still present when the transfer or the rename failed.
        if os.path.exists(partial):
            os.remove(partial)

def upload_to_s3(channel, file):
    """Upload one local file to the notebook's S3 bucket under ``channel``.

    The object key is ``<channel>/<file>``. ``file`` is used verbatim, so any
    directory components in it become part of the key. The destination bucket
    is the module-level ``bucket`` variable.

    Args:
        channel: Key prefix to store the object under.
        file: Path of the local file to upload.
    """
    s3 = boto3.resource('s3')
    key = channel + '/' + file
    # The context manager closes the handle even if the upload raises;
    # previously the file object was opened and never closed.
    with open(file, "rb") as data:
        s3.Bucket(bucket).put_object(Key=key, Body=data)
    
    
# download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec')
# download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec')

# upload_to_s3(s3_train_key, 'caltech-256-60-train.rec')
# upload_to_s3(s3_validation_key, 'caltech-256-60-val.rec')
import botocore
from botocore.exceptions import ClientError

def upload_dir_to_s3(bucket, s3_folder, dir_to_upload):
    """Recursively upload a local directory to ``s3://<bucket>/<s3_folder>/``.

    Every file below ``dir_to_upload`` is uploaded to a key made of
    ``s3_folder`` plus the file's path relative to ``dir_to_upload`` (with
    forward slashes). If an object already exists at that key it is deleted
    first and then re-uploaded. Per-file failures are logged and counted
    instead of aborting the whole run, and the final message reports whether
    any file failed.

    The S3 client is created in the module-level ``region_name`` region.

    Args:
        bucket: Name of the destination S3 bucket.
        s3_folder: Key prefix under which the directory tree is stored.
        dir_to_upload: Local directory to walk.
    """
    # Local imports: the notebook never imports `logging`, so the original
    # error handlers raised NameError. upload_file reports failures as
    # S3UploadFailedError rather than ClientError.
    import logging
    from boto3.exceptions import S3UploadFailedError

    s3_client = boto3.client('s3', region_name=region_name)
    print("Uploading {} to s3://{}/{}".format(dir_to_upload, bucket, s3_folder))
    failures = 0
    # enumerate local files recursively
    for root, _dirs, files in os.walk(dir_to_upload):
        for filename in files:
            # construct the full local path
            local_path = os.path.join(root, filename)
            # construct the full S3 key (always '/'-separated, even on Windows)
            relative_path = os.path.relpath(local_path, dir_to_upload)
            s3_path = os.path.join(s3_folder, relative_path).replace("\\", "/")

            # head_object raises ClientError when the key is missing (or not
            # readable); in both cases we go straight to the upload.
            try:
                s3_client.head_object(Bucket=bucket, Key=s3_path)
                already_on_s3 = True
            except ClientError:
                already_on_s3 = False

            if already_on_s3:
                print("Path found on S3! Deleting %s..." % s3_path)
                try:
                    s3_client.delete_object(Bucket=bucket, Key=s3_path)
                except ClientError as e:
                    print("Unable to delete from s3 %s..." % s3_path)
                    logging.error(e)
                    failures += 1
                    # As before, a file whose old copy cannot be removed is skipped.
                    continue

            try:
                s3_client.upload_file(local_path, Bucket=bucket, Key=s3_path)
            except (ClientError, S3UploadFailedError) as e:
                logging.error(e)
                failures += 1

    if failures:
        print("Upload finished with {} failed file(s).".format(failures))
    else:
        print("Upload completed successfully.")
    
def download_dir(s3_folder, local_path, bucket=""):
    """
    Download every object under an S3 prefix into a local directory.

    Each object's full key is appended to ``local_path``, so the key
    hierarchy (including the prefix itself) is recreated locally. Keys ending
    in '/' are treated as directory placeholders: their directory is created
    but nothing is downloaded for them. A prefix that matches no objects
    results in no downloads rather than an error.

    The S3 client is created in the module-level ``region_name`` region.

    params:
    - s3_folder: key prefix to match in s3
    - local_path: local path of the folder in which to place files
    - bucket: s3 bucket with target contents

    raises:
    - botocore.exceptions.ClientError: for any download error other than a
      404 (a 404 is reported with a printed message and skipped)
    """
    client = boto3.client('s3', region_name=region_name)
    keys = []
    dirs = []
    list_kwargs = {
        'Bucket': bucket,
        'Prefix': s3_folder,
    }
    # Page through the listing until S3 stops returning a continuation token.
    while True:
        results = client.list_objects_v2(**list_kwargs)
        # 'Contents' is absent when a page holds no objects (e.g. the prefix
        # matches nothing); default to an empty list instead of iterating None.
        for obj in results.get('Contents', []):
            key = obj.get('Key')
            if key.endswith('/'):
                dirs.append(key)
            else:
                keys.append(key)
        next_token = results.get('NextContinuationToken')
        if not next_token:
            break
        list_kwargs['ContinuationToken'] = next_token

    for d in dirs:
        parent = os.path.dirname(os.path.join(local_path, d))
        # dirname is '' when there is no parent component; makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)

    for k in keys:
        dest_pathname = os.path.join(local_path, k)
        parent = os.path.dirname(dest_pathname)
        if parent:
            os.makedirs(parent, exist_ok=True)
        try:
            print("Downloading {}".format(dest_pathname))
            client.download_file(bucket, k, dest_pathname)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "404":
                print("The object does not exist.")
            else:
                raise



The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: 1.


CPU times: user 719 ms, sys: 131 ms, total: 849 ms
Wall time: 943 ms


## Preprocessing the files before uploading

In [2]:
# mv /home/ec2-user/aptos2019-blindness-detection.zip /home/ec2-user/SageMaker/
# !unzip -q /home/ec2-user/SageMaker/aptos2019-blindness-detection.zip -d /home/ec2-user/SageMaker/data  # -q: quiet mode (no verbose output)
# !rm -rf /home/ec2-user/SageMaker/data

## Uploading files to s3

In [3]:
# upload_dir_to_s3(bucket=bucket, s3_folder='aptos-2019', dir_to_upload="/home/ec2-user/SageMaker/data")
# !rm -rf /home/ec2-user/SageMaker/imageclassification_caltech_2020-12-24

In [17]:
# !git clone https://github.com/RamsteinWR/Diabetic-Retinopathy-Blindness-Detection.git

Cloning into 'Diabetic-Retinopathy-Blindness-Detection'...
remote: Enumerating objects: 5, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 178 (delta 0), reused 1 (delta 0), pack-reused 173[K
Receiving objects: 100% (178/178), 102.77 MiB | 46.84 MiB/s, done.
Resolving deltas: 100% (17/17), done.


In [4]:
# !cd /home/ec2-user/SageMaker/Diabetic-Retinopathy-Blindness-Detection
# !pip install -r requirements.txt


### AssertionError: NVidia Apex package must be installed. See https://github.com/NVIDIA/apex.


In [None]:
# !pip install --quiet -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" git+https://github.com/NVIDIA/apex
# !pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.3

<a id='idg4c.1'></a>
## Dependencies
___
### import packages and check SageMaker version

In [6]:
import json
import torch
import tarfile
import pickle
import matplotlib.pyplot as plt
import torchvision as tv
import pathlib                          # Path management tool (standard library)
import subprocess                       # Runs shell commands via Python (standard library)
import sagemaker                        # SageMaker Python SDK
from sagemaker.pytorch import PyTorch   # PyTorch Estimator from the SageMaker SDK

Using manual seed: 42
InplaceABN not available, using classic BatchNorm+Act
Dropped 250 bad samples
Train [1328, 244, 666, 126, 194] Valid [477, 87, 254, 61, 78]
Datasets         : /home/ec2-user/SageMaker/data
  Train size     : 42 2558
  Valid size     : 16 957
  Aptos 2019     : True
  Aptos 2015     : False
  IDRID          : False
  Messidor       : False
Train session    : Dec24_22_50/efficientb6_max_512_medium_aptos2019_fold0_hardcore_euclid
  FP16 mode      : True
  Fast mode      : False
  Mixup          : False
  Balance cls.   : False
  Balance ds.    : False
  Warmup epoch   : 10
  Train epochs   : 100
  Workers        : 2
  Fold           : 0
  Log dir        : runs/Dec24_22_50/efficientb6_max_512_medium_aptos2019_fold0_hardcore_euclid
  Augmentations  : medium
Model            : efficientb6_max
  Parameters     : (39449482, 39449482)
  Image size     : (512, 512)
  Freeze encoder : False
  Dropout        : 0.3
  Classes        : ['No DR', 'Mild', 'Moderate', 'Severe', 'Pr