# Programmatically Start/Terminate AWS Service Needed for Capstone
Works provided... 
- the capstone.cfg has the proper credentials
- configuration variable for your environment are input
- the workspace doesn't reset while running
- the workspace doesn't reset between running a cluster and before terminating a cluster

# Import Global Libraries

In [1]:
import os
import boto3
import pandas as pd
import json
import botocore
import time
import datetime as dt

# Obtain Needed Configuration Variables

In [2]:
def config_variables(cfgPath):
    """
    using the filepath to the cfg, 
    parses variables from a cfg file
    """
    import configparser 
    config = configparser.ConfigParser()
    config.read_file(open(cfgPath))
    
    return config

In [58]:
cfgPath='/home/workspace/capstone.cfg'

# aws profile
os.environ["AWS_ACCESS_KEY_ID"] = \
    config_variables(cfgPath).get("AWS", "AWS_ACCESS_KEY_ID")

os.environ['AWS_SECRET_ACCESS_KEY'] = \
    config_variables(cfgPath).get("AWS", "AWS_SECRET_ACCESS_KEY")

os.environ['REGION'] = config_variables(cfgPath).get("AWS", "REGION")

os.environ['DEFAULT_OUTPUT_FORMAT'] = \
    config_variables(cfgPath).get("AWS", "DEFAULT_OUTPUT_FORMAT")

# S3 
REGION = config_variables(cfgPath).get("AWS", "REGION")
SOURCE_BUCKET = config_variables('/home/workspace/capstone.cfg') \
    .get("S3", "SOURCE_BUCKET")
# print(REGION, SOURCE_BUCKET)

# EMR 
key_pair_name = config_variables(cfgPath).get("EMR", "key_pair_name")
pem = config_variables(cfgPath).get("EMR", "pem")
pemPath = config_variables(cfgPath).get("EMR", "pemPath")
profile = config_variables(cfgPath).get("EMR", "profile")
region = config_variables(cfgPath).get("EMR", "region")

#IAM
IAM_ROLE_NAME = config_variables(cfgPath).get("IAM", "IAM_ROLE_NAME")

# print('key_pair_name: ', key_pair_name)
# print('pem:           ', pem)
# print('pemPath:       ', pemPath)
# print('profile:       ', profile)
# print('region:        ', region)

# Run Terminal Commands

In [61]:
def run_CmdList(cmd_list):
    """
    - prereq: import subprocess
    - relies on the subprocess.call method to open
      the respective shell of the operating system
      and execute a list of commands (as strings)
    """
    import subprocess
    for cmd in cmd_list:
        subprocess.call(cmd, shell=True)
        print('command run: '+ cmd)

## Customize Workspace - Programmatically

In [60]:
run_CmdList(
    [
        ('pip uninstall awscli'),
        
        ('rm -rf awscliv2.zip + \
          rm -rf aws'),
        
        ('curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"'),
        
        ('unzip awscliv2.zip'),
        
        ('sudo ./aws/install'),
        
        ('sudo apt-get update + \
          sudo apt-get install -yy less + \
          sudo apt-get install openssh-client'),
        
#         ('sudo wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar'),
#         ('sudo wget https://jar-download.com/artifacts/org.apache.hadoop/hadoop-aws/2.7.0/source-code/org/apache/hadoop/fs/s3/S3FileSystem.java'),
        
        ('sudo python3 -m pip install pandas'),
        ('pip install --upgrade numpy')
    ]
)

command run: pip uninstall awscli
command run: rm -rf hadoop-aws-2.7.3.jar +           rm -rf awscliv2.zip +           rm -rf aws
command run: curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
command run: unzip awscliv2.zip
command run: sudo ./aws/install
command run: sudo apt-get update +           sudo apt-get install -yy less +           sudo apt-get install openssh-client
command run: sudo wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar
command run: sudo python3 -m pip install pandas
command run: pip install --upgrade numpy


## AWS Configure - Programmatically 

In [62]:
profile = "default"

# suppress print for aws configure variables
import contextlib

with contextlib.redirect_stdout(None):
    run_CmdList(
        [
            ('rm -rf ~/.aws/config + \
              rm -rf ~/.aws/credentials'),
            
            ('rmdir /root/.aws'),
            ('mkdir /root/.aws'),
            
            ('aws --profile '
                 + profile \
                 + ' configure set aws_access_key_id ' \
                 + os.environ["AWS_ACCESS_KEY_ID"]),
            
            ('aws --profile '
                 + profile \
                 + ' configure set aws_secret_access_key ' \
                 + os.environ["AWS_SECRET_ACCESS_KEY"]),
            
            ('aws --profile '
                 + profile \
                 + ' configure set region ' \
                 + os.environ["REGION"]), 
            
            ('aws --profile '
                 + profile \
                 + ' configure set output_format ' \
                 + os.environ["DEFAULT_OUTPUT_FORMAT"] ), 
            
            ('export AWS_CONFIG_FILE=~/.aws/config'),
            ('export AWS_SHARED_CREDENTIALS_FILE=~/.aws/credentials')
        ]
    )
print('AWS configure variables have been set')

AWS configure variables have been set


# Create boto3 session, clients and resources

Create boto3 session

In [101]:
def createBoto3Session(profile_name='default'): 
    """
    create boto3 session
    params: profile can be set 
    or defaults to 'default'
    """
#     import boto3
    session = boto3.session.Session(profile_name=profile_name)    
    print('Created session')
    return session

In [102]:
session = createBoto3Session(profile)

Created session


Create boto3 clients

In [103]:
def createSessionClients(session, awsService, REGION):  
    """
    create boto3 client 
    from a session.
    
    params: aws service and region  
    """
#     import boto3
    client = session.client(awsService,
                          region_name= REGION)
    print("Created Client: " + awsService)
    
    return client

In [104]:
ec2 = createSessionClients(session,'ec2', REGION)
emr = createSessionClients(session,'emr', REGION)
iam = createSessionClients(session,'iam', REGION)
s3_client = createSessionClients(session,'s3', REGION)
sagemaker = createSessionClients(session,'sagemaker', REGION)

Created Client: ec2
Created Client: emr
Created Client: iam
Created Client: s3
Created Client: sagemaker


Create boto3 resources

In [105]:
def createSessionResources(session, awsService, REGION): 
    """
    creates boto3 resource
    from a session
    
    params: aws service and region
    """
#     import boto3
    resource = session.resource(awsService, region_name= REGION)
    print("Created Resource: " + awsService)
    
    return resource

In [106]:
ec2_resource = createSessionResources(session, 'ec2', REGION)
s3_resource = createSessionResources(session, 's3', REGION)

Created Resource: ec2
Created Resource: s3


# IAM Role

### Create new role
- Create an IAM Role that makes enables Sagemaker access

In [None]:
# def createNew_IAM_Role(IAM_ROLE_NAME):
#     """  
#     - prereq: using boto3 - create clients
#     - uses the create_role method to create a new IAM Role
#     - referenced: Udacity, Lesson 3: Implementing Data Warehouses on AWS, Exercise 2: Infrastructure as Code
#     """ 
#     print('Creating a new IAM Role')
#     try:        
#         name = iam.create_role(
#             Path='/',
#             RoleName=IAM_ROLE_NAME,       
#             Description = "Allows sagemaker to call AWS services on your behalf.",
#             AssumeRolePolicyDocument=json.dumps({
#                         "Version": "2012-10-17",
#                         "Statement": [
#                             {"Sid": "",
#                                 "Effect": "Allow",
#                                 "Principal": {"Service": "sagemaker.amazonaws.com"},
#                                 "Action": "sts:AssumeRole"} 
#                             ]}) 
#         )
#         return(name)        
#     except Exception as e:
#         print(e)

In [None]:
# # print(IAM_ROLE_NAME)
# createNew_IAM_Role(IAM_ROLE_NAME)

### Attach Policy

In [None]:
# def IAM_attachingPolicy(IAM_ROLE_NAME):
#     """  
#     - prereq: using boto3 - create clients, create needed IAM Role
#     - uses the attach_role_policy method to attach a policy to an IAM Role
#     - referenced: Udacity, Lesson 3: Implementing Data Warehouses on AWS, Exercise 2: Infrastructure as Code
#     """ 
#     print('Creating the IAM attaching policy')
#     try:
#         iam.attach_role_policy(RoleName=IAM_ROLE_NAME,
#                            PolicyArn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess"
#                           )['ResponseMetadata']['HTTPStatusCode']
#     except Exception as e:
#         print(e)

In [None]:
# # IAM_attachingPolicy(IAM_ROLE_NAME)
# IAM_attachingPolicy(IAM_ROLE_NAME)

### Get ARN

In [None]:
# def IAM_roleARN(IAM_ROLE_NAME):   
#     """  
#     - prereq: using boto3 - create clients, create needed IAM Role
#     - uses the get-role method to obtain the ARN
#     - referenced: Udacity, Lesson 3: Implementing Data Warehouses on AWS, Exercise 2: Infrastructure as Code
#     """  
#     try: 
#         roleArn = iam.get_role(RoleName=IAM_ROLE_NAME)['Role']['Arn']
#         return roleArn
#     except Exception as e:
#         print(e)

In [None]:
# IAM_roleARN(IAM_ROLE_NAME)

# Create a new S3 Bucket and Load with source data files

Create new s3 bucket

In [35]:
def create_new_S3(s3_client, BUCKET, REGION):
    """
    create a new S3 bucket
    
    params: bucket name and region
    """
    print("Creating new S3 Bucket")
    try: 
        s3_new = s3_client.create_bucket(
                    ACL= 'public-read-write',
                    Bucket= BUCKET,
                    CreateBucketConfiguration={
                        'LocationConstraint': REGION})
         
        return s3_new
            
    except Exception as e:
        print(e)

In [36]:
create_new_S3(s3_client, SOURCE_BUCKET, REGION)

Creating new S3 Bucket
An error occurred (BucketAlreadyOwnedByYou) when calling the CreateBucket operation: Your previous request to create the named bucket succeeded and you already own it.


Upload source files to an s3 bucket

In [None]:
def upload_multiple_files_toS3Folder(s3_client, PATH, KEY, BUCKET, folder):  
    """
    using the boto3 s3 client
    loops through a directory for file types
    uploads to an S3 bucket by partition
    """
#     import os
    import glob    
    
    print("Uploading source files")    
    files = glob.glob(PATH + KEY)  

    try: 
        for f in files: 
            upload = s3_client.upload_file(
                Filename = f,
                Bucket = BUCKET,
                Key = folder + f.split("/")[-1])

        return upload
    
    except Exception as e:
        print(e)  

In [None]:
upload_multiple_files_toS3Folder(s3_client, 'source_namesbystate/', '*.TXT', SOURCE_BUCKET, 'SOURCE/')
upload_multiple_files_toS3Folder(s3_client, 'source_weather/', '*.txt', SOURCE_BUCKET, 'SOURCE/')
upload_multiple_files_toS3Folder(s3_client, 'source_weather/', '*.csv', SOURCE_BUCKET, 'SOURCE/')
upload_multiple_files_toS3Folder(s3_client, 'source_states/', '*.json', SOURCE_BUCKET, 'SOURCE/')
# upload_multiple_files_toS3Folder(s3_client, 'notebook/', '*.ipynb', SOURCE_BUCKET, 'NOTEBOOK/')


# Create emr cluster

### configure a key pair with private permissions - and move into ~/.aws

In [107]:
# reference: https://www.learnaws.org/2020/12/16/aws-ec2-boto3-ultimate-guide/

def configureKeyPair(keyName, sourcePath, destinationPath ):   
    """
    configure an EC2 keyname pair 
    write the private key to file with 400 permissions
    """
#     import os
    import shutil
    key_pair = ec2.create_key_pair(KeyName=keyName)
    private_key = key_pair["KeyMaterial"]

    # write private key to file with 400 permissions
    with os.fdopen(os.open(sourcePath, os.O_WRONLY | os.O_CREAT, 0o400), "w+") as fileHandle:
        fileHandle.write(private_key)
        
    # move file to ~/.aws
    shutil.move(sourcePath, destinationPath)

In [108]:
configureKeyPair(key_pair_name, "/home/workspace/capstoneCluster_west.pem", "/root/.aws/capstoneCluster_west.pem")

### create EMR cluster with an instance group

In [109]:
# reference: https://hands-on.cloud/working-with-emr-in-python-using-boto3/#h-create-emr-cluster-with-an-instance-group

def createEmrClusterInstnceGrp(clusterName, 
                               emrRelease, 
                               MstInstnceType, 
                               CoreInstnceType, 
                               CoreInstanceCount,
                               Ec2KeyName, 
                               Ec2SubnetId, 
                               appName):
    """
    create an emr cluster with an instance group
    """
    # import boto3
    import json

    response = emr.run_job_flow(
        Name=clusterName,
        ReleaseLabel=emrRelease,
        Instances={
            'KeepJobFlowAliveWhenNoSteps': True,
            'TerminationProtected': False,
            'InstanceGroups': [
                {
                    'Name': 'Master',
                    'Market': 'ON_DEMAND',
                    'InstanceRole': 'MASTER',
                    'InstanceType': MstInstnceType,
                    'InstanceCount': 1,
                    'EbsConfiguration': {
                        'EbsBlockDeviceConfigs': [
                            {
                                'VolumeSpecification': {
                                    'VolumeType': 'gp2',
                                    'SizeInGB': 10
                                },
                                'VolumesPerInstance': 1
                            },
                        ],
                        'EbsOptimized': False
                    }
                },
                {
                    'Name': 'Core',
                    'Market': 'ON_DEMAND',
                    'InstanceRole': 'CORE',
                    'InstanceType': CoreInstnceType,
                    'InstanceCount': CoreInstanceCount,
                    'EbsConfiguration': {
                        'EbsBlockDeviceConfigs': [
                            {
                                'VolumeSpecification': {
                                    'VolumeType': 'gp2',
                                    'SizeInGB': 10
                                },
                                'VolumesPerInstance': 1
                            },
                        ],
                        'EbsOptimized': False
                    }
                },
            ],
            'Ec2KeyName': Ec2KeyName,
            'Ec2SubnetId': Ec2SubnetId
        },
        Applications=[
            {
                'Name': appName,
            },

        ],
        VisibleToAllUsers=True,
        ServiceRole='EMR_DefaultRole',
        JobFlowRole='EMR_EC2_DefaultRole'
    )



#     print(json.dumps(response, indent=4, sort_keys=True, default=str))  
    response = emr.list_clusters()
    cluster_id = response['Clusters'][0]['Id']
    print("Created " + clusterName + ": " + cluster_id)

In [110]:
createEmrClusterInstnceGrp("capstoneCluster", 
                               'emr-5.28.0', 
                               'm5.xlarge', 
                               'm5.xlarge', 
                               1,
                               key_pair_name, 
                               'subnet-08e09f3646458e68f', 
                               'Spark')

Created capstoneCluster: j-21Q51T38GLHHT


In [111]:
def get_newlyCreated_emrClusterId(emr_client):
    """
    using boto3 emr client, gets
    the cluster id
    """
    response = emr.list_clusters()
    cluster_id = response['Clusters'][0]['Id']
    cluster_name = response['Clusters'][0]['Name']
    print("Cluster Name = " + cluster_name + ", Cluster ID = " + cluster_id)
    
    return cluster_id

In [112]:
clusterId = get_newlyCreated_emrClusterId(emr)

Cluster Name = capstoneCluster, Cluster ID = j-21Q51T38GLHHT


In [113]:
def get_emrClusterStatus(emr_client, clusterId):
    """
    using the boto3 emr client, get the status of the
    cluster using the cluster id
    """
    try: 
        response = emr.list_clusters()
        for cluster in response['Clusters']:
            if cluster['Id'] == clusterId:
                clusterStatus = cluster['Status']['State']
#                 print("Cluster State = " + clusterStatus)
                return clusterStatus
            else: 
                pass

    except Exception as e: 
        print(e)

In [114]:
clusterStatus = get_emrClusterStatus(emr, clusterId)
clusterStatus

'STARTING'

In [115]:
# clusterStatus = get_emrClusterStatus(emr, 'j-2SZAN9KS8PBH')
# clusterStatus

### identify the master node

In [116]:
def identifyMasterNode(emr_client, clusterId):  
    """
    using the boto3 emr client and cluster id of an
    emr cluster, waits for the cluster status to move from 
    starting to waiting before brining back the 
    master node
    """
    import time 
#     import datetime as dt

    try: 
        startTime = dt.datetime.now() 
        print("Cluster Id: " + clusterId)
#         print("So begins the wait...")
        clusterStatus = get_emrClusterStatus(emr_client, clusterId)           
        while True:
            if clusterStatus == 'STARTING':                
                currentTime = dt.datetime.now()
                time.sleep(30)                
                clusterStatus = get_emrClusterStatus(emr, clusterId)                
                print("elapsed time: ", (currentTime - startTime))   
            
            elif clusterStatus == 'WAITING':                
                response = emr.list_instances(
                ClusterId=clusterId,
                InstanceGroupTypes=['MASTER'])
                
                response
                masterNode= response['Instances'][0]['PublicDnsName'] 

                print('Master Node: ' + masterNode)
                return masterNode
            
            else:
                print("Uh-oh! Cluster Status not 'STARTING' or 'WAITING'")
                break
                
    except Exception as e:
        print(e)      

In [117]:
identifyMasterNode(emr, clusterId)

Cluster Id: j-21Q51T38GLHHT
elapsed time:  0:00:00.196990
elapsed time:  0:00:30.678513
elapsed time:  0:01:01.157459
elapsed time:  0:01:31.732478
elapsed time:  0:02:02.210017
elapsed time:  0:02:32.591587
elapsed time:  0:03:03.058114
elapsed time:  0:03:33.586440
Master Node: ec2-54-190-29-181.us-west-2.compute.amazonaws.com


'ec2-54-190-29-181.us-west-2.compute.amazonaws.com'

### load files to MasterNode

In [118]:
def scp_CmdCalls(emr_client, clusterId, pemPath, pem): 
    """
    using the boto3 emr client, 
    load files to the master node 
    in a specific cluster
    """
#     import boto3
    
    masterNode = identifyMasterNode(emr_client, clusterId) 

    scp_cmdList = []

    pem_toMasterNode = 'scp -i ' + pemPath + ' ' + pemPath + ' hadoop@' + masterNode + ':/home/hadoop/'
    cfg_toMasterNode = 'scp -i ~/.aws/' + pem + ' capstone.cfg hadoop@' + masterNode + ':/home/hadoop/'
#     py_toMasterNode = 'scp -i ~/.aws/' + pem + ' capstone.py hadoop@' + masterNode + ':/home/hadoop/'

    scp_cmdList.append(pem_toMasterNode + ", " + cfg_toMasterNode)    
    scp_cmds = run_CmdList(scp_cmdList)

    return scp_cmds

In [119]:
scp_CmdCalls(emr, clusterId, pemPath, pem)

Cluster Id: j-21Q51T38GLHHT
Master Node: ec2-54-190-29-181.us-west-2.compute.amazonaws.com
command run: scp -i ~/.aws/capstoneCluster_west.pem ~/.aws/capstoneCluster_west.pem hadoop@ec2-54-190-29-181.us-west-2.compute.amazonaws.com:/home/hadoop/, scp -i ~/.aws/capstoneCluster_west.pem capstone.cfg hadoop@ec2-54-190-29-181.us-west-2.compute.amazonaws.com:/home/hadoop/


### MANUAL STEP: connect to master node

In [120]:
masterNode = identifyMasterNode(emr, clusterId)
print('run this command in the terminal-->  ' + 'ssh hadoop@' + masterNode + ' -i ' + pemPath)

Cluster Id: j-21Q51T38GLHHT
Master Node: ec2-54-190-29-181.us-west-2.compute.amazonaws.com
run this command in the terminal-->  ssh hadoop@ec2-54-190-29-181.us-west-2.compute.amazonaws.com -i ~/.aws/capstoneCluster_west.pem


In [None]:
# def connectSSH_toMasterNode(emr_client, clusterId, pemPath):
#     """
#     """
# #     import boto3

#     masterNode = identifyMasterNode(emr_client, clusterId)    
#     connectSSH = ['ssh hadoop@' + masterNode + ' -i ' + pemPath]    
#     connectionEMR = run_CmdList(connectSSH)
    
#     return connectionEMR

In [None]:
# connectSSH_toMasterNode(emr, clusterId, pemPath)

# Cleanup 

### terminate emr cluster

In [121]:
def terminate_emrCluster(emr_client, clusterId):
    """
    user input required to start termination of a cluster
    looks to ensure status of the cluster is terminated
    """
#     import boto3
#     import datetime as dt
    try:
        terminate_emrCluster = input('Ready to delete your EMR Cluster? Please answer: Yes or No...').lower()  
        
        if terminate_emrCluster.startswith('y'):  
            
            startTime = dt.datetime.now()
            clusterStatus = get_emrClusterStatus(emr_client, clusterId)   
            
            if clusterStatus == 'WAITING':
                response = emr_client.terminate_job_flows(
                JobFlowIds=[clusterId]) 
                
                clusterStatus = get_emrClusterStatus(emr_client, clusterId)   
                
                while "TERM" not in clusterStatus:
                    currentTime = dt.datetime.now()
                    time.sleep(10)

                    clusterStatus = get_emrClusterStatus(emr_client, clusterId)

                    print("elapsed time: ", (currentTime - startTime))
                    
                print("Rest Assured! Your EMR = " + clusterStatus)

            else:
                print("Congrats! Cluster Status = " + clusterStatus)
                
        else:
            print("Okaaaay. It's your funeral!")
    
    except Exception as e:
        print(e)     

In [122]:
# happy path
terminate_emrCluster(emr, clusterId)

Ready to delete your EMR Cluster? Please answer: Yes or No... yes


Rest Assured! Your EMR = TERMINATING


In [86]:
# test
# terminate_emrCluster(emr, 'j-19112GB5ITTAP')

In [123]:
# double-check cluster status
get_emrClusterStatus(emr, clusterId)

'TERMINATING'

### delete the key pair

In [124]:
def deleteKeyPair(keyName):
    """
    user input required to terminate an EC2 keypair
    """
    try:
        deleteKeyPair = input('Ready to delete your EC2 KeyPair? Please answer: Yes or No...').lower()
        if deleteKeyPair.startswith('y'):
            response = ec2.delete_key_pair(KeyName=keyName)
            return response
        else: 
            print("Okay. Maybe later then.")

    except Exception as e:
        print(e)

In [125]:
deleteKeyPair(key_pair_name)

Ready to delete your EC2 KeyPair? Please answer: Yes or No... yes


{'ResponseMetadata': {'RequestId': 'eec83bac-5925-4325-ae64-9c93bf273717',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'eec83bac-5925-4325-ae64-9c93bf273717',
   'cache-control': 'no-cache, no-store',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'content-type': 'text/xml;charset=UTF-8',
   'content-length': '227',
   'date': 'Sun, 16 Oct 2022 17:47:13 GMT',
   'server': 'AmazonEC2'},
  'RetryAttempts': 0}}

### empty s3 buckets and delete s3 buckets

In [126]:
def delete_S3(s3_resource, BUCKET):
    """
    using boto3 s3 resource, empties the contents of an S3 bucket
    then deletes the bucket
    """
    try:
        deleteS3 = input('Ready to delete your S3 Bucket? Please answer: Yes or No...').lower()
        if deleteS3.startswith('y'):
            # to use .Bucket, the boto3 resource must be used
            s3_bucket = s3_resource.Bucket(BUCKET)
            
            s3_bucket.objects.all().delete()
            print('Bucket:', BUCKET, 'has been emptied')
            
            s3_bucket.delete()
            print('Bucket:', BUCKET, 'has been deleted')
        else: 
            print("Okay. Maybe later then.")

    except Exception as e:
        print(e)

In [127]:
delete_S3(s3_resource, SOURCE_BUCKET)

Ready to delete your S3 Bucket? Please answer: Yes or No... yes


Bucket: capstonesources has been emptied
Bucket: capstonesources has been deleted


### delete notebook (WIP)

In [None]:
# def deleteEMRnb(sagemaker_client, notebook):
#     """
#     using sagemaker, stop then deletes a notebook instance
#     """
#     try:
#         deleteEMRnb = input('Ready to delete your EMR Notebook? Please answer: Yes or No...').lower()
#         if deleteEMRnb.startswith('y'):
#             response_stop = sagemaker.stop_notebook_instance(NotebookInstanceName=notebook)
#             response_delete = sagemaker.delete_notebook_instance(NotebookInstanceName=notebook)
#             return response_delete
#         else: 
#             print("Okay. Maybe later then.")

#     except Exception as e:
#         print(e)

In [None]:
# deleteEMRnb(sagemaker, 'capstone')

### delete IAM Role
- to conserve costs, all AWS resources must be deleted after the required jobs have been completed and validated
- running end-to-end as program (vs. in ipython), by requiring input, the user is forced to formally acknowledge the deletion of the resource before the job will continue

In [None]:
# def deleteIAM():
#     """
#     deletes an iam role
#     """
#     deleteIAM = input('Ready to delete your IAM Role Name? Please answer: Yes or No...').lower()

#     try:
#         if deleteIAM.startswith('y'):
#             iam.detach_role_policy(RoleName=IAM_ROLE_NAME, PolicyArn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess")
#             iam.delete_role(RoleName=IAM_ROLE_NAME)
#             print('Role:',IAM_ROLE_NAME,'has been deleted')
#         else: 
#             print("Okay. Maybe later then.")

#     except Exception as e:
#         print(e)

### remove folders/files from workspace

In [None]:
def workspace_cleanUpFiles(cmd_list):
    """
    removes files from the workspace
    """
    try:
        cleanWrksp = input('Ready to cleanup your Udacity Workspace? Please answer: Yes or No...').lower()
        if cleanWrksp.startswith('y'):
            rmFiles = run_CmdList(cmd_list)
            return rmFiles
        else: 
            print('Manual cleanup for you then!')
    except Exception as e:
        print(e)

In [None]:
workspace_cleanUpFiles(
    [
        ('rm -rf hadoop-aws-2.7.0.jar'),
        ('rm -rf S3FileSystem.java'),
        ('rm -rf awscliv2.zip'),
        ('rm -rf aws')
    ]
)