### Copying the CSV data to the S3 Bucket

In [1]:
# View our data
import pandas as pd

data = pd.read_csv("csv/new_dataset.csv")
data.head()

Unnamed: 0,track_id,artists,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


### Check Pre-Requisites from an earlier notebook

In [2]:
%store -r setup_s3_bucket_passed

In [3]:
try:
    setup_s3_bucket_passed
except NameError:
    print("+++++++++++++++++++++++++++++++")
    print("[ERROR] YOU HAVE TO RUN ALL NOTEBOOKS IN THE SETUP FOLDER FIRST. You are missing Setup S3 Bucket.")
    print("+++++++++++++++++++++++++++++++")

In [4]:
print(setup_s3_bucket_passed)

True


In [5]:
import boto3
import sagemaker
import pandas as pd

sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity().get("Account")

sm = boto3.Session().client(service_name="sagemaker", region_name=region)

### Set S3 Source Location (Local CSV File)

In [6]:
data_path = "/root/AAI-540/Module2/csv"
print(data_path)

/root/AAI-540/Module2/csv


In [7]:
%store data_path

Stored 'data_path' (str)


### Set S3 Destination Location (Our Private S3 Bucket)

In [8]:
s3_private_path_csv = "s3://{}/module2_data/csv".format(bucket)
print(s3_private_path_csv)

s3://sagemaker-us-east-1-004608622582/module2_data/csv


In [9]:
%store s3_private_path_csv

Stored 's3_private_path_csv' (str)


### Copy Data From the Local File to Private S3 Bucket

In [10]:
!aws s3 cp --recursive $data_path/ $s3_private_path_csv/ --exclude "*" --include "new_dataset.csv"

upload: csv/new_dataset.csv to s3://sagemaker-us-east-1-004608622582/module2_data/csv/new_dataset.csv


### Check to see if the files are copied over successfully

In [11]:
!aws s3 ls $s3_private_path_csv/

2024-05-18 03:30:50   14676903 new_dataset.csv


In [12]:
from IPython.core.display import display, HTML

display(
    HTML(
        '<b>Review <a target="blank" href="https://s3.console.aws.amazon.com/s3/buckets/sagemaker-{}-{}/module2_data/?region={}&tab=overview">S3 Bucket</a></b>'.format(
            region, account_id, region
        )
    )
)

  from IPython.core.display import display, HTML


### Store and Close Notebook

In [13]:
%store

Stored variables and their in-db values:
data_path                                         -> '/root/AAI-540/Module2/csv'
ingest_create_athena_db_mod2_passed               -> True
ingest_create_athena_table_csv_passed             -> True
s3_private_path_csv                               -> 's3://sagemaker-us-east-1-004608622582/module2_dat
setup_dependencies_mod2_passed                    -> True
setup_s3_bucket_passed                            -> True


In [14]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>