In [1]:
import logging
import os

In [2]:
import boto3
import joblib
import pandas as pd
import s3fs

In [3]:
# One logger per file, named after the current module (`__name__`). Rationale
# for using the logging library rather than print statements:
# https://www.loggly.com/blog/4-reasons-a-python-logging-library-is-much-better-than-putting-print-statements-everywhere/
LOGGER = logging.getLogger(__name__)
# Basic root-logger configuration with the threshold set to INFO.
logging.basicConfig(level=logging.INFO)

In [4]:
# --- S3 destination ---------------------------------------------------------
# Bucket that receives both uploads.
BUCKET_NAME = "stats404-project-jaehee"
# Object key under which the CSV data set is stored in the bucket.
KEY_NAME_DATA = "bankrupt_data.csv"
# Object key under which the serialized model is stored in the bucket.
KEY_NAME_MODEL = "cat_model.joblib"

# --- Local source -----------------------------------------------------------
# Path of the local CSV that is read and then uploaded. The default is the
# original author's absolute path; set the BANKRUPT_DATA_FILE environment
# variable to run the notebook on another machine without editing this cell.
FILE_NAME = os.environ.get(
    "BANKRUPT_DATA_FILE",
    "/Users/jeongjaehui/Documents/Stats_404/JEONG-JAEHEE/bankrupt_data.csv",
)

In [5]:
def _log_bucket_keys(s3_resource, bucket_name, heading):
    """Log `heading`, then the key of every object found in `bucket_name`.

    Args:
        s3_resource: boto3 S3 service resource used to enumerate the bucket.
        bucket_name: Name of the S3 bucket whose objects are listed.
        heading: Message logged once, before the object keys.
    """
    LOGGER.info(heading)
    for s3_object in s3_resource.Bucket(bucket_name).objects.all():
        LOGGER.info(f"    {s3_object.key}")


if __name__ == '__main__':
    # =========================================================================
    # Part 1: Connect to S3 Bucket on AWS
    # =========================================================================
    LOGGER.info("--- Part 1: Connect to S3 Bucket on AWS")

    # Two handles are created:
    # - a boto3 resource, used below to enumerate buckets and objects;
    s3 = boto3.resource("s3")
    # - an s3fs file system, used below for file-like writes. anon=False
    #   connects with AWS credentials rather than as an anonymous user.
    s3_fs = s3fs.S3FileSystem(anon=False)

    LOGGER.info("List of buckets currently available on AWS S3:")
    # Only the bucket whose name equals BUCKET_NAME is logged; any other
    # bucket returned by the account is skipped.
    matching_names = (b.name for b in s3.buckets.all() if b.name == BUCKET_NAME)
    for matched_name in matching_names:
        LOGGER.info(f"    {matched_name}")

    _log_bucket_keys(s3, BUCKET_NAME, f"List of objects in bucket {BUCKET_NAME}:")

    # =========================================================================
    # Part 2: Upload CSV File to S3 Bucket
    # =========================================================================
    LOGGER.info("--- Part 2: Upload CSV File to S3 Bucket")

    # Read the local CSV: latin-1 decoding, at most 6819 rows, and the first
    # column used as the DataFrame index.
    LOGGER.info("    Download a dataset for bankruptcy")
    df = pd.read_csv(FILE_NAME, encoding="latin-1", nrows=6819, index_col=0)

    # Write the frame back out as CSV text under <bucket>/<data key>.
    csv_target = f"{BUCKET_NAME}/{KEY_NAME_DATA}"
    LOGGER.info(f"    Uploading file: {KEY_NAME_DATA} to S3 bucket = {BUCKET_NAME}")
    with s3_fs.open(csv_target, "w") as csv_handle:
        df.to_csv(csv_handle)
    LOGGER.info(f"    Uploaded file: {KEY_NAME_DATA} to S3 bucket = {BUCKET_NAME}")

    _log_bucket_keys(s3, BUCKET_NAME, f"List of objects in bucket {BUCKET_NAME} now:")

    # =========================================================================
    # Part 3: Upload Model Object to S3 Bucket
    # =========================================================================
    LOGGER.info("--- Part 3: Upload Model Object to S3 Bucket")

    # The model is loaded from a relative path, i.e. from the current working
    # directory, and then re-serialized into the bucket in binary mode.
    LOGGER.info("    Loading Catboost model object")
    cat_model = joblib.load("cat_model.joblib")

    model_target = f"{BUCKET_NAME}/{KEY_NAME_MODEL}"
    LOGGER.info(f"    Uploading file: {KEY_NAME_MODEL} to S3 bucket = {BUCKET_NAME}")
    with s3_fs.open(model_target, "wb") as model_handle:
        joblib.dump(cat_model, model_handle)
    LOGGER.info(f"    Uploaded file: {KEY_NAME_MODEL} to S3 bucket = {BUCKET_NAME}")

    _log_bucket_keys(s3, BUCKET_NAME, f"List of objects in bucket {BUCKET_NAME} now:")

INFO:__main__:--- Part 1: Connect to S3 Bucket on AWS
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:__main__:List of buckets currently available on AWS S3:
INFO:__main__:    stats404-project-jaehee
INFO:__main__:List of objects in bucket stats404-project-jaehee:
INFO:__main__:    bankrupt_data.csv
INFO:__main__:    cat_model.joblib
INFO:__main__:    new_bankrupt_data.csv
INFO:__main__:--- Part 2: Upload CSV File to S3 Bucket
INFO:__main__:    Download a dataset for bankruptcy
INFO:__main__:    Uploading file: bankrupt_data.csv to S3 bucket = stats404-project-jaehee
INFO:__main__:    Uploaded file: bankrupt_data.csv to S3 bucket = stats404-project-jaehee
INFO:__main__:List of objects in bucket stats404-project-jaehee now:
INFO:__main__:    bankrupt_data.csv
INFO:__main__:    cat_model.joblib
INFO:__main__:    new_bankrupt_data.csv
INFO:__main__:---

In [6]:
# Display the DataFrame that was read from FILE_NAME in the upload cell.
# NOTE(review): read_csv was called with index_col=0, so the first CSV column
# (rendered as `Bankrupt?` in the output) is the index rather than a regular
# column — confirm this is intended.
df

Unnamed: 0_level_0,ROA(C) before interest and depreciation before interest,ROA(A) before interest and % after tax,ROA(B) before interest and depreciation after tax,operating gross margin,realized sales gross margin,operating profit rate,tax Pre-net interest rate,after-tax net interest rate,non-industry income and expenditure/revenue,continuous interest rate (after tax),...,net income to total assets,total assets to GNP price,No-credit interval,Gross profit to Sales,Net income to stockholder's Equity,liability to equity,Degree of financial leverage (DFL),Interest coverage ratio( Interest expense to EBIT ),one if net income was negative for the last two year zero otherwise,equity to liability
Bankrupt?,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.370594,0.424389,0.405750,0.601457,0.601457,0.998969,0.796887,0.808809,0.302646,0.780985,...,0.716845,0.009219,0.622879,0.601453,0.827890,0.290202,0.026601,0.564050,1,0.016469
1,0.464291,0.538214,0.516730,0.610235,0.610235,0.998946,0.797380,0.809301,0.303556,0.781506,...,0.795297,0.008323,0.623652,0.610237,0.839969,0.283846,0.264577,0.570175,1,0.020794
1,0.426071,0.499019,0.472295,0.601450,0.601364,0.998857,0.796403,0.808388,0.302035,0.780284,...,0.774670,0.040003,0.623841,0.601449,0.836774,0.290189,0.026555,0.563706,1,0.016474
1,0.399844,0.451265,0.457733,0.583541,0.583541,0.998700,0.796967,0.808966,0.303350,0.781241,...,0.739555,0.003252,0.622929,0.583538,0.834697,0.281721,0.026697,0.564663,1,0.023982
1,0.465022,0.538432,0.522298,0.598783,0.598783,0.998973,0.797366,0.809304,0.303475,0.781550,...,0.795016,0.003878,0.623521,0.598782,0.839973,0.278514,0.024752,0.575617,1,0.035490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.493687,0.539468,0.543230,0.604455,0.604462,0.998992,0.797409,0.809331,0.303510,0.781588,...,0.799927,0.000466,0.623620,0.604455,0.840359,0.279606,0.027064,0.566193,1,0.029890
0,0.475162,0.538269,0.524172,0.598308,0.598308,0.998992,0.797414,0.809327,0.303520,0.781586,...,0.799748,0.001959,0.623931,0.598306,0.840306,0.278132,0.027009,0.566018,1,0.038284
0,0.472725,0.533744,0.520638,0.610444,0.610213,0.998984,0.797401,0.809317,0.303512,0.781546,...,0.797778,0.002840,0.624156,0.610441,0.840138,0.275789,0.026791,0.565158,1,0.097649
0,0.506264,0.559911,0.554045,0.607850,0.607850,0.999074,0.797500,0.809399,0.303498,0.781663,...,0.811808,0.002837,0.623957,0.607846,0.841084,0.277547,0.026822,0.565302,1,0.044009
