# 

In [5]:
# Connection and export configuration used by the cells below.
# NOTE(review): USERNAME / PASSWORD appear to be placeholder values, and a later
# cell runs `from settings import settings`, which re-binds this name — confirm
# which definition is meant to be authoritative.
settings = dict(
    URL='https://tmtgroup.sharepoint.com/sites/ABMDataPlatform',  # SharePoint site URL
    USERNAME='user',
    PASSWORD='password',
    LIST='TR_TRIP',  # SharePoint list title
    S3_BUCKET='abm-data-platform-s3-raw',  # bucket the CSV is uploaded to
)

In [6]:
from io import StringIO
import json
import pandas as pd
import boto3
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.runtime.utilities.request_options import RequestOptions
from settings import settings

# Authenticate once for the interactive cells below.
# `ctx` is only bound when the token request succeeds.
ctxAuth = AuthenticationContext(url=settings['URL'])
if ctxAuth.acquire_token_for_user(username=settings['USERNAME'], password=settings['PASSWORD']):
    ctx = ClientContext(settings['URL'], ctxAuth)
else:
    # Report why the login failed instead of silently leaving `ctx` undefined;
    # this mirrors the failure handling in sharepoint_to_csv.
    print(ctxAuth.get_last_error())

In [None]:
# Shared module-level list that sharepoint_get_item fills with the fetched
# list items and returns.
list_items_all = list()

### Function to get the fields (columns) of a SharePoint list

In [None]:
def sharepoint_get_fields(ctx):
    """Get fields
    Fetch the field (column) definitions of the configured SharePoint list.

    Only fields matching the OData filter below are requested: not hidden,
    not read-only, and belonging to the 'Custom Columns' group.

    Args:
        ctx: Client context exposing ``execute_request_direct(options)``; the
            returned response must carry the JSON body in ``.content``.

    Returns:
        The decoded JSON response body, as produced by ``json.loads``.
    """
    # Take the list title from settings['LIST'] when it is configured; the
    # previously hard-coded 'TR_TRIP' stays as the fallback.
    list_title = settings.get('LIST', 'TR_TRIP')
    # Named `field_filter` so the built-in `filter` is not shadowed.
    field_filter = "Hidden eq false and ReadOnlyField eq false and Group eq 'Custom Columns'"
    options = RequestOptions(
        "{0}/_api/lists/getbytitle('{1}')/fields?$filter={2}".format(
            settings['URL'], list_title, field_filter))
    options.set_header('Accept', 'application/json')
    options.set_header('Content-Type', 'application/json')
    data = ctx.execute_request_direct(options)
    return json.loads(data.content)


### Function to get the items (rows) of a SharePoint list

In [None]:
def sharepoint_get_item(ctx, url=''):
    """Get items
    Fetch the items of the configured SharePoint list, following pagination.

    Each call returns exactly the items fetched by that call. Earlier versions
    kept appending to the module-level ``list_items_all`` without resetting it,
    so calling the function twice in one kernel returned duplicated rows.

    Args:
        ctx: Client context exposing ``execute_request_direct(options)``; the
            returned response must carry the JSON body in ``.content``.
        url: Optional request URL to start from. When empty, the items
            endpoint of the configured list is used.

    Returns:
        The module-level ``list_items_all`` list, whose contents are replaced
        with the items collected by this call.

    Raises:
        KeyError: If a response body has no ``'value'`` entry.
    """
    if not url:
        # Take the list title from settings['LIST'] when configured; the
        # previously hard-coded 'TR_TRIP' stays as the fallback.
        url = "{0}/_api/lists/getbytitle('{1}')/items".format(
            settings['URL'], settings.get('LIST', 'TR_TRIP'))

    collected = []
    # Iterate over the pages instead of recursing, so the call depth does not
    # grow with the number of pages.
    while url:
        options = RequestOptions(url)
        options.set_header('Accept', 'application/json')
        options.set_header('Content-Type', 'application/json')
        data = ctx.execute_request_direct(options)
        page = json.loads(data.content)
        collected.extend(page['value'])

        # A response carries 'odata.nextLink' when more items remain
        # (per the original note: lists with more than 100 items).
        url = page.get('odata.nextLink')
        if url:
            print('[+] Get next items:', url)

    # Keep the shared module-level list in sync for cells that inspect it,
    # replacing (not extending) whatever a previous call left there.
    list_items_all[:] = collected
    return list_items_all

### Function to read all data from SharePoint, then transform it to CSV

In [None]:
def sharepoint_to_csv():
    """Export to CSV
    Export the SharePoint list items to a CSV object in S3.

    Authenticates against settings['URL'], reads the field definitions with
    sharepoint_get_fields and the items with sharepoint_get_item, builds a
    DataFrame whose columns are the field titles, and uploads the CSV text to
    the bucket named by settings['S3_BUCKET'].

    Returns:
        None. When authentication fails, the last authentication error is
        printed and nothing is uploaded.

    Raises:
        KeyError: If an item lacks one of the fields' EntityPropertyName keys.
    """
    ctxAuth = AuthenticationContext(url=settings['URL'])
    if not ctxAuth.acquire_token_for_user(username=settings['USERNAME'], password=settings['PASSWORD']):
        print(ctxAuth.get_last_error())
        return
    ctx = ClientContext(settings['URL'], ctxAuth)

    # Populate fields name from List dynamically.
    # EntityPropertyName is the key used in each item; Title becomes the CSV header.
    print('[+] Get fields name from a List')
    fields = sharepoint_get_fields(ctx)
    fieldId = [field['EntityPropertyName'] for field in fields['value']]
    fieldTitle = [field['Title'] for field in fields['value']]

    # Project every item onto the selected fields, keeping column order.
    print('[+] Get items from a List')
    items = sharepoint_get_item(ctx)
    list_items = [[item[field] for field in fieldId] for item in items]

    print('[+] Total items:', len(list_items), ' item(s)')

    # Create pandas dataframe from populated data
    print('[+] Create pandas dataframe to CSV')
    df = pd.DataFrame(list_items, columns=fieldTitle)
    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)

    # Save CSV file to S3
    print('[+] Upload CSV to S3')
    object_url = 'bdd/raw/rawcsv_bdd_ds_tr_trip/TR_TRIP.csv'
    s3_resource = boto3.resource('s3')
    s3_resource.Object(settings['S3_BUCKET'], object_url).put(
        Body=csv_buffer.getvalue())
    # Bucket and key were previously concatenated without a separator, which
    # logged a location that does not exist; log a proper s3:// URI instead.
    print('[+] S3 Object URL:', 's3://{0}/{1}'.format(settings['S3_BUCKET'], object_url))
    return

# --------------------------------------------------------------------------------------------------------

In [45]:
# Debug cell: show the internal field names collected from the list.
# NOTE(review): in the cells visible here, `fieldId` is only assigned as a local
# inside sharepoint_to_csv, so this cell appears to rely on leftover kernel state.
print(fieldId)

['Title', 'ID_MS_BARGE', 'FLEET_SPOT_NAME', 'SPOT_TUG', 'SPOT_BARGE', 'ID_MS_CUST_TR', 'ID_MS_SERVICE_TR', 'CARGO_NUM', 'TRV_NUM', 'MV_NAME_LOOK', 'DATA_OWNER', 'DATA_DATE', 'POL_TXT', 'POD_TXT', 'VESS_POL_ARR', 'VESS_POL_ASIDE', 'VESS_POL_COMM_LOAD', 'VESS_POL_COMP_LOAD', 'VESS_POL_COFF', 'DOC_POL_OVERHD', 'DOC_POL_ONB', 'VESS_POL_DEP', 'VESS_POD_ARR', 'VESS_POD_ASIDE', 'VESS_POD_COMM_LOAD', 'VESS_POD_COMP_LOAD', 'VESS_POD_COFF', 'VESS_POD_DEP', 'VESS_POL_ARR_2', 'ASSIST_POL_IN', 'ASSIST_POL_OUT', 'ASSIST_POD_IN', 'ASSIST_POD_OUT', 'BUNKER_ROB_2', 'BUNKER_ROB_1', 'COST_AGENCY_NUM', 'COST_DUES_NUM', 'REMARKS_TXT', 'BONUS_TRNSHP_NUM', 'ID_MS_FLEET_TR_x003a_ID_MS_FLEET', 'ID_MS_CUST_TR_x003a_ID_MS_CUST', 'ID_MS_SERVICE_TR_x003a_ID_MS_SER', 'STAT_TXT', 'TOTAL_BUNKER_CONS', 'SAVING_BUNKER', 'VOYAGE_CODE', 'ID_MS_BARGE_x003a_ID_MS_BARGE', 'CHECK_DATEFILL', 'MONTH_NAME', 'YEAR_NAME', 'FLEET_ACTIVITY_STATUS', 'FLEET_LOCATION', 'FLEET_ACTIVITY_DATE', 'STATUS_RC', 'RC_CARGO_VOLUME', 'MV_COMPLET

In [65]:
# Debug cell: show the display titles collected from the list.
# NOTE(review): in the cells visible here, `fieldTitle` is only assigned as a local
# inside sharepoint_to_csv, so this cell appears to rely on leftover kernel state.
print(fieldTitle)

['ID_MS_FLEET_TR', 'ID_MS_BARGE', 'FLEET_SPOT_NAME', 'SPOT_TUG', 'SPOT_BARGE', 'ID_MS_CUST_TR', 'ID_MS_SERVICE_TR', 'CARGO_NUM', 'TRV_NUM', 'MV_NAME_LOOK', 'DATA_OWNER', 'DATA_DATE', 'POL_TXT', 'POD_TXT', 'VESS_POL_ARR', 'VESS_POL_ASIDE', 'VESS_POL_COMM_LOAD', 'VESS_POL_COMP_LOAD', 'VESS_POL_COFF', 'DOC_POL_OVERHD', 'DOC_POL_ONB', 'VESS_POL_DEP', 'VESS_POD_ARR', 'VESS_POD_ASIDE', 'VESS_POD_COMM_LOAD', 'VESS_POD_COMP_LOAD', 'VESS_POD_COFF', 'VESS_POD_DEP', 'VESS_POL_ARR_2', 'ASSIST_POL_IN', 'ASSIST_POL_OUT', 'ASSIST_POD_IN', 'ASSIST_POD_OUT', 'BUNKER_ROB_2', 'BUNKER_ROB_1', 'COST_AGENCY_NUM', 'COST_DUES_NUM', 'REMARKS_TXT', 'BONUS_TRNSHP_NUM', 'ID_MS_FLEET_TR:ID_MS_FLEET', 'ID_MS_CUST_TR:ID_MS_CUST', 'ID_MS_SERVICE_TR:ID_MS_SERVICE', 'STAT_TXT', 'TOTAL_BUNKER_CONS', 'SAVING_BUNKER', 'VOYAGE_CODE', 'ID_MS_BARGE:ID_MS_BARGE', 'CHECK_DATEFILL', 'MONTH_NAME', 'YEAR_NAME', 'FLEET_ACTIVITY_STATUS', 'FLEET_LOCATION', 'FLEET_ACTIVITY_DATE', 'STATUS_RC', 'RC_CARGO_VOLUME', 'MV_COMPLETION', 'MV_N

In [53]:
# Fetch a small sample of the list (the request asks for at most 5 items via $top=5)
# and project every returned item onto the columns named in `fieldId`.
# NOTE(review): `ctx` and `fieldId` must already exist in the kernel for this cell to run.
options = RequestOptions("{0}/_api/lists/getbytitle('TR_TRIP')/items?$top=5".format(settings['URL']))
options.set_header('Accept', 'application/json')
options.set_header('Content-Type', 'application/json')
data = ctx.execute_request_direct(options)
items = json.loads(data.content)
# One row per item, values ordered like `fieldId`.
list_items = [[item[field] for field in fieldId] for item in items['value']]
print(list_items)

[['KAILI II', 'MOANA II', None, None, None, 'TIA', 'TRANSHIPMENT', 1957.541, 7.0, 'MV. ADRIATICA GRAECA', 'Trophy Geo Disansyah', '2019-01-07T23:00:00Z', 'Bunati Anchorage', 'Bunati Anchorage', None, None, None, None, None, None, None, None, '2018-12-31T19:00:00Z', '2018-12-31T20:40:00Z', '2018-12-31T21:40:00Z', '2019-01-01T01:25:00Z', '2019-01-01T01:35:00Z', '2019-01-01T01:50:00Z', '2019-01-01T23:00:00Z', None, None, 'KAILI I', 'TRISTAN SATU', 19458.0, 21024.0, None, None, 'Rc from mv aifanourios', None, '2.00000000000000', '1.00000000000000', '1.00000000000000', None, 1566.0, 1134.0, None, '2.00000000000000', 1.0, 'JAN', '2019', 'Full Away to Bunati', 'N/A', '22-01-2019 00:15', True, 1957.541, True, '209', 'SITE BUNATI', None, '2018-12-21 15:00:00', None, '69728.2840000000', None, None, None, None, '72800.0000000000', None, 1.0, '2019-01-22T23:15:39Z', 1957.541, 'JAN', '2019', '2019-01-22T23:15:39Z', 'Arya Wirawan', None, False], ['TRISTAN SATU', 'AQUILA SATU', None, None, None, 'TIA

In [21]:
# Upload the CSV text held in `csv_buffer` to S3 under a timestamped object key.
# NOTE(review): in the cells visible here, `csv_buffer` is only assigned as a local
# inside sharepoint_to_csv, so this cell appears to rely on leftover kernel state.

# `datetime` is not imported by the import cell, so bring it into scope here;
# without this the cell raises NameError on a fresh kernel.
from datetime import datetime

now = datetime.now()
# Local-time timestamp, e.g. 20190823140504, makes each upload a distinct object.
datetime_string = now.strftime("%Y%m%d%H%M%S")
object_url = 'bdd/raw/TR_TRIP_{0}.csv'.format(datetime_string)

s3_resource = boto3.resource('s3')
s3_resource.Object(settings['S3_BUCKET'], object_url).put(Body=csv_buffer.getvalue())
print('Object URL: ', object_url)

Object URL:  bdd/raw/TR_TRIP_20190823140504.csv
