In [2]:
from requests import get
from pandas import DataFrame, concat, to_datetime, to_timedelta, json_normalize, notna, isna
from time import time
import numpy as np
from dotenv import load_dotenv
import json, os
from datetime import datetime

ModuleNotFoundError: No module named 'dotenv'

In [None]:
# Whether the sale / return datasets should be downloaded instead of read from the local cache.
# These are only initial values: the cache-check cell further down reassigns both flags
# on every run, based on whether a cached file exists and how old it is.
fetchSaleData = True
fetchReturnData = True

# Maximum age, in days, of a cached raw JSON file before it is considered stale and fetched again.
lastUpdatedInDays = 1

In [None]:
# Cached files last modified at or before this moment are treated as stale.
currentTime = time()
timeToCheckAgainst = datetime.fromtimestamp(currentTime - lastUpdatedInDays*24*60*60)

# exist_ok=True replaces the separate exists() test and its check-then-create race.
os.makedirs('../data files', exist_ok=True)


def _needsFetch(label, path, cutoff, maxAgeHours):
    """Report on one cached raw-data file and decide whether it has to be fetched again.

    Parameters:
        label: dataset name used in the progress messages (e.g. 'sale').
        path: location of the cached JSON file.
        cutoff: datetime; a file modified at or before it is considered stale.
        maxAgeHours: age limit shown in the "older than" message.

    Returns:
        (fetch, modifiedAt) where `fetch` is True when the file is missing or stale, and
        `modifiedAt` is the file's modification time as a datetime (None when missing).
    """
    print('Checking for {} data...'.format(label))

    if not os.path.exists(path):
        print('No local data found!')
        print('Have to fetch again...')
        return True, None

    modifiedAt = datetime.fromtimestamp(os.path.getmtime(path))
    print('Found {} data from:'.format(label), modifiedAt.strftime('%c'))

    if modifiedAt <= cutoff:
        print('Locally saved data is older than {} hours!'.format(maxAgeHours))
        print('Have to fetch again...')
        return True, modifiedAt

    print('Will be using this locally saved data...')
    return False, modifiedAt


# Same check for both datasets; the flags drive the fetch cells that follow.
fetchSaleData, saleDataTime = _needsFetch(
    'sale', '../data files/rawSaleData.json', timeToCheckAgainst, lastUpdatedInDays*24)
fetchReturnData, returnDataTime = _needsFetch(
    'return', '../data files/rawReturnData.json', timeToCheckAgainst, lastUpdatedInDays*24)

In [None]:
load_dotenv()

# API location, read from the environment (populated from a .env file by load_dotenv()).
# The request URL is built by plain concatenation, so the value presumably has to end
# with '/' -- verify against the .env file.
domain = os.getenv("URL")

if fetchSaleData:
    # Fail with a clear message instead of a TypeError from 'https://' + None.
    if not domain:
        raise RuntimeError('Environment variable "URL" is not set; cannot fetch Sale data.')

    print('Getting Sale data... Please wait!')
    startTime = time()

    # get sale data; (connect, read) timeouts keep a dead server from hanging the notebook
    response = get('https://' + domain + 'sale', timeout=(10, 300))
    # Stop on HTTP errors so that an error body is never parsed and cached as sale data.
    response.raise_for_status()
    saleData = response.json()

    print('Took', round(time() - startTime, 2), 'seconds to get Sale data.')

    # save locally for later reuse
    with open('../data files/rawSaleData.json', 'w') as fp:
        json.dump(saleData, fp, indent=4)
else:
    # use locally saved data
    with open('../data files/rawSaleData.json', 'r') as fp:
        saleData = json.load(fp)

In [None]:
if fetchReturnData:
    # `domain` is read from the "URL" environment variable in an earlier cell.
    # Fail with a clear message instead of a TypeError from 'https://' + None.
    if not domain:
        raise RuntimeError('Environment variable "URL" is not set; cannot fetch Return data.')

    print('Getting Return data... Please wait!')
    startTime = time()

    # get return data; (connect, read) timeouts keep a dead server from hanging the notebook
    response = get('https://' + domain + 'return', timeout=(10, 300))
    # Stop on HTTP errors so that an error body is never parsed and cached as return data.
    response.raise_for_status()
    returnData = response.json()

    print('Took', round(time() - startTime, 2), 'seconds to get Return data.')

    # save locally for later reuse
    with open('../data files/rawReturnData.json', 'w') as fp:
        json.dump(returnData, fp, indent=4)
else:
    # use locally saved data
    with open('../data files/rawReturnData.json', 'r') as fp:
        returnData = json.load(fp)

## Taking Sale Data

In [None]:
# Raw sale records -> tidy frame, expressed as a single pipeline:
#   1. drop the columns this analysis does not use
#   2. rename the remaining scan columns to sale-specific names
#   3. join the date and time strings and parse them (day first) into one datetime64 column
#   4. keep records from 22 Jan 2020 onwards, oldest first, with a fresh 0..n-1 index
saleDF = (
    DataFrame.from_dict(saleData, orient='columns')
    .drop(columns=['scanned_at', 'scanned_at_melbourne_date_time', 'return_id'])
    .rename(columns={
        "scanned_at_melbourne_date": "sale_date",
        "scanned_at_melbourne_time": "sale_time",
        "id": "sale_id",
    })
    .assign(date_time_concat=lambda frame: frame['sale_date'] + ' ' + frame['sale_time'])
    .assign(date_time=lambda frame: to_datetime(frame['date_time_concat'], dayfirst=True))
    .drop(columns=['sale_date', 'sale_time', 'date_time_concat'])
    .loc[lambda frame: frame['date_time'] >= '2020/01/22 00:00:00']
    .sort_values(by='date_time', ascending=True)
    .reset_index(drop=True)
)

saleDF.head()

In [None]:
# Tag every sale with the ISO week number of its timestamp.
# NOTE(review): only the week number is kept, not the ISO year -- if the data spans more
# than one year, same-numbered weeks would be pooled downstream; confirm this is intended.
saleDF['week'] = saleDF['date_time'].dt.isocalendar()['week']
saleDF.head()

In [None]:
# Count how many sale records each cup has in each week.
groupDF = (
    saleDF
    .groupby(by=['cup_id', 'week'])
    .size()
    .reset_index(name='occurrence_per_week')
)
groupDF.head()

In [None]:
# Wide layout: one row per cup, one column per week, cell = number of sales in that week
# (NaN where the cup has no record for the week). The leftover 'week' label on the
# column axis is cleared so the frame has plain column headers.
df = (
    groupDF
    .pivot(index='cup_id', columns='week', values='occurrence_per_week')
    .reset_index()
    .rename_axis(columns=None)
)
df.head()

In [None]:
# Containers for the arc-diagram payload; filled by populateNodes / populateLinks.
jsonObject = {}
nodesArray = []
linksArray = []

def updateNode(flag, weekStart, weekEnd=0):
    """Bump a usage counter on the node(s) of the global `nodesArray`.

    flag == 0 -- the cup was used exactly once overall: add 1 to 'single' on the first
                 node whose week equals `weekStart`.
    flag == 1 -- the cup was reused within the same week: on the first node whose week
                 equals `weekStart`, add 2 to 'self' if it is still 0, otherwise add 1.
    otherwise -- the cup was used in two different weeks: add 1 to 'other' (creating the
                 key if absent) on every node whose week equals `weekStart` or `weekEnd`.

    Nothing happens when no node matches.
    """
    if flag == 0 or flag == 1:
        # Both single-week cases touch only the first matching node.
        target = next((entry for entry in nodesArray if entry['week'] == weekStart), None)
        if target is None:
            return
        if flag == 0:
            target['single'] += 1
        else:
            # The first reuse accounts for two uses at once; later ones add one each.
            target['self'] += 1 if target['self'] != 0 else 2
        return

    # Cross-week use: every node at either end of the link gets one more 'other'.
    for entry in nodesArray:
        if entry['week'] == weekStart or entry['week'] == weekEnd:
            entry['other'] = entry.get('other', 0) + 1

In [3]:
def populateNodes(df):
    """Append one zeroed node per week column of `df` to the global `nodesArray`.

    `df` is expected to have its identifier in the first column and one column per week
    after it; every column label after the first becomes a node's 'week'.
    """
    for week in list(df.columns[1:]):
        nodesArray.append({
            'week': week,
            'single': 0,
            'self': 0,
            'other': 0
        })


def populateLinks(df):
    """Walk each row's uses in column order and record links between consecutive uses.

    For every row of `df` (first column = identifier, remaining columns = per-week use
    counts, NaN meaning no use) the uses are expanded one by one in week-column order.
    Each pair of consecutive uses (start week -> end week) either increments the 'count'
    of the matching entry in the global `linksArray` or appends a new entry, and the
    matching node counters are updated through `updateNode`. A row with a single use in
    total is reported through `updateNode(0, week)` instead.
    """
    weeks = list(df.columns[1:])

    # (source, target) -> link dict. Gives a constant-time lookup per use instead of a
    # scan over linksArray; seeded from links that already exist (first one wins, which
    # is the entry a front-to-back scan would have found).
    linkLookup = {}
    for link in linksArray:
        linkLookup.setdefault((link['source'], link['target']), link)

    for _, row in df.iterrows():
        start = None
        end = None
        for week in weeks:
            if not notna(row[week]):
                continue
            for _use in range(int(row[week])):
                # The very first use of the row only sets the starting point.
                if start is None:
                    start = week
                    continue

                end = week
                key = (start, end)
                link = linkLookup.get(key)
                if link is not None:
                    link['count'] += 1
                else:
                    # A new same-week link starts at 2, mirroring updateNode's
                    # "first time, add 2" rule for 'self'; a cross-week link starts at 1.
                    link = {
                        'source': start,
                        'target': end,
                        'count': 2 if start == end else 1
                    }
                    linksArray.append(link)
                    linkLookup[key] = link

                if start == end:
                    # repeated use within one week (flag = 1)
                    updateNode(1, start)
                else:
                    # use spanning two different weeks (flag = 2)
                    updateNode(2, start, end)

                start = week

        if end is None:
            # the row never produced a pair, i.e. a single use overall (flag = 0)
            updateNode(0, start)

In [4]:
# Start from empty containers so this cell can be re-run on its own: both populate
# functions only append/increment, so a second run would otherwise double every
# node and link count.
nodesArray = []
linksArray = []

populateNodes(df)

populateLinks(df)

NameError: name 'df' is not defined

In [None]:
# Largest counts first: wide arcs are listed before narrow ones so they do not end up
# covering them. The sort is stable, so equal counts keep their relative order.
linksArray = sorted(linksArray, key=lambda link: link['count'], reverse=True)

In [None]:
# Assemble the payload (nodes, then links) and write it out for the sale arc diagram.
jsonObject.update(nodes=nodesArray, links=linksArray)

with open('../data files/arcSaleData.json', 'w') as outFile:
    json.dump(jsonObject, outFile, indent=4)

## Taking Return Data

In [None]:
# Raw return records -> tidy frame, expressed as a single pipeline:
#   1. json_normalize flattens the nested records (nested keys become dotted column
#      names) and already returns a DataFrame, so no extra DataFrame wrapper is needed
#   2. drop the columns this analysis does not use
#   3. rename the flattened date / time columns
#   4. join the date and time strings and parse them (day first) into one datetime64 column
#   5. keep records from 22 Jan 2020 onwards, oldest first, with a fresh 0..n-1 index
#      (the index reset matches how the sale frame is prepared)
returnDF = (
    json_normalize(returnData)
    .drop(columns=['scanned_at', 'bin_id', 'cafe_id', 'dishwasher_id', 'id'])
    .rename(columns={
        "scanned_at_melbourne_date_time.date": "return_date",
        "scanned_at_melbourne_date_time.time": "return_time",
    })
    .assign(date_time_concat=lambda frame: frame['return_date'] + ' ' + frame['return_time'])
    .assign(date_time=lambda frame: to_datetime(frame['date_time_concat'], dayfirst=True))
    .drop(columns=['return_date', 'return_time', 'date_time_concat'])
    .loc[lambda frame: frame['date_time'] >= '2020/01/22 00:00:00']
    .sort_values(by='date_time', ascending=True)
    .reset_index(drop=True)
)

returnDF.head()

In [None]:
# Tag every return with the ISO week number of its timestamp.
# NOTE(review): only the week number is kept, not the ISO year -- if the data spans more
# than one year, same-numbered weeks would be pooled downstream; confirm this is intended.
returnDF['week'] = returnDF['date_time'].dt.isocalendar()['week']
returnDF.head()

In [None]:
# Count how many return records each cup has in each week.
groupDF = (
    returnDF
    .groupby(by=['cup_id', 'week'])
    .size()
    .reset_index(name='occurrence_per_week')
)
groupDF.head()

In [None]:
#groupDF.groupby(by='cup_id')['occurrence_per_week'].apply(list).reset_index(name='new')
# Wide layout: one row per cup, one column per week, cell = number of returns in that
# week (NaN where the cup has no record for the week). The leftover 'week' label on the
# column axis is cleared so the frame has plain column headers.
df = (
    groupDF
    .pivot(index='cup_id', columns='week', values='occurrence_per_week')
    .reset_index()
    .rename_axis(columns=None)
)
df.head()

In [None]:
# Fresh, empty containers for the return-data pass, then fill them from the pivoted frame.
jsonObject, nodesArray, linksArray = {}, [], []

populateNodes(df)
populateLinks(df)

In [None]:
# Largest counts first: wide arcs are listed before narrow ones so they do not end up
# covering them. The sort is stable, so equal counts keep their relative order.
linksArray = sorted(linksArray, key=lambda link: link['count'], reverse=True)

In [None]:
# Assemble the payload (nodes, then links) and write it out for the return arc diagram.
jsonObject.update(nodes=nodesArray, links=linksArray)

with open('../data files/arcReturnData.json', 'w') as outFile:
    json.dump(jsonObject, outFile, indent=4)