In [1]:
#We need to construct a global dataframe object where we can query everything

In [2]:
import requests
import json
from urllib.parse import urlparse, urlsplit
import pandas as pd
pd.set_option('display.max_rows', None)

In [3]:
def generate_output_dict(url_list):
    output = {}
    for url in url_list:
        parsed_url = urlparse(url)
        key = parsed_url.path.split('/')[-1]
        output[key] = {
            "url": url,
            "data": None
        }
    return output

#Adding the invoice-lines and payment-allocations
url_list = [
    "https://hackathon.syftanalytics.com/api/contacts",
    "https://hackathon.syftanalytics.com/api/item",
    "https://hackathon.syftanalytics.com/api/invoice",
    "https://hackathon.syftanalytics.com/api/payment",
    "https://hackathon.syftanalytics.com/api/invoice-lines"
]

output = generate_output_dict(url_list)
headers = {'x-api-key': 'e6506999-8738-4866-a13f-2a2cfb14ba99'}

# Iterating through the URLs and getting the data back
for key in output:
    response = requests.get(output[key]['url'], headers=headers)
    
    # Check if the request was successful
    if response.status_code == 200:
        json_data = response.json()["data"]
        # Add the data of each URL to the output dictionary
        output[key]['data'] = json_data
        #print(f"Data for {key}:")
        #print(json.dumps(json_data, indent=4))
        
    else:
        print(key)
        print(f"Failed to get data for {key}: {response.content}")

In [4]:
list(output.keys())

['contacts', 'item', 'invoice', 'payment', 'invoice-lines']

In [5]:
#Creating dataframes for each of the elements we want to analyse
contacts_df = pd.DataFrame(output["contacts"]["data"]) 
item_df = pd.DataFrame(output["item"]["data"])
invoice_df = pd.DataFrame(output["invoice"]["data"])
payment_df = pd.DataFrame(output["payment"]["data"])
invoice_info_df = pd.DataFrame(output["invoice-lines"]["data"])

Database amalgamation

In [8]:
#Joining the invoice info to the invoice df => their common columns are id and invoice_id
#creating a column called invoice_id in invoice df to match with the info df
invoice_df["invoice_id"] = invoice_df["id"]

#doing the match based on the common columns
invoice_merged = pd.merge(invoice_df, invoice_info_df, on='invoice_id', how='outer')#using outer so that the one's that do not match are kept

In [9]:
invoice_merged.head()

Unnamed: 0,id_x,issue_date,due_date,paid_date,paid,contact_id,total_x,amount_due,exchange_rate,currency,is_sale,invoice_id,id_y,description,item_code,total_y,quantity
0,90381d5a-9348-4208-915c-60b082378370,2021-01-31T00:00:00.000Z,2021-02-06T00:00:00.000Z,2021-02-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,90381d5a-9348-4208-915c-60b082378370,a90834f9-90b5-4342-9cb6-f8d45b35fd7e,Rental income,,174.8,1
1,e4f53ac4-3aa8-4332-9082-c6551e09d90b,2021-02-23T00:00:00.000Z,2021-02-28T00:00:00.000Z,,False,e0857d09-69cc-4608-83b8-7e2083e678a3,10000.0,0.0,1.0,GBP,True,e4f53ac4-3aa8-4332-9082-c6551e09d90b,0163c3f0-97e7-4309-9056-0d1d745db656,Purchase of building,,10000.0,1
2,3dec381c-7af5-436d-8b35-2d3a69b95f89,2021-02-28T00:00:00.000Z,2021-03-06T00:00:00.000Z,2021-03-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,3dec381c-7af5-436d-8b35-2d3a69b95f89,3554bf7d-bd10-449c-b05a-5a5e478bab73,Rental income,,174.8,1
3,aca26d49-abcb-4320-ac50-e0e613e34393,2021-03-07T00:00:00.000Z,2021-03-25T00:00:00.000Z,2022-04-05T00:00:00.000Z,True,cf2b417f-352e-486f-a5e0-a32cbbfa0826,848.346016,0.0,1.9317,NZD,True,aca26d49-abcb-4320-ac50-e0e613e34393,6299f20b-baf1-46f6-9c87-531ef2a91cda,Baking Lessons,,848.346016,1
4,d1c5eee6-9b87-46ed-a153-0a344f242675,2021-03-14T00:00:00.000Z,2021-04-30T00:00:00.000Z,2021-04-30T00:00:00.000Z,True,d7d2b5fc-32e7-4011-94a4-2c8aa0ff9e4f,1725.0,0.0,1.0,GBP,True,d1c5eee6-9b87-46ed-a153-0a344f242675,3e2ee452-d7c7-4bf5-95ba-c0aae26815a6,Cinderella Cupcakes,CIN001,1725.0,100


In [12]:
#Fixing the columns that have been renamed
invoice_merged = invoice_merged.rename(columns={"id_x":"id_invoice", "id_y":"id_invoice_info", "total_x":"total"})

In [13]:
#dropping the total_y because it is a repeat of the total
invoice_merged.drop(["total_y"], axis=1, inplace=True)

In [14]:
invoice_merged.head()

Unnamed: 0,id_invoice,issue_date,due_date,paid_date,paid,contact_id,total,amount_due,exchange_rate,currency,is_sale,invoice_id,id_invoice_info,description,item_code,quantity
0,90381d5a-9348-4208-915c-60b082378370,2021-01-31T00:00:00.000Z,2021-02-06T00:00:00.000Z,2021-02-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,90381d5a-9348-4208-915c-60b082378370,a90834f9-90b5-4342-9cb6-f8d45b35fd7e,Rental income,,1
1,e4f53ac4-3aa8-4332-9082-c6551e09d90b,2021-02-23T00:00:00.000Z,2021-02-28T00:00:00.000Z,,False,e0857d09-69cc-4608-83b8-7e2083e678a3,10000.0,0.0,1.0,GBP,True,e4f53ac4-3aa8-4332-9082-c6551e09d90b,0163c3f0-97e7-4309-9056-0d1d745db656,Purchase of building,,1
2,3dec381c-7af5-436d-8b35-2d3a69b95f89,2021-02-28T00:00:00.000Z,2021-03-06T00:00:00.000Z,2021-03-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,3dec381c-7af5-436d-8b35-2d3a69b95f89,3554bf7d-bd10-449c-b05a-5a5e478bab73,Rental income,,1
3,aca26d49-abcb-4320-ac50-e0e613e34393,2021-03-07T00:00:00.000Z,2021-03-25T00:00:00.000Z,2022-04-05T00:00:00.000Z,True,cf2b417f-352e-486f-a5e0-a32cbbfa0826,848.346016,0.0,1.9317,NZD,True,aca26d49-abcb-4320-ac50-e0e613e34393,6299f20b-baf1-46f6-9c87-531ef2a91cda,Baking Lessons,,1
4,d1c5eee6-9b87-46ed-a153-0a344f242675,2021-03-14T00:00:00.000Z,2021-04-30T00:00:00.000Z,2021-04-30T00:00:00.000Z,True,d7d2b5fc-32e7-4011-94a4-2c8aa0ff9e4f,1725.0,0.0,1.0,GBP,True,d1c5eee6-9b87-46ed-a153-0a344f242675,3e2ee452-d7c7-4bf5-95ba-c0aae26815a6,Cinderella Cupcakes,CIN001,100


In [None]:
#Now merging the contacts in the common column is contacts
#creating the common column in the contacts_df
contacts_df["contact_id"] = contacts_df["id"]
contacts_merged = pd.merge(invoice_merged, contacts_df, on='contact_id', how='outer')

In [15]:
contacts_df.head()

Unnamed: 0,id,name,is_supplier,is_customer,email,phone
0,b422b740-54e0-405f-94fc-f5dd32b55e91,Frozen King,False,False,,
1,d7d2b5fc-32e7-4011-94a4-2c8aa0ff9e4f,Cindarella,False,True,,
2,e0857d09-69cc-4608-83b8-7e2083e678a3,Maleficent,True,True,,
3,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,Snow White,False,True,,
4,94589e82-ecdf-4c7b-9896-f5e54b8d95af,Ariel new,False,True,ariel@mail.com,


In [11]:
invoice_info_df.head()

Unnamed: 0,id,invoice_id,description,item_code,total,quantity
0,a90834f9-90b5-4342-9cb6-f8d45b35fd7e,90381d5a-9348-4208-915c-60b082378370,Rental income,,174.8,1
1,0163c3f0-97e7-4309-9056-0d1d745db656,e4f53ac4-3aa8-4332-9082-c6551e09d90b,Purchase of building,,10000.0,1
2,3554bf7d-bd10-449c-b05a-5a5e478bab73,3dec381c-7af5-436d-8b35-2d3a69b95f89,Rental income,,174.8,1
3,6299f20b-baf1-46f6-9c87-531ef2a91cda,aca26d49-abcb-4320-ac50-e0e613e34393,Baking Lessons,,848.346016,1
4,3e2ee452-d7c7-4bf5-95ba-c0aae26815a6,d1c5eee6-9b87-46ed-a153-0a344f242675,Cinderella Cupcakes,CIN001,1725.0,100


In [10]:
invoice_df.head()

Unnamed: 0,id,issue_date,due_date,paid_date,paid,contact_id,total,amount_due,exchange_rate,currency,is_sale,invoice_id
0,90381d5a-9348-4208-915c-60b082378370,2021-01-31T00:00:00.000Z,2021-02-06T00:00:00.000Z,2021-02-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,90381d5a-9348-4208-915c-60b082378370
1,e4f53ac4-3aa8-4332-9082-c6551e09d90b,2021-02-23T00:00:00.000Z,2021-02-28T00:00:00.000Z,,False,e0857d09-69cc-4608-83b8-7e2083e678a3,10000.0,0.0,1.0,GBP,True,e4f53ac4-3aa8-4332-9082-c6551e09d90b
2,3dec381c-7af5-436d-8b35-2d3a69b95f89,2021-02-28T00:00:00.000Z,2021-03-06T00:00:00.000Z,2021-03-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,3dec381c-7af5-436d-8b35-2d3a69b95f89
3,aca26d49-abcb-4320-ac50-e0e613e34393,2021-03-07T00:00:00.000Z,2021-03-25T00:00:00.000Z,2022-04-05T00:00:00.000Z,True,cf2b417f-352e-486f-a5e0-a32cbbfa0826,848.346016,0.0,1.9317,NZD,True,aca26d49-abcb-4320-ac50-e0e613e34393
4,d1c5eee6-9b87-46ed-a153-0a344f242675,2021-03-14T00:00:00.000Z,2021-04-30T00:00:00.000Z,2021-04-30T00:00:00.000Z,True,d7d2b5fc-32e7-4011-94a4-2c8aa0ff9e4f,1725.0,0.0,1.0,GBP,True,d1c5eee6-9b87-46ed-a153-0a344f242675


In [17]:
invoice_df["invoice_id"] = invoice_df["id"]

In [18]:
merged_df = pd.merge(invoice_df, invoice_info_df, on='invoice_id', how='outer')

In [23]:
merged_df = merged_df.rename(columns={'id_x': 'id_invoice_df', 'id_y':'id_invoice_info'})

In [27]:
merged_df

Unnamed: 0,id_invoice_df,issue_date,due_date,paid_date,paid,contact_id,total_x,amount_due,exchange_rate,currency,is_sale,invoice_id,id_invoice_info,description,item_code,total_y,quantity
0,90381d5a-9348-4208-915c-60b082378370,2021-01-31T00:00:00.000Z,2021-02-06T00:00:00.000Z,2021-02-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,90381d5a-9348-4208-915c-60b082378370,a90834f9-90b5-4342-9cb6-f8d45b35fd7e,Rental income,,174.8,1
1,e4f53ac4-3aa8-4332-9082-c6551e09d90b,2021-02-23T00:00:00.000Z,2021-02-28T00:00:00.000Z,,False,e0857d09-69cc-4608-83b8-7e2083e678a3,10000.0,0.0,1.0,GBP,True,e4f53ac4-3aa8-4332-9082-c6551e09d90b,0163c3f0-97e7-4309-9056-0d1d745db656,Purchase of building,,10000.0,1
2,3dec381c-7af5-436d-8b35-2d3a69b95f89,2021-02-28T00:00:00.000Z,2021-03-06T00:00:00.000Z,2021-03-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,3dec381c-7af5-436d-8b35-2d3a69b95f89,3554bf7d-bd10-449c-b05a-5a5e478bab73,Rental income,,174.8,1
3,aca26d49-abcb-4320-ac50-e0e613e34393,2021-03-07T00:00:00.000Z,2021-03-25T00:00:00.000Z,2022-04-05T00:00:00.000Z,True,cf2b417f-352e-486f-a5e0-a32cbbfa0826,848.346016,0.0,1.9317,NZD,True,aca26d49-abcb-4320-ac50-e0e613e34393,6299f20b-baf1-46f6-9c87-531ef2a91cda,Baking Lessons,,848.346016,1
4,d1c5eee6-9b87-46ed-a153-0a344f242675,2021-03-14T00:00:00.000Z,2021-04-30T00:00:00.000Z,2021-04-30T00:00:00.000Z,True,d7d2b5fc-32e7-4011-94a4-2c8aa0ff9e4f,1725.0,0.0,1.0,GBP,True,d1c5eee6-9b87-46ed-a153-0a344f242675,3e2ee452-d7c7-4bf5-95ba-c0aae26815a6,Cinderella Cupcakes,CIN001,1725.0,100
5,2f861907-afa3-441b-b523-ff734f1277e5,2021-03-15T00:00:00.000Z,2021-06-06T00:00:00.000Z,2022-04-30T00:00:00.000Z,True,8f65adab-5f53-402a-a2ee-31d458304705,250.537385,0.0,20.6556,ZAR,True,2f861907-afa3-441b-b523-ff734f1277e5,cacfe47f-9cc9-4a66-b2db-ea88842f6577,Frozen cupcakes,FR001,250.537385,10
6,2f47cd73-4c05-475f-931a-86b0a7f976fb,2021-03-16T00:00:00.000Z,2021-06-02T00:00:00.000Z,2021-04-09T00:00:00.000Z,True,a9797cb2-4805-4528-bbcb-fdb94368bee6,267.808627,0.0,1.93235,NZD,True,2f47cd73-4c05-475f-931a-86b0a7f976fb,4c29aaac-522d-426b-ae7d-f824df790720,Frozen cupcakes,FR001,267.808627,10
7,0222b396-f017-4a90-ad5b-4a8e241d4afc,2021-03-31T00:00:00.000Z,2021-04-06T00:00:00.000Z,2021-04-06T00:00:00.000Z,True,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,174.8,0.0,1.0,GBP,True,0222b396-f017-4a90-ad5b-4a8e241d4afc,1e35c6e6-456d-4dae-9d7f-c4ee6b15c06e,Rental income,,174.8,1
8,0a84fa97-263c-4186-add7-1a56bd59e71c,2021-04-07T00:00:00.000Z,2021-04-25T00:00:00.000Z,2022-04-25T00:00:00.000Z,True,cf2b417f-352e-486f-a5e0-a32cbbfa0826,836.425535,0.0,1.95923,NZD,True,0a84fa97-263c-4186-add7-1a56bd59e71c,91eaeafb-fff6-4ffa-b4cd-807016e0e94f,Baking Lessons,,836.425535,1
9,4b7eb893-c128-4c15-ba05-1bda2e1b9dfa,2021-04-14T00:00:00.000Z,2021-05-30T00:00:00.000Z,2021-05-30T00:00:00.000Z,True,d7d2b5fc-32e7-4011-94a4-2c8aa0ff9e4f,1725.0,0.0,1.0,GBP,True,4b7eb893-c128-4c15-ba05-1bda2e1b9dfa,23b1b727-e39d-453a-b36c-48457b71712e,Cinderella Cupcakes,CIN001,1725.0,100


In [None]:
contacts_df["contact_id"] = contacts_df[]

In [26]:
merged_contacts = pd.merge(merged_df, contacts_df, on='invoice_id', how='outer')

KeyError: 'invoice_id'

In [25]:
contacts_df.head()

Unnamed: 0,id,name,is_supplier,is_customer,email,phone
0,b422b740-54e0-405f-94fc-f5dd32b55e91,Frozen King,False,False,,
1,d7d2b5fc-32e7-4011-94a4-2c8aa0ff9e4f,Cindarella,False,True,,
2,e0857d09-69cc-4608-83b8-7e2083e678a3,Maleficent,True,True,,
3,8e81fbc4-4f33-4ae9-bf5e-a2415372e77b,Snow White,False,True,,
4,94589e82-ecdf-4c7b-9896-f5e54b8d95af,Ariel new,False,True,ariel@mail.com,
