In [4]:
import pandas as pd
import uuid
import random
from datetime import timedelta

In [5]:
open_orders = dict()
event_types = ['order_created', 'order_fulfilled']
periods = [ts for ts in pd.date_range(start='1/1/2008', end='31/12/2018') if random.random()>0.8]

In [6]:
event_types

['order_created', 'order_fulfilled']

In [7]:
def new_random_order():
    return {
        'order_id': uuid.uuid4(),
        'amount': random.random() * 1000,
        'created_at': random.choice(periods)
    } 

In [8]:
def new_random_event():
    event_type = random.choice(event_types)
    order_info = new_random_order()
    if event_type=='order_created':
        open_orders[order_info['order_id']] = order_info
    else:
        if len(open_orders):
            order_id = random.choice(list(open_orders.keys()))
            order_info = open_orders[order_id]
            order_info['created_at'] = order_info['created_at'] + timedelta(random.randrange(0,180))
        open_orders.pop(order_info['order_id'], None)
        
    return {
        "event_id": uuid.uuid4(),
        "event_type": event_type,
        "order_id": order_info['order_id'],
        "amount": order_info['amount'],
        "created_at": order_info['created_at']
    }

In [9]:
df = pd.DataFrame([new_random_event() for _ in range(0, 100)])
df

Unnamed: 0,event_id,event_type,order_id,amount,created_at
0,073c1e0d-4e7d-4783-a298-9b7a9ca9cbb3,order_created,8c86f099-d7fb-4952-b97f-fec68230da6c,747.058475,2011-08-07
1,b6602479-7301-4180-8842-0d71588c2f9a,order_fulfilled,8c86f099-d7fb-4952-b97f-fec68230da6c,747.058475,2011-08-25
2,0ddc9125-d894-44b6-b1c5-b02b8dc4f177,order_created,71a9ad8a-ca7f-4911-a90f-109413665ff2,482.346631,2014-02-27
3,d6aab0b6-1a5d-4404-8e69-95410f9e977c,order_created,49d63ac9-9ac4-4edb-a316-6a4a48f414f5,966.229322,2010-01-14
4,82978e9a-8195-4a61-a1fe-9a79207abb1d,order_fulfilled,49d63ac9-9ac4-4edb-a316-6a4a48f414f5,966.229322,2010-01-22
...,...,...,...,...,...
95,1a068838-9fe4-414e-8b1c-6032e530153a,order_fulfilled,d9091576-5731-4e0d-a9bc-a0c06243125e,836.858404,2013-10-02
96,1089172b-5991-45d2-8abf-5a9422afe42d,order_created,9c01d490-b5f6-444e-9bf6-ca07ee3c0cc6,981.150079,2008-11-04
97,76f57da5-c71d-4915-bbd0-316053baee88,order_created,2e34b383-018d-4687-8a69-c6d5618d3db2,360.784547,2014-08-03
98,494aa0e7-6c1a-4e94-9e67-2480a5d2b154,order_fulfilled,2e34b383-018d-4687-8a69-c6d5618d3db2,360.784547,2014-09-17


## QUESTION ONE

In [10]:
(df['event_type'] == 'order_created')

0      True
1     False
2      True
3      True
4     False
      ...  
95    False
96     True
97     True
98    False
99    False
Name: event_type, Length: 100, dtype: bool

In [11]:
order_created = (df['event_type'] == 'order_created')

In [12]:
created_orders = df[order_created]
created_orders

Unnamed: 0,event_id,event_type,order_id,amount,created_at
0,073c1e0d-4e7d-4783-a298-9b7a9ca9cbb3,order_created,8c86f099-d7fb-4952-b97f-fec68230da6c,747.058475,2011-08-07
2,0ddc9125-d894-44b6-b1c5-b02b8dc4f177,order_created,71a9ad8a-ca7f-4911-a90f-109413665ff2,482.346631,2014-02-27
3,d6aab0b6-1a5d-4404-8e69-95410f9e977c,order_created,49d63ac9-9ac4-4edb-a316-6a4a48f414f5,966.229322,2010-01-14
9,3e8fd2a5-b24b-402f-a1b0-8d616954c43e,order_created,1c2b399c-990e-47cb-9dbc-fdbc6c22b18d,319.676773,2014-04-11
10,1f2502b2-71fd-4c53-a7c0-0548110e6001,order_created,b0e14f60-f8d5-4a4a-990f-e8d98346648c,954.482188,2017-10-25
12,1a501b62-2b6b-4785-ab1d-372f84a81172,order_created,f679b4ed-9588-4c48-80b2-f6d84229bbe6,101.610171,2018-10-13
13,577b5573-3174-4c5f-9cee-782d41729a43,order_created,d77302fc-b8dd-4868-9cdd-b70a628dd294,27.730124,2018-09-14
14,2cb6edfa-7418-492b-9600-0f00edb665d0,order_created,0697711f-62c8-46fb-8e93-a15520900045,661.882347,2010-11-05
15,0641b079-8b0d-4f28-85a1-172b77caa8dd,order_created,17d8ce37-31a1-40dc-89f8-901ac0319037,241.833713,2009-04-28
16,bf5ebfd0-4599-44e4-8d65-882bbc52ba22,order_created,da233135-9061-45a4-80f8-3c86c9c7dfa7,13.329982,2015-02-24


In [13]:
(df['event_type'] == 'order_fulfilled')

0     False
1      True
2     False
3     False
4      True
      ...  
95     True
96    False
97    False
98     True
99     True
Name: event_type, Length: 100, dtype: bool

In [14]:
order_fulfilled = (df['event_type'] == 'order_fulfilled')

In [15]:
fulfilled_order = df[order_fulfilled]

In [16]:
fulfilled_order ['created_at'].value_counts()
fulfilled_order

Unnamed: 0,event_id,event_type,order_id,amount,created_at
1,b6602479-7301-4180-8842-0d71588c2f9a,order_fulfilled,8c86f099-d7fb-4952-b97f-fec68230da6c,747.058475,2011-08-25
4,82978e9a-8195-4a61-a1fe-9a79207abb1d,order_fulfilled,49d63ac9-9ac4-4edb-a316-6a4a48f414f5,966.229322,2010-01-22
5,cf4ebf95-7e40-434c-a4f7-1a5e4a377f0d,order_fulfilled,71a9ad8a-ca7f-4911-a90f-109413665ff2,482.346631,2014-08-03
6,52f6f242-6795-4a25-b565-32f7af5a06ea,order_fulfilled,35ac45f5-cd56-4ce3-8346-266444c12b58,861.064494,2012-07-07
7,4ecf8b06-f966-459d-8e38-723ab5520596,order_fulfilled,b91a3e27-656d-40bf-a332-74df7c0685d3,181.821778,2014-06-13
8,438d8843-0bf9-480f-b8d6-d6badf6f6644,order_fulfilled,1fbe23a3-5654-43bd-8ab6-2f144714bae1,697.024006,2016-01-10
11,a7f1d9c9-9e7e-49e6-92a0-64e3092c2a33,order_fulfilled,1c2b399c-990e-47cb-9dbc-fdbc6c22b18d,319.676773,2014-04-23
21,7121371d-fbf0-42fb-b1e9-2847595e6107,order_fulfilled,5bddcf68-22db-478a-9873-623e52dee147,868.040937,2009-06-02
22,170092e4-5231-4704-a95a-43baafed6689,order_fulfilled,b0e14f60-f8d5-4a4a-990f-e8d98346648c,954.482188,2018-04-18
24,59e946a9-ef3a-4eaa-86f5-96ca7ffbce43,order_fulfilled,3a02acae-3f8f-416f-b9d2-29755fc5ee37,400.82016,2017-03-12


In [17]:
orders_created_fulfilled= pd.merge(created_orders, fulfilled_order, on='order_id')
orders_created_fulfilled

Unnamed: 0,event_id_x,event_type_x,order_id,amount_x,created_at_x,event_id_y,event_type_y,amount_y,created_at_y
0,073c1e0d-4e7d-4783-a298-9b7a9ca9cbb3,order_created,8c86f099-d7fb-4952-b97f-fec68230da6c,747.058475,2011-08-07,b6602479-7301-4180-8842-0d71588c2f9a,order_fulfilled,747.058475,2011-08-25
1,0ddc9125-d894-44b6-b1c5-b02b8dc4f177,order_created,71a9ad8a-ca7f-4911-a90f-109413665ff2,482.346631,2014-02-27,cf4ebf95-7e40-434c-a4f7-1a5e4a377f0d,order_fulfilled,482.346631,2014-08-03
2,d6aab0b6-1a5d-4404-8e69-95410f9e977c,order_created,49d63ac9-9ac4-4edb-a316-6a4a48f414f5,966.229322,2010-01-14,82978e9a-8195-4a61-a1fe-9a79207abb1d,order_fulfilled,966.229322,2010-01-22
3,3e8fd2a5-b24b-402f-a1b0-8d616954c43e,order_created,1c2b399c-990e-47cb-9dbc-fdbc6c22b18d,319.676773,2014-04-11,a7f1d9c9-9e7e-49e6-92a0-64e3092c2a33,order_fulfilled,319.676773,2014-04-23
4,1f2502b2-71fd-4c53-a7c0-0548110e6001,order_created,b0e14f60-f8d5-4a4a-990f-e8d98346648c,954.482188,2017-10-25,170092e4-5231-4704-a95a-43baafed6689,order_fulfilled,954.482188,2018-04-18
5,1a501b62-2b6b-4785-ab1d-372f84a81172,order_created,f679b4ed-9588-4c48-80b2-f6d84229bbe6,101.610171,2018-10-13,d933f05e-9515-4cc0-9dee-45e221762a04,order_fulfilled,101.610171,2018-12-14
6,577b5573-3174-4c5f-9cee-782d41729a43,order_created,d77302fc-b8dd-4868-9cdd-b70a628dd294,27.730124,2018-09-14,b38b3630-6456-448f-97d3-01bc9dd70dbf,order_fulfilled,27.730124,2018-11-11
7,2cb6edfa-7418-492b-9600-0f00edb665d0,order_created,0697711f-62c8-46fb-8e93-a15520900045,661.882347,2010-11-05,89d82033-10d4-4828-a766-85f3366b5a8d,order_fulfilled,661.882347,2011-02-26
8,0641b079-8b0d-4f28-85a1-172b77caa8dd,order_created,17d8ce37-31a1-40dc-89f8-901ac0319037,241.833713,2009-04-28,b3b8ceaf-42c5-471d-96c4-9b7fd50bf123,order_fulfilled,241.833713,2009-07-31
9,bf5ebfd0-4599-44e4-8d65-882bbc52ba22,order_created,da233135-9061-45a4-80f8-3c86c9c7dfa7,13.329982,2015-02-24,6f57a8d9-f814-4666-9fff-37cecaa058c6,order_fulfilled,13.329982,2015-08-07


In [18]:
orders_created_fulfilled_merged = orders_created_fulfilled [['order_id',  'created_at_x',  'created_at_y']]

In [19]:
df2 = orders_created_fulfilled_merged
df2

Unnamed: 0,order_id,created_at_x,created_at_y
0,8c86f099-d7fb-4952-b97f-fec68230da6c,2011-08-07,2011-08-25
1,71a9ad8a-ca7f-4911-a90f-109413665ff2,2014-02-27,2014-08-03
2,49d63ac9-9ac4-4edb-a316-6a4a48f414f5,2010-01-14,2010-01-22
3,1c2b399c-990e-47cb-9dbc-fdbc6c22b18d,2014-04-11,2014-04-23
4,b0e14f60-f8d5-4a4a-990f-e8d98346648c,2017-10-25,2018-04-18
5,f679b4ed-9588-4c48-80b2-f6d84229bbe6,2018-10-13,2018-12-14
6,d77302fc-b8dd-4868-9cdd-b70a628dd294,2018-09-14,2018-11-11
7,0697711f-62c8-46fb-8e93-a15520900045,2010-11-05,2011-02-26
8,17d8ce37-31a1-40dc-89f8-901ac0319037,2009-04-28,2009-07-31
9,da233135-9061-45a4-80f8-3c86c9c7dfa7,2015-02-24,2015-08-07


In [20]:
df2['delay'] = df2['created_at_y'] - df2['created_at_x']
df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['delay'] = df2['created_at_y'] - df2['created_at_x']


Unnamed: 0,order_id,created_at_x,created_at_y,delay
0,8c86f099-d7fb-4952-b97f-fec68230da6c,2011-08-07,2011-08-25,18 days
1,71a9ad8a-ca7f-4911-a90f-109413665ff2,2014-02-27,2014-08-03,157 days
2,49d63ac9-9ac4-4edb-a316-6a4a48f414f5,2010-01-14,2010-01-22,8 days
3,1c2b399c-990e-47cb-9dbc-fdbc6c22b18d,2014-04-11,2014-04-23,12 days
4,b0e14f60-f8d5-4a4a-990f-e8d98346648c,2017-10-25,2018-04-18,175 days
5,f679b4ed-9588-4c48-80b2-f6d84229bbe6,2018-10-13,2018-12-14,62 days
6,d77302fc-b8dd-4868-9cdd-b70a628dd294,2018-09-14,2018-11-11,58 days
7,0697711f-62c8-46fb-8e93-a15520900045,2010-11-05,2011-02-26,113 days
8,17d8ce37-31a1-40dc-89f8-901ac0319037,2009-04-28,2009-07-31,94 days
9,da233135-9061-45a4-80f8-3c86c9c7dfa7,2015-02-24,2015-08-07,164 days


In [21]:
df3= df2.sort_values(by = 'delay') [:9]
df3

Unnamed: 0,order_id,created_at_x,created_at_y,delay
18,0dd5bfed-a7d5-4cc6-b5dc-8a6b38adedf2,2018-05-04,2018-05-11,7 days
2,49d63ac9-9ac4-4edb-a316-6a4a48f414f5,2010-01-14,2010-01-22,8 days
24,413bed8e-978a-43ee-b139-affb29b7c919,2016-11-24,2016-12-02,8 days
3,1c2b399c-990e-47cb-9dbc-fdbc6c22b18d,2014-04-11,2014-04-23,12 days
38,2d42ff5f-4963-46c0-b435-456f5ba51136,2014-08-24,2014-09-07,14 days
0,8c86f099-d7fb-4952-b97f-fec68230da6c,2011-08-07,2011-08-25,18 days
11,2ad72f89-1431-4ce2-a1f5-26b955be1c97,2013-10-26,2013-11-17,22 days
30,6dbe8831-d2bd-4e08-b020-6e92629e8ea3,2016-09-09,2016-10-04,25 days
36,c8967f0d-5401-4e4f-8af0-7574023a7a53,2012-11-21,2012-12-18,27 days


In [None]:
result = df3.to_dict()

In [None]:
result

In [1]:
result_2 = gt30_df2.groupby(["fulfilled_year", "fulfilled_month"]).agg({"delay": ["mean", "min", "max", "count"]})

NameError: name 'gt30_df2' is not defined