In [1]:
import pandas as pd
import datetime
import random
import names

In [2]:
def random_date(start, end):
    """Return a random datetime between ``start`` and ``end`` (both inclusive).

    The result is ``start`` plus a whole number of seconds drawn with
    ``random.randint`` from ``0`` up to the truncated number of seconds
    separating the two bounds.

    Args:
        start: Lower bound (a ``datetime.datetime``).
        end: Upper bound (a ``datetime.datetime``).

    Returns:
        ``start`` shifted forward by the randomly chosen number of seconds.
    """
    span_seconds = int((end - start).total_seconds())
    offset = datetime.timedelta(seconds=random.randint(0, span_seconds))
    return start + offset

def generate_from(start, end, interval=3600):
    """Build a one-column frame holding the start time of every interval.

    The window ``start``..``end`` is cut into consecutive slots of
    ``interval`` seconds; a trailing partial slot is dropped because the
    slot count is truncated to an integer.

    Args:
        start: Beginning of the window (``datetime.datetime``).
        end: End of the window (``datetime.datetime``).
        interval: Slot length in seconds (defaults to one hour).

    Returns:
        ``pd.DataFrame`` with a single ``'From Time'`` column, one row per slot.
    """
    slot_count = int((end - start).total_seconds() / interval)
    slot_starts = []
    for slot in range(slot_count):
        slot_starts.append(start + datetime.timedelta(seconds=slot * interval))
    return pd.DataFrame({'From Time': slot_starts})

def generate_to(start, end, interval=3600):
    """Build a one-column frame holding the end time of every interval.

    The window ``start``..``end`` is cut into consecutive slots of
    ``interval`` seconds; a trailing partial slot is dropped because the
    slot count is truncated to an integer.

    Args:
        start: Beginning of the window (``datetime.datetime``).
        end: End of the window (``datetime.datetime``).
        interval: Slot length in seconds (defaults to one hour).

    Returns:
        ``pd.DataFrame`` with a single ``'To Time'`` column, one row per slot.
    """
    slot_count = int((end - start).total_seconds() / interval)
    slot_ends = []
    # Slot k (1-based) ends k whole intervals after the window start.
    for slot in range(1, slot_count + 1):
        slot_ends.append(start + datetime.timedelta(seconds=slot * interval))
    return pd.DataFrame({'To Time': slot_ends})

In [8]:
def generate_employee(start, end, n):
    """Generate ``n`` random activity records for one randomly named employee.

    Args:
        start: Earliest possible activity time (``datetime.datetime``).
        end: Latest possible activity time (``datetime.datetime``).
        n: Number of activity records to generate.

    Returns:
        ``pd.DataFrame`` sorted by ``'active'`` (index reset) with columns:
        ``'name'``   - the same generated full name on every row,
        ``'active'`` - random timestamp from ``random_date(start, end)``,
        ``'from'``   - ``'active'`` truncated to the start of its hour,
        ``'to'``     - ``'from'`` plus one hour.
    """
    # A single name is drawn once and repeated: all rows describe one employee.
    name = [names.get_full_name()] * n
    active = [random_date(start, end) for _ in range(n)]
    df = pd.DataFrame({'name': name, 'active': active}).sort_values(by='active', ignore_index=True)
    # Clear microseconds as well as minutes/seconds so the bucket lands exactly
    # on the hour even when `start` carries sub-second precision.
    df['from'] = df['active'].apply(lambda x: x.replace(minute=0, second=0, microsecond=0))
    df['to'] = df['from'].apply(lambda x: x + datetime.timedelta(hours=1))
    return df

def generate_workforce(start, end, num_employees=10, min=1, max=11):
    """Generate activity records for several employees and stack them.

    Args:
        start: Earliest possible activity time, forwarded to ``generate_employee``.
        end: Latest possible activity time, forwarded to ``generate_employee``.
        num_employees: How many employees to generate.
        min: Smallest number of records per employee (inclusive).
        max: Largest number of records per employee (inclusive).

    Returns:
        ``pd.DataFrame`` with every employee's rows concatenated in generation
        order under a fresh ``0..N-1`` index; an empty frame when
        ``num_employees`` is zero.
    """
    # Collect the per-employee frames first and concatenate once:
    # DataFrame.append was removed in pandas 2.0 and re-copied the
    # accumulated frame on every iteration.
    frames = [
        generate_employee(start, end, random.randint(min, max))
        for _ in range(num_employees)
    ]
    if not frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [9]:
# Reporting window, written as human-readable strings and parsed below.
start_input, end_input = "8/1/2020 12:00 AM", "9/1/2020 12:00 AM"
# month/day/year with a 12-hour clock and AM/PM marker.
format = '%m/%d/%Y %I:%M %p'
start, end = (
    datetime.datetime.strptime(text, format)
    for text in (start_input, end_input)
)

In [10]:
# Simulate 5 employees over the start..end window; each gets a random number
# of activity records between 90 and 150 (inclusive, via random.randint).
# NOTE(review): no random.seed call is visible in this notebook, so the
# generated data presumably differs on every run - confirm if reproducibility matters.
data = generate_workforce(start, end, num_employees=5, min=90, max=150)

In [11]:
# Display the generated activity table (name, active, from, to).
data

Unnamed: 0,name,active,from,to
0,John Rodriguez,2020-08-01 05:14:15,2020-08-01 05:00:00,2020-08-01 06:00:00
1,John Rodriguez,2020-08-01 08:30:47,2020-08-01 08:00:00,2020-08-01 09:00:00
2,John Rodriguez,2020-08-01 15:55:13,2020-08-01 15:00:00,2020-08-01 16:00:00
3,John Rodriguez,2020-08-01 16:00:02,2020-08-01 16:00:00,2020-08-01 17:00:00
4,John Rodriguez,2020-08-01 22:19:19,2020-08-01 22:00:00,2020-08-01 23:00:00
...,...,...,...,...
575,Angela Ceasar,2020-08-30 20:25:30,2020-08-30 20:00:00,2020-08-30 21:00:00
576,Angela Ceasar,2020-08-30 22:32:51,2020-08-30 22:00:00,2020-08-30 23:00:00
577,Angela Ceasar,2020-08-31 00:11:51,2020-08-31 00:00:00,2020-08-31 01:00:00
578,Angela Ceasar,2020-08-31 07:16:12,2020-08-31 07:00:00,2020-08-31 08:00:00


In [12]:
# Calendar date of each activity timestamp, taken with the vectorised
# .dt accessor instead of a Python-level lambda per row.
data['date'] = data['active'].dt.date

In [13]:
# Time-of-day of each activity timestamp, taken with the vectorised
# .dt accessor instead of a Python-level lambda per row.
data['time'] = data['active'].dt.time

In [14]:
import calendar

In [15]:
# English weekday name (e.g. 'Saturday') for each row's date, computed with
# the vectorised .dt.day_name() rather than a per-row calendar lookup.
data['day of week'] = pd.to_datetime(data['date']).dt.day_name()

In [16]:
# Display the activity table again, now with the date, time and day-of-week columns.
data

Unnamed: 0,name,active,from,to,date,time,day of week
0,John Rodriguez,2020-08-01 05:14:15,2020-08-01 05:00:00,2020-08-01 06:00:00,2020-08-01,05:14:15,Saturday
1,John Rodriguez,2020-08-01 08:30:47,2020-08-01 08:00:00,2020-08-01 09:00:00,2020-08-01,08:30:47,Saturday
2,John Rodriguez,2020-08-01 15:55:13,2020-08-01 15:00:00,2020-08-01 16:00:00,2020-08-01,15:55:13,Saturday
3,John Rodriguez,2020-08-01 16:00:02,2020-08-01 16:00:00,2020-08-01 17:00:00,2020-08-01,16:00:02,Saturday
4,John Rodriguez,2020-08-01 22:19:19,2020-08-01 22:00:00,2020-08-01 23:00:00,2020-08-01,22:19:19,Saturday
...,...,...,...,...,...,...,...
575,Angela Ceasar,2020-08-30 20:25:30,2020-08-30 20:00:00,2020-08-30 21:00:00,2020-08-30,20:25:30,Sunday
576,Angela Ceasar,2020-08-30 22:32:51,2020-08-30 22:00:00,2020-08-30 23:00:00,2020-08-30,22:32:51,Sunday
577,Angela Ceasar,2020-08-31 00:11:51,2020-08-31 00:00:00,2020-08-31 01:00:00,2020-08-31,00:11:51,Monday
578,Angela Ceasar,2020-08-31 07:16:12,2020-08-31 07:00:00,2020-08-31 08:00:00,2020-08-31,07:16:12,Monday


In [17]:
# Write the full activity table to data.xlsx (relative path), omitting the index column.
data.to_excel('data.xlsx', index=False)

In [18]:
# Number of activity records per employee per calendar day.
temp = (
    data
    .groupby(['name', 'date'])
    .size()                        # row count within each (name, date) group
    .reset_index(name='counts')    # flatten back to columns: name, date, counts
)

In [19]:
# Write the per-employee daily counts to grouped_data.xlsx (relative path), omitting the index column.
temp.to_excel('grouped_data.xlsx', index=False)

In [21]:
# Display the per-employee, per-day record counts.
temp

Unnamed: 0,name,date,counts
0,Angela Ceasar,2020-08-01,6
1,Angela Ceasar,2020-08-02,4
2,Angela Ceasar,2020-08-03,4
3,Angela Ceasar,2020-08-04,4
4,Angela Ceasar,2020-08-05,4
...,...,...,...
148,John Rodriguez,2020-08-27,7
149,John Rodriguez,2020-08-28,5
150,John Rodriguez,2020-08-29,3
151,John Rodriguez,2020-08-30,4


In [22]:
# Hourly timeline for the whole window: one row per slot, with the slot's
# start ('From Time') and end ('To Time') placed side by side.
from_time, to_time = generate_from(start, end), generate_to(start, end)
time_data = pd.concat([from_time, to_time], axis='columns')

In [23]:
# Write the hourly From/To timeline to timeline.xlsx (relative path), omitting the index column.
time_data.to_excel('timeline.xlsx', index=False)