In [1]:
import pandas as pd
import datetime
import random
import names

In [2]:
def random_date(start, end):
    """Pick a random datetime between ``start`` and ``end`` (both inclusive).

    The span between the two bounds is truncated to whole seconds, and the
    offset added to ``start`` is an integer number of seconds drawn with
    ``random.randint``.
    """
    span_seconds = int((end - start).total_seconds())
    offset = datetime.timedelta(seconds=random.randint(0, span_seconds))
    return start + offset

def generate_from(start, end, interval=3600):
    """Build a one-column frame holding the start time of every interval.

    Args:
        start: First interval start (a ``datetime``).
        end: Upper bound of the range; only whole intervals that fit
            between ``start`` and ``end`` are produced.
        interval: Interval length in seconds.

    Returns:
        A DataFrame with a single ``'From Time'`` column.
    """
    slot_count = int((end - start).total_seconds() / interval)
    from_times = []
    for slot in range(slot_count):
        from_times.append(start + datetime.timedelta(seconds=slot * interval))
    return pd.DataFrame({'From Time': from_times})

def generate_to(start, end, interval=3600):
    """Build a one-column frame holding the end time of every interval.

    Args:
        start: Start of the first interval (a ``datetime``).
        end: Upper bound of the range; only whole intervals that fit
            between ``start`` and ``end`` are produced.
        interval: Interval length in seconds.

    Returns:
        A DataFrame with a single ``'To Time'`` column; the first value is
        ``start`` plus one interval.
    """
    slot_count = int((end - start).total_seconds() / interval)
    to_times = []
    for slot in range(1, slot_count + 1):
        to_times.append(start + datetime.timedelta(seconds=slot * interval))
    return pd.DataFrame({'To Time': to_times})

In [3]:
def generate_employee(start, end, n):
    """Generate ``n`` random activity rows for one randomly named employee.

    Args:
        start: Earliest possible activity time (a ``datetime``).
        end: Latest possible activity time (a ``datetime``).
        n: Number of activity rows to generate.

    Returns:
        A DataFrame sorted by ``'active'`` with a fresh 0..n-1 index and the
        columns ``'name'`` (same name on every row), ``'active'`` (random
        timestamp), ``'from'`` (``active`` truncated to the hour) and
        ``'to'`` (``from`` plus one hour).
    """
    # A single name is drawn and repeated: every row belongs to one employee.
    employee_name = names.get_full_name()
    active_times = [random_date(start, end) for _ in range(n)]

    df = pd.DataFrame({'name': [employee_name] * n, 'active': active_times})
    df = df.sort_values(by='active', ignore_index=True)

    # Truncate to the hour. Microseconds are cleared too, otherwise a `start`
    # with a sub-second part would leave `from`/`to` slightly off the hour.
    df['from'] = df['active'].apply(
        lambda ts: ts.replace(minute=0, second=0, microsecond=0)
    )
    df['to'] = df['from'].apply(lambda ts: ts + datetime.timedelta(hours=1))
    return df

def generate_workforce(start, end, num_employees=10, min=1, max=11):
    """Generate activity rows for several random employees in one frame.

    Args:
        start: Earliest possible activity time (a ``datetime``).
        end: Latest possible activity time (a ``datetime``).
        num_employees: Number of employees to generate.
        min: Fewest activity rows per employee (inclusive).
        max: Most activity rows per employee (inclusive).

    Returns:
        The per-employee frames from ``generate_employee`` stacked with a
        fresh 0..N-1 index, or an empty DataFrame when ``num_employees`` is 0.

    Note:
        ``min`` and ``max`` shadow the built-ins inside this function; the
        names are kept so existing keyword calls keep working.
    """
    # Build every per-employee frame first and concatenate once.
    # DataFrame.append was removed in pandas 2.0, and growing a frame inside
    # a loop copies all accumulated rows on every iteration.
    frames = [
        generate_employee(start, end, random.randint(min, max))
        for _ in range(num_employees)
    ]
    if not frames:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [21]:
# Date range used to generate the dummy data.
# NOTE(review): `format` shadows the built-in of the same name; it is kept
# because it is a notebook-level variable.
format = '%m/%d/%Y %I:%M %p'
start_input = "9/1/2020 12:00 AM"
end_input = "10/1/2020 12:00 AM"
start, end = (
    datetime.datetime.strptime(text, format)
    for text in (start_input, end_input)
)

In [29]:
# Build the dummy workforce: 5 employees, each with a random number of
# activity rows between 90 and 150 (inclusive).
# NOTE(review): no random seed is set in the visible cells, so every run
# produces different data.
data = generate_workforce(start, end, num_employees=5, min=90, max=150)

In [30]:
# Number of generated rows.
len(data)

570

In [31]:
# Keep only the first row per (employee, hour bucket). The original index
# labels are kept, so the index has gaps afterwards.
data = data.drop_duplicates(subset=['name', 'from'])

In [85]:
# Display the frame as the cell output.
data

Unnamed: 0,name,active,from,to
0,Antwan Bourgeois,2020-09-01 22:44:43,2020-09-01 22:00:00,2020-09-01 23:00:00
1,Antwan Bourgeois,2020-09-02 00:45:07,2020-09-02 00:00:00,2020-09-02 01:00:00
2,Antwan Bourgeois,2020-09-02 05:47:58,2020-09-02 05:00:00,2020-09-02 06:00:00
3,Antwan Bourgeois,2020-09-02 06:30:54,2020-09-02 06:00:00,2020-09-02 07:00:00
4,Antwan Bourgeois,2020-09-02 13:56:10,2020-09-02 13:00:00,2020-09-02 14:00:00
...,...,...,...,...
565,Frances Herlocker,2020-09-30 00:34:07,2020-09-30 00:00:00,2020-09-30 01:00:00
566,Frances Herlocker,2020-09-30 01:02:38,2020-09-30 01:00:00,2020-09-30 02:00:00
567,Frances Herlocker,2020-09-30 10:02:41,2020-09-30 10:00:00,2020-09-30 11:00:00
568,Frances Herlocker,2020-09-30 14:21:18,2020-09-30 14:00:00,2020-09-30 15:00:00


In [33]:
# Write the frame to an Excel workbook at a path relative to the working
# directory.
data.to_excel('Dummy_data.xlsx')

In [98]:
def create_window(start, end, interval=3600):
    """List the interval boundaries from ``start`` onward.

    Args:
        start: First boundary (a ``datetime``).
        end: Upper bound of the range.
        interval: Spacing between boundaries in seconds.

    Returns:
        A list of datetimes ``start, start + interval, ...`` with one more
        entry than the number of whole intervals that fit between ``start``
        and ``end``; the last entry equals ``end`` when the span is an exact
        multiple of ``interval``.
    """
    whole_intervals = int((end - start).total_seconds() / interval)
    boundaries = []
    for step in range(whole_intervals + 1):
        boundaries.append(start + datetime.timedelta(seconds=step * interval))
    return boundaries

In [91]:
# Narrower range (a single day) used to build the hourly window.
# NOTE(review): this rebinds `start`/`end`, which were also used to generate
# `data`; `format` shadows the built-in of the same name.
format = '%m/%d/%Y %I:%M %p'
start_input = "9/1/2020 12:00 AM"
end_input = "9/1/2020 10:00 PM"
start, end = (
    datetime.datetime.strptime(text, format)
    for text in (start_input, end_input)
)

In [99]:
# Boundaries between `start` and `end`, using create_window's default
# 3600-second spacing.
window = create_window(start, end)

In [100]:
# Display the list of window boundaries.
window

[datetime.datetime(2020, 9, 1, 0, 0),
 datetime.datetime(2020, 9, 1, 1, 0),
 datetime.datetime(2020, 9, 1, 2, 0),
 datetime.datetime(2020, 9, 1, 3, 0),
 datetime.datetime(2020, 9, 1, 4, 0),
 datetime.datetime(2020, 9, 1, 5, 0),
 datetime.datetime(2020, 9, 1, 6, 0),
 datetime.datetime(2020, 9, 1, 7, 0),
 datetime.datetime(2020, 9, 1, 8, 0),
 datetime.datetime(2020, 9, 1, 9, 0),
 datetime.datetime(2020, 9, 1, 10, 0),
 datetime.datetime(2020, 9, 1, 11, 0),
 datetime.datetime(2020, 9, 1, 12, 0),
 datetime.datetime(2020, 9, 1, 13, 0),
 datetime.datetime(2020, 9, 1, 14, 0),
 datetime.datetime(2020, 9, 1, 15, 0),
 datetime.datetime(2020, 9, 1, 16, 0),
 datetime.datetime(2020, 9, 1, 17, 0),
 datetime.datetime(2020, 9, 1, 18, 0),
 datetime.datetime(2020, 9, 1, 19, 0),
 datetime.datetime(2020, 9, 1, 20, 0),
 datetime.datetime(2020, 9, 1, 21, 0),
 datetime.datetime(2020, 9, 1, 22, 0)]

In [96]:
# Display the hour-bucket column that is compared against `window` below.
data['from']

0     2020-09-01 22:00:00
1     2020-09-02 00:00:00
2     2020-09-02 05:00:00
3     2020-09-02 06:00:00
4     2020-09-02 13:00:00
              ...        
565   2020-09-30 00:00:00
566   2020-09-30 01:00:00
567   2020-09-30 10:00:00
568   2020-09-30 14:00:00
569   2020-09-30 18:00:00
Name: from, Length: 533, dtype: datetime64[ns]

In [101]:
# Flag the rows whose hour bucket (`from`) is one of the `window` boundaries.
# Series.isin does the membership test in one vectorised call, replacing the
# per-row scan of the list and the redundant `True if ... else False`.
test = data['from'].isin(window)

In [102]:
# Display the boolean mask (True where `from` falls on a window boundary).
test

0       True
1      False
2      False
3      False
4      False
       ...  
565    False
566    False
567    False
568    False
569    False
Name: from, Length: 533, dtype: bool

In [49]:
# Two tiny single-column frames used to try out row-wise concatenation.
a = pd.DataFrame(['a', 'b'], columns=['name'])
b = pd.DataFrame(['c', 'd'], columns=['name'])

In [50]:
# Stack `b` under `a`, keeping each frame's own index labels.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
c = pd.concat([a, b])

In [51]:
# Show `c` renumbered from 0 with the old index dropped; the result is only
# displayed and is not assigned back to `c`.
c.reset_index(drop=True)

Unnamed: 0,name
0,a
1,b
2,c
3,d
