In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil import parser
import time
from itertools import starmap

%matplotlib inline

In [3]:
def date_string(dt):
    return dt.strftime("%d %B, %Y (%I:%M %p)")

In [59]:
def generate_slots(iid, subject, n):
    now = datetime.now()
    numdays = np.random.randint(1,31,(n,))
    iids = [iid for i in range(n)]
    subs = [subject for i in range(n)]
    start = []
    end = []
    for i in list(sorted(numdays)):
        day = now + timedelta(days=int(i))
        day = day.replace(hour=np.random.randint(6,24), minute=0, second=0, microsecond=0) 
        dayplus = day + timedelta(hours=1)
        start.append(day)
        end.append(dayplus)
        #print(str(date_string(day)+' __to__ '+date_string(dayplus)))
    slots = zip(iids, subs, start, end)
    slots_df = pd.DataFrame(slots,columns=['interviewer','subject','start','end'])
    slots_df.drop_duplicates(inplace=True)
    return slots_df

In [60]:
def get_slot_schedule(interviewers):
    df = pd.concat(list(starmap(generate_slots, interviewers)))
    return df.reset_index().drop('index',axis=1)

In [61]:
def readable(df):
    df1 = pd.DataFrame()
    df1['interviewer'] = df['interviewer']
    df1['subject'] = df['subject']
    df1['start'] = df['start'].apply(date_string)
    df1['end'] = df['end'].apply(date_string)
    return df1

In [62]:
interviewers = [('Akshay','DS',12), ('Tarang','PM',18), ('Vibhanshu','PM', 7)]

In [90]:
df = get_slot_schedule(interviewers)
df

Unnamed: 0,interviewer,subject,start,end
0,Akshay,DS,2019-08-27 23:00:00,2019-08-28 00:00:00
1,Akshay,DS,2019-08-28 06:00:00,2019-08-28 07:00:00
2,Akshay,DS,2019-08-31 20:00:00,2019-08-31 21:00:00
3,Akshay,DS,2019-09-04 20:00:00,2019-09-04 21:00:00
4,Akshay,DS,2019-09-09 14:00:00,2019-09-09 15:00:00
5,Akshay,DS,2019-09-11 09:00:00,2019-09-11 10:00:00
6,Akshay,DS,2019-09-16 16:00:00,2019-09-16 17:00:00
7,Akshay,DS,2019-09-19 14:00:00,2019-09-19 15:00:00
8,Akshay,DS,2019-09-24 22:00:00,2019-09-24 23:00:00
9,Akshay,DS,2019-09-24 21:00:00,2019-09-24 22:00:00


In [91]:
df1 = readable(df)
df1

Unnamed: 0,interviewer,subject,start,end
0,Akshay,DS,"27 August, 2019 (11:00 PM)","28 August, 2019 (12:00 AM)"
1,Akshay,DS,"28 August, 2019 (06:00 AM)","28 August, 2019 (07:00 AM)"
2,Akshay,DS,"31 August, 2019 (08:00 PM)","31 August, 2019 (09:00 PM)"
3,Akshay,DS,"04 September, 2019 (08:00 PM)","04 September, 2019 (09:00 PM)"
4,Akshay,DS,"09 September, 2019 (02:00 PM)","09 September, 2019 (03:00 PM)"
5,Akshay,DS,"11 September, 2019 (09:00 AM)","11 September, 2019 (10:00 AM)"
6,Akshay,DS,"16 September, 2019 (04:00 PM)","16 September, 2019 (05:00 PM)"
7,Akshay,DS,"19 September, 2019 (02:00 PM)","19 September, 2019 (03:00 PM)"
8,Akshay,DS,"24 September, 2019 (10:00 PM)","24 September, 2019 (11:00 PM)"
9,Akshay,DS,"24 September, 2019 (09:00 PM)","24 September, 2019 (10:00 PM)"


In [65]:
def get_params(df):
    print(df)

In [130]:
def get_flags(dt, shift = (9, 18), half_point = 14, return_dict=False):
    
    #mention s1 and s2 as hour number in 24 hours for start and end of shift respectively (default = 9am to 6pm)
    #mention h as hour number in 24 hours for point of day where second half begins (default = 2pm)
    s1, s2 = shift
    h = half_point
    
    ts = pd.Timestamp(dt)
    
    ##Weekday or Weekend flags
    weekend = int(ts.dayofweek in [5,6])
    weekday = int(ts.dayofweek in [0,1,2,3,4])
    
    
    ## Before or after office flags
    work_start = pd.Timestamp(datetime(ts.year, ts.month, ts.day, s1))
    work_end = pd.Timestamp(datetime(ts.year, ts.month, ts.day, s2))
    office_hours = int(ts >= work_start and ts < work_end)
    after_office = int(not(office_hours))
    
    ##first half or second half flags
    halfday_point = pd.Timestamp(datetime(ts.year, ts.month, ts.day, h))
    first_half = int(ts < halfday_point)
    second_half = int(ts >= halfday_point)
    
    
    metainfo = {'weekday':weekday, 
                'weekend': weekend, 
                'office_hours':office_hours, 
                'after_office':after_office, 
                'first_half':first_half,
                'second_half':second_half
               }
    
    if return_dict == True:
        return metainfo
    else:
        return list(metainfo.values())

In [131]:
dtt = df['start'].iloc[0]
dt = pd.Timestamp(dtt)
dt

Timestamp('2019-08-27 23:00:00')

In [216]:
get_flags(dt)

[1, 0, 0, 1, 0, 1]

In [133]:
get_flags(datetime.now(), return_dict=True)

{'weekday': 1,
 'weekend': 0,
 'office_hours': 1,
 'after_office': 0,
 'first_half': 0,
 'second_half': 1}

In [157]:
from sklearn.metrics import pairwise

In [235]:
def get_best_slots(slot_vectors, pref_vector):
    similar_idx = np.argsort(pairwise.cosine_similarity([pref_vector],slot_vectors)).flatten()
    return list(reversed(similar_idx))

In [237]:
np.array(get_best_slots(slot_vectors, [1,0,1,1,0,1]))

array([34, 21,  3,  4,  6,  7,  8,  9, 14, 33, 18, 19, 20,  0, 32, 29, 23,
       22, 27,  1,  2, 31,  5, 30, 28, 10, 11, 26, 15, 16, 25, 17, 12, 13,
       24])

# TESTING

In [218]:
def get_top_n(slot_vectors, pref_vector, slot_list, n=3):
    sorted_indexes = get_best_slots(slot_vectors, pref_vector)
    sorted_slots = [slot_list[i] for i in sorted_indexes]
    recommended = sorted_slots[:n]
    
    #make readable
    readable_dt = [date_string(i) for i in recommended]
    
    #get days
    dayslist = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    day_of_week = [dayslist[i.dayofweek] for i in recommended]
    return list(zip(readable_dt, day_of_week))

In [219]:
pref_vector = [0,1,1,1,1,1]  #Weekday, office hours, first half

#slots
slot_list = list(df['start'])
slot_vectors = [get_flags(i) for i in slot_list]

In [220]:
get_top_n(slot_vectors, pref_vector, slot_list, 5)

[('21 September, 2019 (09:00 AM)', 'Saturday'),
 ('31 August, 2019 (08:00 PM)', 'Saturday'),
 ('01 September, 2019 (10:00 AM)', 'Sunday'),
 ('01 September, 2019 (09:00 AM)', 'Sunday'),
 ('18 September, 2019 (07:00 PM)', 'Wednesday')]