In [1]:
from copy import deepcopy

import dateutil as dt

import scipy.stats as st
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline

In [2]:
# Ticket categories as (numeric id, label) pairs. The id is the label's
# position in the sequence below, so ids run 0, 1, 2 in this order.
TICKET_TYPES_LIST = tuple(enumerate(('adult', 'childish', 'concessionary')))

# Lookup table: numeric ticket id -> label.
TICKET_TYPES_DICT = {type_id: label for type_id, label in TICKET_TYPES_LIST}

In [3]:
class LogEntry(object):
    """A single visitor-log record: when something happened, to whom, and with which ticket.

    Attributes:
        date: ``datetime`` obtained by parsing the ``date`` string with dateutil.
        ticket_type: label looked up in ``TICKET_TYPES_DICT``; ``None`` when the
            id is not a known ticket type.
        action: integer event code. This notebook writes 1 for an entry and
            0 for an exit.
        user_id: integer visitor id.

    Entries are ordered by ``date`` alone.
    """

    def __init__(self, date, ticket_type_id, action, user_id, convert_to_tzlocal=False):
        """Parse the raw (string or numeric) fields of one log line.

        Args:
            date: timestamp text understood by ``dateutil.parser.parse``.
            ticket_type_id: ticket id, anything ``int()`` accepts.
            action: event code, anything ``int()`` accepts.
            user_id: visitor id, anything ``int()`` accepts.
            convert_to_tzlocal: when true, re-express ``date`` in the machine's
                local time zone.
        """
        self.date = dt.parser.parse(date)
        if convert_to_tzlocal:
            # The module is imported as `dt`; a bare `tz` name does not exist here.
            self.date = self.date.astimezone(dt.tz.tzlocal())
        self.ticket_type = TICKET_TYPES_DICT.get(int(ticket_type_id))
        self.action = int(action)
        self.user_id = int(user_id)

    def __cmp__(self, other):
        """Three-way comparison by date (hook used by Python 2 only)."""
        if self.date < other.date:
            return -1
        elif self.date == other.date:
            return 0
        else:
            return 1

    def __lt__(self, other):
        """Order by date; Python 3 ignores ``__cmp__`` and sorting only needs ``<``."""
        return self.date < other.date

    def __log(self):
        """Render the entry in the ';'-separated layout used by the log lines."""
        return "%s;%s;%s;%s" % (self.date, self.ticket_type, self.action, self.user_id)

    def __repr__(self):
        return self.__log()

    def __unicode__(self):
        return self.__log()

In [4]:
def generate_time():
    """Build a random (enter, exit) pair of July-2015 timestamp strings.

    Both strings have the layout ``2015-07-DD HH:MM:SS +0Z00`` / ``-0Z00``.
    Every numeric field is drawn from a range whose lower bound is at least
    10, so the fields are always two digits wide without zero padding.

    Drawn ranges (inclusive):
        * enter day 10-24, exit day ``enter_day``-31
        * enter hour 10-21, exit hour ``enter_hour + 1``-23
        * enter minutes/seconds 10-40
        * exit minutes/seconds ``enter_hour + 1``-59
        * UTC offset magnitude 0-9 hours, sign chosen at random for each string

    NOTE(review): the two offsets are independent, so once both strings are
    normalised to one zone a same-day exit can fall before its entry.

    Returns:
        tuple ``(enter_time_str, exit_time_str)``.
    """
    enter_day = np.random.randint(10, 25)
    exit_day = np.random.randint(enter_day, 31 + 1)

    enter_hour = np.random.randint(10, 21 + 1)
    exit_hour = np.random.randint(enter_hour + 1, 23 + 1)

    # NOTE(review): the exit minute/second lower bound is tied to enter_hour,
    # not to the enter minute/second. It keeps the value two digits wide;
    # whether enter_minutes / enter_seconds was intended is unclear.
    enter_minutes = np.random.randint(10, 40 + 1)
    exit_minutes = np.random.randint(enter_hour + 1, 59 + 1)

    enter_seconds = np.random.randint(10, 40 + 1)
    exit_seconds = np.random.randint(enter_hour + 1, 59 + 1)

    enter_time_zone = np.random.randint(0, 9 + 1)
    enter_time_zone_direction = '+' if np.random.randint(0, 2) else '-'
    exit_time_zone = np.random.randint(0, 9 + 1)
    exit_time_zone_direction = '+' if np.random.randint(0, 2) else '-'

    # Use the randomly drawn offset signs; they were previously computed and
    # then discarded in favour of a fixed '-' (enter) and '+' (exit).
    enter_time_str = "2015-07-%s %s:%s:%s %s0%s00" % (
        enter_day, enter_hour, enter_minutes, enter_seconds,
        enter_time_zone_direction, enter_time_zone)
    exit_time_str = "2015-07-%s %s:%s:%s %s0%s00" % (
        exit_day, exit_hour, exit_minutes, exit_seconds,
        exit_time_zone_direction, exit_time_zone)

    return enter_time_str, exit_time_str

In [5]:
def generate_visitor(user_id=None):
    """Simulate one visit and return its two log entries.

    A random time pair comes from ``generate_time`` and a random ticket type
    id is drawn from 0, 1 or 2; both entries share that ticket type and the
    given ``user_id``.

    Args:
        user_id: visitor id forwarded to ``LogEntry``, which converts it with
            ``int()``.

    Returns:
        tuple ``(enter_log, exit_log)``; the entry carries action 1 and the
        exit carries action 0.
    """
    entered_at, left_at = generate_time()
    ticket = np.random.randint(0, 3)

    return (
        LogEntry(entered_at, ticket, 1, user_id),
        LogEntry(left_at, ticket, 0, user_id),
    )

In [6]:
# Simulate 1000 visitors (ids 0..999) and split their logs into one tuple of
# entries and one tuple of exits.
enters, exits = zip(*[generate_visitor(i) for i in range(1000)])

# One chronologically ordered list: all entries followed by all exits, then
# sorted using LogEntry's date ordering.
data = sorted(enters + exits)

In [7]:
# Hand-written fixture. Each line is "date;ticket_type_id;action;user_id" and
# is split on ';' straight into LogEntry's positional arguments.
test_data = [
    LogEntry(*raw_line.split(';'))
    for raw_line in (
        '2015-07-10 11:30:28 +0300;0;1;390',
        '2015-07-10 11:32:28 +0300;0;1;391',
        '2015-07-10 11:33:28 +0300;0;0;390',
        '2015-07-10 11:34:28 +0300;0;0;391',
        '2015-07-10 11:35:28 +0300;0;1;392',
        '2015-07-10 11:36:28 +0300;0;1;393',
        '2015-07-10 11:37:28 +0300;0;0;392',
        '2015-07-10 11:38:28 +0300;0;0;393',
    )
]

In [8]:
# Echo the parsed fixture; each entry is rendered through LogEntry.__repr__.
test_data

[2015-07-10 11:30:28+03:00;adult;1;390,
 2015-07-10 11:32:28+03:00;adult;1;391,
 2015-07-10 11:33:28+03:00;adult;0;390,
 2015-07-10 11:34:28+03:00;adult;0;391,
 2015-07-10 11:35:28+03:00;adult;1;392,
 2015-07-10 11:36:28+03:00;adult;1;393,
 2015-07-10 11:37:28+03:00;adult;0;392,
 2015-07-10 11:38:28+03:00;adult;0;393]

In [9]:
def compute_popular_time_intervals(logs, ticket_types=None):
    """Find every time interval during which the head-count was at its peak.

    The logs are replayed in sorted order while a running head-count is kept
    (+1 for a truthy ``action``, -1 otherwise). Whenever a new maximum is
    reached, the intervals collected so far are discarded. Whenever the count
    drops away from the current maximum, the span between the previous event
    and this one is recorded together with the per-ticket-type counts that
    held during it.

    A progress line is printed each time a new maximum is reached.

    Args:
        logs: iterable of entries exposing ``date``, ``action`` and
            ``ticket_type`` and sortable among themselves (e.g. ``LogEntry``).
        ticket_types: optional iterable of ticket-type labels to tally.
            Defaults to the labels in ``TICKET_TYPES_DICT``.

    Returns:
        list of ``((start, end), distribution)`` tuples, where ``distribution``
        maps ticket-type label to the number of such visitors inside during
        the interval. ``end`` is ``None`` when the peak still holds after the
        last log entry. The list is empty for empty input.
    """
    if ticket_types is None:
        ticket_types = TICKET_TYPES_DICT.values()

    tickets_distribution = {label: 0 for label in ticket_types}

    max_visitors = -1
    curr_visitors = 0
    prev_visitors = 0
    prev_date = None

    intervals = []

    for log in sorted(logs):
        action = 1 if log.action else -1
        curr_date = log.date

        curr_visitors += action

        if curr_visitors > max_visitors:
            max_visitors = curr_visitors
            # Single pre-formatted argument: prints the same text whether
            # `print` is a statement (Python 2) or a function (Python 3).
            print("%s  action =  %s  max =  %s" % (log, log.action, max_visitors))
            # A higher peak invalidates every interval found at the old peak.
            del intervals[:]

        # The count just fell from the peak: the peak lasted from the previous
        # event up to this one.
        if prev_visitors > curr_visitors and prev_visitors == max_visitors:
            # The distribution is updated only after this check, so it still
            # describes the state that held during the interval. Values are
            # plain ints, so a shallow copy is a full snapshot.
            intervals.append(((prev_date, curr_date), dict(tickets_distribution)))

        prev_visitors = curr_visitors
        prev_date = curr_date

        tickets_distribution[log.ticket_type] += action

    # The logs end while the count is still at its peak: open-ended interval.
    if prev_visitors == max_visitors:
        intervals.append(((prev_date, None), dict(tickets_distribution)))

    return intervals

In [10]:
intervals = compute_popular_time_intervals(test_data)

# Report each peak interval in local time, followed by the share of every
# ticket type among the visitors present during it.
for i, (time_interval, tickets_distribution) in enumerate(intervals):
    # Single pre-formatted argument per print: same output whether `print` is
    # a statement (Python 2) or a function (Python 3).
    print("Interval #%s" % (i + 1))

    # An open-ended interval carries None as a bound; only real datetimes are
    # converted to the local zone.
    from_date, to_date = time_interval
    if from_date:
        from_date = from_date.astimezone(dt.tz.tzlocal())
    if to_date:
        to_date = to_date.astimezone(dt.tz.tzlocal())
    print("   from %s" % from_date)
    print("   to   %s" % to_date)

    visitors_amount = sum(tickets_distribution.values())
    for key, val in tickets_distribution.items():
        # Guard the percentage against an interval whose counts sum to zero.
        percent = round(val / float(visitors_amount) * 100, 2) if visitors_amount else 0.0
        print("    %s = %s%% (%s)" % (key, percent, val))



2015-07-10 11:30:28+03:00;adult;1;390  action =  1  max =  1
2015-07-10 11:32:28+03:00;adult;1;391  action =  1  max =  2
Interval #1
   from 2015-07-10 11:32:28+03:00
   to   2015-07-10 11:33:28+03:00
    childish = 0.0% (0)
    adult = 100.0% (2)
    concessionary = 0.0% (0)
Interval #2
   from 2015-07-10 11:36:28+03:00
   to   2015-07-10 11:37:28+03:00
    childish = 0.0% (0)
    adult = 100.0% (2)
    concessionary = 0.0% (0)


In [11]:
# Scratch check: slice with start omitted, stop=1 and step=2; only index 0 is in range.
"Hello"[:1:2]

'H'

In [12]:
# Scratch check: a chained comparison, evaluated as (1 <= 2) and (2 <= 3).
bool(1 <= 2<=3)

True

In [13]:
# Scratch check: a bare float literal, echoed back as the cell output.
2.2

2.2