In [96]:
import abc
import dateparser
from datetime import timedelta

# Names of the data columns.
COLUMN_HEADERS = [
    "cluster_id",
    "lat",
    "long",
    "weekday",
    "hour",
    "date",
    "t/u",
    "index",
]

# strftime pattern the date filters use to turn parsed dates into strings.
DATE_FORMAT = "%Y/%m/%d"

# Weekday number -> English day name; numbering starts at 0 for Monday.
DAY_OF_WEEK = dict(enumerate((
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
)))

class Filter(abc.ABCMeta('_FilterBase', (object,), {})):
    """Abstract base class for filters.

    Concrete filters must provide a ``name`` property and an ``is_match``
    method; a subclass that leaves either one out cannot be instantiated.
    """
    # ABCMeta is applied by inheriting from a throwaway class created with it.
    # A ``__metaclass__`` attribute is only honoured by Python 2 and the
    # ``metaclass=`` keyword only parses on Python 3; this spelling enforces
    # the abstract members on both.

    @abc.abstractproperty
    def name(self):
        """(str) name of filter"""

    @abc.abstractmethod
    def is_match(self, row):
        """Return whether ``row`` passes this filter.

        Args:
            row: the data to test, indexable by column name.

        Returns:
            The result of comparing the relevant column with the filter's
            stored value(s).
        """
        
class DateFilter(Filter):
    """Filter that matches entries whose ``date`` equals one specific day."""

    def __init__(self, date):
        """Parse ``date`` and store it as a ``DATE_FORMAT`` string.

        Args:
            date: any value whose ``str()`` form ``dateparser.parse`` can read.

        Raises:
            ValueError: if the value cannot be parsed as a date.
        """
        dt = dateparser.parse(str(date))
        if dt is None:
            # dateparser.parse reports an unparseable input by returning None;
            # fail here with a clear message instead of on the strftime call.
            raise ValueError("DateFilter could not parse date: {!r}".format(date))
        self.date = dt.strftime(DATE_FORMAT)

    def __str__(self):
        return 'date == {}'.format(self.date)

    @property
    def name(self):
        """(str) name of filter"""
        return "DateFilter"

    def is_match(self, df):
        """Return ``df['date'] == self.date``."""
        return df['date'] == self.date
        

class DateRangeFilter(Filter):
    """Filter that matches entries whose ``date`` falls in an inclusive range.

    The range is expanded into one ``DATE_FORMAT`` string per day and stored
    in ``self.dates``; matching compares the ``date`` column against each.
    """

    def __init__(self, start, end):
        """Build the list of date strings from ``start`` to ``end`` inclusive.

        Args:
            start: first day of the range; parsed with ``dateparser.parse``.
            end: last day of the range; parsed with ``dateparser.parse``.

        Raises:
            ValueError: if either bound cannot be parsed, or if the start day
                is after the end day.
        """
        dt_start = dateparser.parse(str(start))
        dt_end = dateparser.parse(str(end))
        if dt_start is None or dt_end is None:
            # dateparser.parse returns None when it cannot read the input
            raise ValueError(
                "DateRangeFilter could not parse date range: {!r} - {!r}".format(start, end))

        # Work on calendar days only: a time-of-day on either bound must not
        # cause the last day to be skipped while stepping one day at a time.
        day = dt_start.date()
        last_day = dt_end.date()
        if day > last_day:
            raise ValueError("DateRangeFilter requires start date before end date")

        # kind of hack but for date ranges generate all dates in the range.
        # The list is created per instance so separate filters never share
        # (and accumulate into) the same list.
        self.dates = []
        while day <= last_day:
            self.dates.append(day.strftime(DATE_FORMAT))
            day += timedelta(days=1)

    def __str__(self):
        return '{} <= date <= {}'.format(self.dates[0], self.dates[-1])

    @property
    def name(self):
        """(str) name of filter"""
        return "DateRangeFilter"

    def is_match(self, df):
        """Return the OR of ``df['date'] == d`` over every day in the range."""
        mask = False
        for date in self.dates:
            mask = mask | (df['date'] == date)
        return mask
        
class DayOfWeekFilter(Filter):
    """Filter that matches entries whose ``weekday`` equals one day of the week."""

    # Accepted spellings per day.  The position in the tuple is the weekday
    # number, where Monday is 0 and Sunday is 6.
    _SPELLINGS = (
        ('monday', 'mon', 'm'),
        ('tuesday', 'tues', 'tu'),
        ('wednesday', 'wed', 'w'),
        ('thursday', 'thurs', 'th'),
        ('friday', 'fri', 'f'),
        ('saturday', 'sat'),
        ('sunday', 'sun'),
    )

    def __init__(self, day):
        """Store the weekday number for ``day``.

        Args:
            day: an int from 0 (Monday) to 6 (Sunday), or one of the accepted
                day spellings (case and surrounding whitespace are ignored).

        Raises:
            ValueError: for an out-of-range int, an unknown spelling, or a
                value that is neither int nor str.
        """
        if isinstance(day, int):
            if not 0 <= day <= 6:
                raise ValueError("DayFilter as int must fall between 0(Mon) and 6(Sun)")
            self.weekday = day
            return

        if not isinstance(day, str):
            raise ValueError("Invalid type for DayFilter")

        spelled = day.strip().lower()
        for number, spellings in enumerate(self._SPELLINGS):
            if spelled in spellings:
                self.weekday = number
                return
        raise ValueError("Invalid string for DayFilter")

    def __str__(self):
        day_name = DAY_OF_WEEK[self.weekday]
        return 'day == {}'.format(day_name)

    @property
    def name(self):
        """(str) name of filter"""
        return "DayOfWeekFilter"

    def is_match(self, df):
        """Return ``df['weekday'] == self.weekday``."""
        weekdays = df['weekday']
        return weekdays == self.weekday
    
class HourFilter(Filter):
    """Filter that matches entries whose ``hour`` equals one hour of the day."""

    def __init__(self, hour):
        """Store ``hour`` as a 24-hour-clock integer in ``self.hour``.

        Args:
            hour: an int, or a string holding either a plain number ("17")
                or a 12-hour time with an am/pm marker ("5pm", "12 AM").

        Raises:
            ValueError: if the type is unsupported, the string is not a
                number, or the resulting hour is outside 0-23.
        """
        if isinstance(hour, int):
            self.hour = hour
        elif isinstance(hour, str):
            text = hour.strip().lower()
            if 'am' in text:
                self.hour = self._from_12_hour(text.replace('am', ''), False)
            elif 'pm' in text:
                self.hour = self._from_12_hour(text.replace('pm', ''), True)
            else:
                # just try converting to int
                self.hour = int(text)
        else:
            raise ValueError("Invalid type for HourFilter")

        if self.hour < 0 or self.hour > 23:
            raise ValueError("Invalid value for HourFilter")

    @staticmethod
    def _from_12_hour(digits, is_pm):
        """Convert the numeric part of a 12-hour time to a 24-hour value."""
        value = int(digits.strip())
        # "0am" has always been accepted as midnight; there is no "0pm".
        lowest = 1 if is_pm else 0
        if value < lowest or value > 12:
            raise ValueError("Invalid value for HourFilter")
        # 12 wraps to 0 before the pm offset is added: 12am -> 0, 12pm -> 12.
        return value % 12 + (12 if is_pm else 0)

    def __str__(self):
        return 'hour == {}'.format(self.hour)

    @property
    def name(self):
        """(str) name of filter"""
        return "HourFilter"

    def is_match(self, df):
        """Return ``self.hour == df['hour']``."""
        return self.hour == df['hour']
    
class HourRangeFilter(Filter):
    """Filter that matches entries whose ``hour`` lies in an inclusive range.

    Each bound accepts whatever ``HourFilter`` accepts.  Because both bounds
    must hold at once, a range whose start is later than its end matches
    nothing.
    """

    def __init__(self, start, end):
        # HourFilter is used only to parse and validate each bound.
        lower = HourFilter(start)
        self.hour_start = lower.hour
        upper = HourFilter(end)
        self.hour_end = upper.hour

    def __str__(self):
        bounds = (self.hour_start, self.hour_end)
        return '{} <= hour <= {}'.format(*bounds)

    @property
    def name(self):
        """(str) name of filter"""
        return "HourRangeFilter"

    def is_match(self, df):
        """Return whether ``df['hour']`` is within both bounds, inclusive."""
        at_or_after_start = df['hour'] >= self.hour_start
        at_or_before_end = df['hour'] <= self.hour_end
        return at_or_after_start & at_or_before_end

In [97]:
def build_mask(df, filters):
    """Combine the results of several filters with logical AND.

    Prints one "Applying filter" line per filter as it is applied.

    Args:
        df: the data handed to each filter's ``is_match``.
        filters: iterable of objects exposing ``is_match(df)``.

    Returns:
        The AND of every filter's ``is_match(df)`` result.  When ``filters``
        is empty, an all-True boolean Series aligned with ``df.index`` so the
        result can still be used as ``df[mask]``.
    """
    mask = None
    for f in filters:
        print('Applying filter: {}'.format(f))
        match = f.is_match(df)
        mask = match if mask is None else mask & match

    if mask is None:
        # No filter was applied.  A bare ``True`` is not a usable row mask
        # (``df[True]`` is a column lookup), so select every row explicitly.
        import pandas as pd
        return pd.Series(True, index=df.index)
    return mask
            

In [100]:
import pandas as pd

# Five hand-written sample rows, stored as one list per column.
fake_data = {
    'cluster_id': [0, 0, 1, 0, 2],
    'lat':        [40.0, 41.0, 42.0, 43.0, 44.0],
    'long':       [-73.0, -72.0, -71.0, -70.0, -69.0],
    'weekday':    [6, 6, 5, 4, 0],
    'hour':       [8, 12, 23, 11, 6],
    'date':       ['2014/08/17', '2014/08/17', '2014/08/18', '2014/08/19', '2014/08/30'],
    't/u':        ['T', 'T', 'U', 'T', 'T'],
    'index':      [0, 1, 2, 3, 4],
}

df = pd.DataFrame(data=fake_data)

# Keep only rows recorded between 10:00 and 23:00 on 2014/08/17.
filters = [
    HourRangeFilter(start=10, end=23),
    DateFilter(date='2014/08/17'),
]
mask = build_mask(df, filters)
print(df.loc[mask])

Applying filter: 10 <= hour <= 23
Applying filter: date == 2014/08/17
   cluster_id        date  hour  index   lat  long t/u  weekday
1           0  2014/08/17    12      1  41.0 -72.0   T        6
