In [None]:
def read_csv_with_time(path, time_fields, n_rows, usecols=None):
    '''
      Read the first n_rows rows of a csv into a dataframe and convert the
      indicated time_fields from Unix timestamps (seconds) to datetime values.

      The conversion is applied after loading rather than through read_csv's
      date_parser argument, which pandas deprecated in version 2.0.

      Arguments
      _________
        path: str
          The path where the file is located
        time_fields: List[str]
          A list of the fields to be converted to datetime; each one must be
          among the loaded columns
        n_rows: int
          The number of rows to be considered
        usecols: List[str]
          The list of the columns to be loaded (every column when None)

      Returns
      _______
        a pandas dataframe containing the processed file
    '''
    df = pd.read_csv(path, header='infer', nrows=n_rows, usecols=usecols)

    # The columns are loaded as plain numbers; interpret them as seconds
    # since the Unix epoch.
    for field in time_fields:
        df[field] = pd.to_datetime(df[field], unit='s')

    return df

def hour_in_range(str_hour, range_hour):
    '''
      Tell whether an hour lies inside an hour range, both bounds included.

      Arguments
      _________
        str_hour: str
          in the format HH:MM:SS
        range_hour: Tuple(str)
          a tuple of strings in the form (HH:MM:SS, HH:MM:SS) where the first
          hour is lower than the second

      Returns
      _______
        True when str_hour is between the two bounds (inclusive), False otherwise

      An AssertionError is raised when the first bound is not lower than the second.
    '''
    lower, upper = range_hour
    bounds_order = hour_comparator(lower, upper)
    assert (bounds_order == -1), "A range is valid only if the first element is lower than the second"

    # Each comparison yields -1, 0 or 1. The product is negative only when the
    # hour is on the wrong side of exactly one bound, i.e. outside the range.
    versus_lower = hour_comparator(str_hour, lower)
    versus_upper = hour_comparator(upper, str_hour)
    return (versus_lower * versus_upper) >= 0

def hour_comparator(str_h1, str_h2):
    '''
      Compare two hours given as HH:MM:SS strings and report their ordering
      as an integer.

      Arguments
      __________
        str_h1: str
          in the format HH:MM:SS
        str_h2: str
          in the format HH:MM:SS

      Returns
      _______
        An integer representing the comparison between the given strings:
          -1  if the first is less than the second
           0  if the two hours are the same
           1  if the first is greater than the second
    '''
    # Unpacking into three names keeps the requirement of exactly three fields.
    h_a, m_a, s_a = (int(part) for part in str_h1.split(':'))
    h_b, m_b, s_b = (int(part) for part in str_h2.split(':'))

    # Tuples compare lexicographically: hours first, then minutes, then seconds.
    first = (h_a, m_a, s_a)
    second = (h_b, m_b, s_b)
    if first > second:
        return 1
    if first < second:
        return -1
    return 0

def get_range_index(str_hour, ranges):
    '''
      Find the position, within a list of hour ranges, of the first range
      that contains the given hour.

      Arguments
      _________
        str_hour: str
          in the format HH:MM:SS
        ranges: List[Tuple(str)]
          List containing hour ranges, i.e. a list of tuples of strings in the
          form (HH:MM:SS, HH:MM:SS) where the first hour is lower than the second

      Return
      ______
        an integer indicating the index of the first range containing the hour,
        or -1 when no range contains it
    '''
    for position, hour_range in enumerate(ranges):
        if hour_in_range(str_hour, hour_range):
            return position
    return -1

def transform_in_hour_ranges(df, column, ranges):
    '''
      Replace every datetime of the given column with the index of the hour
      range (taken from the given list) its time of day belongs to.

      The dataframe is modified in place and also returned.

      Arguments
      _________
        df: pd.DataFrame
        column: str
          the name of a Datetime column of the df
        ranges: List[Tuple(str)]
          List containing hour ranges, i.e. a list of tuples of strings in the
          form (HH:MM:SS, HH:MM:SS) where the first hour is lower than the second

      Return
      ______
        the df with the given column modified

    '''
    def _to_range_index(moment):
        # Only the time of day matters, so the date part is dropped here.
        time_of_day = moment.strftime('%H:%M:%S')
        return get_range_index(time_of_day, ranges)

    df[column] = df[column].apply(_to_range_index)
    return df


# Functions actually used by the analysis: read_csv_with_time (defined above)
# and the integer-range helpers defined below.

def get_integer_ranges(ranges):
    '''
      Convert hour ranges written as strings into ranges of integer tuples.

      Arguments
      _________
        ranges: List[Tuple(str)]
          each element holds, in its first two positions, the range bounds
          as colon-separated strings such as HH:MM:SS

      Returns
      _______
        a list of pairs (lower, upper) where each bound is a tuple of the
        integers obtained by splitting the string on ':'
    '''
    def _as_int_tuple(str_hour):
        return tuple(int(piece) for piece in str_hour.split(':'))

    converted = []
    for bounds in ranges:
        converted.append((_as_int_tuple(bounds[0]), _as_int_tuple(bounds[1])))
    return converted

def get_integer_range_index(tuple_hour, integer_ranges):
    '''
      Find the position of the first integer range that contains the given hour.

      Arguments
      _________
        tuple_hour: Tuple(int)
          the hour to look up, comparable with the range bounds
        integer_ranges: List[Tuple]
          a list of (lower, upper) pairs; both bounds are inclusive

      Returns
      _______
        the index of the first range containing tuple_hour, or -1 when none does
    '''
    for position, (lower, upper) in enumerate(integer_ranges):
        if lower <= tuple_hour <= upper:
            return position
    return -1

In [1]:
# Location of the reviews csv and name of its creation-timestamp column.
fname = '/content/drive/MyDrive/HW2-ADM/steam_reviews.csv'
ts_created = 'timestamp_created'

# Default hour ranges as (first hour, last hour) pairs in HH:MM:SS format.
# The list starts at 06:00:00 and wraps past midnight, ending at 05:59:59.
def_ranges = [
    ('06:00:00', '10:59:59'),
    ('11:00:00', '13:59:59'),
    ('14:00:00', '16:59:59'),
    ('17:00:00', '19:59:59'),
    ('20:00:00', '23:59:59'),
    ('00:00:00', '02:59:59'),
    ('03:00:00', '05:59:59'),
]