In [None]:
import math

import pandas as pd
import matplotlib.pyplot as plt
from typing import Tuple, Dict, List

# Column holding the request time as Unix epoch seconds.
UNIX_TIMESTAMP_COL = 'Timestamp'
# Column that receives the parsed datetime version of UNIX_TIMESTAMP_COL.
# NOTE(review): this name was referenced by the loading cell but never defined,
# which raised NameError on a fresh kernel; the value is a suggested column
# name -- adjust if another name is expected downstream.
REQUEST_TIMESTAMP_COL = 'RequestTimestamp'

In [None]:
# Load the zipped CSV of raw requests.
rawdata = pd.read_csv('gs://anomaly_detection_inputs/saltdsassignment/data.csv.zip')
# Work with integer epoch seconds, then derive a datetime column from them.
rawdata[UNIX_TIMESTAMP_COL] = rawdata[UNIX_TIMESTAMP_COL].astype(int)
rawdata[REQUEST_TIMESTAMP_COL] = pd.to_datetime(rawdata[UNIX_TIMESTAMP_COL], unit='s')

# Set time interval values

In [None]:
def generate_time_intervals(unix_start_time: int, unix_end_time: int, 
                            time_interval: int) -> Dict[int, int]:
    """
    Map every Unix second of a time range to a representative value of its window.

    The range is cut into consecutive windows of ``time_interval`` seconds,
    starting at ``unix_start_time``. Every second inside a window is mapped to
    the window's lower boundary plus ``ceil(time_interval / 2)``.

    Windows are generated until one ends strictly after ``unix_end_time``, so
    ``unix_end_time`` itself is always covered and the last window may extend
    past it. At least one window is always generated.

    Args:
        unix_start_time: first second (inclusive) of the range.
        unix_end_time: last second (inclusive) of the range.
        time_interval: window length in seconds; must be positive.

    Returns:
        Dictionary from each covered Unix second to its window's representative value.

    Raises:
        ValueError: if ``time_interval`` is not positive (the windows would
            never advance past ``unix_end_time``).
    """
    if time_interval <= 0:
        raise ValueError(f'time_interval must be a positive number of seconds, got {time_interval}')

    # Offset of the representative value from the start of each window.
    half_window = math.ceil(0.5 * time_interval)

    alternative_time: Dict[int, int] = {}
    lower_boundary = unix_start_time

    while True:
        upper_boundary = lower_boundary + time_interval
        window_seconds = range(lower_boundary, upper_boundary)
        alternative_time.update(dict.fromkeys(window_seconds, lower_boundary + half_window))

        # Strict comparison: when a window ends exactly on unix_end_time, one more
        # window is needed because the upper boundary itself is exclusive.
        if upper_boundary > unix_end_time:
            return alternative_time

        lower_boundary = upper_boundary


def set_time_intervals(rawdata: pd.DataFrame, time_intervals_list: List[int]) -> pd.DataFrame:
    """
    Add one bucketed-timestamp column per requested window size.

    For each window size ``n`` in ``time_intervals_list`` a column named
    ``interval_window_{n}_sec`` is added, holding the value of
    ``UNIX_TIMESTAMP_COL`` replaced by the representative value of the
    ``n``-second window it falls into (see ``generate_time_intervals``).

    Args:
        rawdata: frame containing the ``UNIX_TIMESTAMP_COL`` column. It is
            modified in place.
        time_intervals_list: window sizes, in seconds.

    Returns:
        The same ``rawdata`` object, with the new columns added.
    """
    timestamps = rawdata[UNIX_TIMESTAMP_COL]

    # With no rows, min()/max() are NaN and no window can be generated;
    # just add the (empty) columns so the output schema stays the same.
    if timestamps.empty:
        for time_interval in time_intervals_list:
            rawdata[f'interval_window_{time_interval}_sec'] = timestamps
        return rawdata

    unix_start_time = timestamps.min()
    # The windows must span up to the latest timestamp. Using min() here
    # generated only the first window and left every later timestamp unbucketed.
    unix_end_time = timestamps.max()

    for time_interval in time_intervals_list:
        time_intervals = generate_time_intervals(unix_start_time, unix_end_time,
                                                 time_interval)

        rawdata[f'interval_window_{time_interval}_sec'] = timestamps.replace(time_intervals)

    return rawdata

# Add bucketed-timestamp columns for 10 s, 30 s, 1 min, 3 min and 5 min windows.
rawdata = set_time_intervals(rawdata, time_intervals_list=[10, 30, 60, 180, 300])

In [None]:
# Raise the display limit so the 200-row preview below is rendered in full.
pd.options.display.max_rows = 200
rawdata.head(n=200)

# Analyze the number of requests associated with a user or an API

In [None]:
pd.set_option('display.max_rows', 200)
# `route_data` used to be bound only by a loop in a later cell, so this cell
# raised NameError on Restart & Run All. Bind it explicitly to the rows of one
# sample route (the first group) before previewing it.
sample_route, route_data = next(iter(rawdata.groupby('Route')))
route_data.head(200)

In [None]:
# Plot, for every route, the number of requests falling into each 30-second window.
for route, route_data in rawdata.groupby('Route'):
    # `Series.size` is an integer attribute, so calling it raises TypeError;
    # grouping by the window column and taking the group sizes gives the
    # per-window request count.
    requests_per_window = route_data.groupby('interval_window_30_sec').size()
    plt.plot(requests_per_window.index, requests_per_window.values)

plt.xlabel('30-second window (Unix seconds)')
plt.ylabel('Number of requests')
plt.title('Requests per 30-second window, one line per route')
plt.show()

In [None]:
# Display the frame, including the interval_window_*_sec columns added earlier.
rawdata

In [None]:
# Scratch check: build the 10-second window mapping over the full timestamp range.
a = generate_time_intervals(
    unix_start_time=rawdata[UNIX_TIMESTAMP_COL].min(),
    unix_end_time=rawdata[UNIX_TIMESTAMP_COL].max(),
    time_interval=10,
)

In [None]:
# Spot-check the window value assigned to one hard-coded Unix second
# (raises KeyError if that second is outside the generated range).
a[1577829614]

In [None]:
# Scratch check: apply the 10-second window mapping to the timestamp column.
rawdata[UNIX_TIMESTAMP_COL].replace(to_replace=a)