In [27]:
from statistics import mean, stdev
from pandas import read_csv, DataFrame, concat

# CSV access log analysed in this notebook (relative path, so it is resolved
# against the kernel's current working directory).
LOG_FILE = 'access_log_format-1.csv'


def read_log(file_name: str) -> DataFrame:
    '''Load the access-log CSV and return it in chronological order.

    The ``datetime`` column is parsed into timestamps and the rows are sorted
    by it. The sort is stable, so rows sharing a timestamp keep their file
    order, and the index is rebuilt as 0..n-1 so that positional and
    label-based access agree.

    Args:
        file_name: Path of the CSV file; it must contain a ``datetime`` column.

    Returns:
        A new DataFrame with one row per log line, ordered by ``datetime``.

    Raises:
        ValueError: if the file has no ``datetime`` column (raised by
            ``read_csv`` while resolving ``parse_dates``).
    '''
    # Parse by column name (the rest of the notebook reads the column by name
    # too) and rely on pandas' default format inference; the old
    # `infer_datetime_format` flag is deprecated.
    ds = read_csv(file_name, parse_dates=['datetime'])
    # The previous `ds['datetime'].sort_index()` assignment left the frame
    # untouched; sort by the timestamp values instead.
    return ds.sort_values('datetime', kind='mergesort').reset_index(drop=True)

def _tag_for_count(ok_count, mean_count, std_count, high_ratio):
    '''Return the tag of one time window given its number of 200-status rows.'''
    if ok_count > mean_count:
        # NOTE(review): the rule compares count / stdev, not the z-score
        # (count - mean) / stdev. Kept as-is to preserve the existing
        # classification; confirm which one is intended.
        if (ok_count / std_count) < high_ratio:
            return 'suspicious'
        return 'high-suspicious'
    return 'normal'


def set_suspicious_tag(df: DataFrame, freq: str = '30min', high_ratio: float = 3) -> DataFrame:
    '''Keep the 200-status rows and label each one with its window's tag.

    The 200-status rows are counted per ``freq`` window with ``resample``
    (empty windows between the first and last one count as 0). Each window is
    tagged from its count:

    * ``'normal'``          - count <= mean of all window counts
    * ``'suspicious'``      - count > mean and count / stdev < ``high_ratio``
    * ``'high-suspicious'`` - count > mean otherwise

    Every row receives the tag of the window it was counted in.

    Args:
        df: Frame with a ``response_code`` column and a datetime-typed
            ``datetime`` column. It is not modified.
        freq: Window size, as a pandas offset alias.
        high_ratio: Threshold on count / stdev separating ``'suspicious'``
            from ``'high-suspicious'``.

    Returns:
        A new DataFrame holding the 200-status rows of ``df`` (original index
        labels kept) grouped by window in chronological order, with an
        ``s_tag`` column appended last. Rows whose ``datetime`` is missing are
        left out. If there are no 200-status rows the result is empty but
        still has the ``s_tag`` column.

    Raises:
        KeyError: if ``response_code`` or ``datetime`` is missing.
        ValueError: if ``df`` already has an ``s_tag`` column.
    '''
    ok_status_df = df[(df['response_code'] == 200) & df['datetime'].notna()]
    if ok_status_df.empty:
        # Nothing to count; return the empty selection with the tag column.
        return ok_status_df.assign(s_tag=None)

    # One entry per window, indexed by the window's start timestamp.
    window_counts = ok_status_df.resample(freq, on='datetime').size()
    mean_count = window_counts.mean()
    # Sample standard deviation (ddof=1). It is NaN for a single window, which
    # is harmless: that window's count equals the mean, so it is 'normal' and
    # the deviation is never used.
    std_count = window_counts.std()
    window_tags = [
        _tag_for_count(count, mean_count, std_count, high_ratio)
        for count in window_counts.tolist()
    ]

    # Locate each row's window from the resample edges themselves, so that
    # membership matches the counts: the window is the last edge <= timestamp.
    edges = window_counts.index.values
    stamps = ok_status_df['datetime'].values
    window_pos = edges.searchsorted(stamps, side='right') - 1

    # Stable ordering by window keeps the input order inside each window.
    order = window_pos.argsort(kind='mergesort')
    labeled_dataset = ok_status_df.iloc[order].copy()
    labeled_dataset.insert(
        labeled_dataset.shape[1],
        's_tag',
        [window_tags[pos] for pos in window_pos[order]],
    )
    return labeled_dataset

Now we keep only the requests that returned a 200 (OK) status and add the suspicious tag to the DataFrame.

In [28]:
# Load the access log named by LOG_FILE into a DataFrame.
df = read_log(LOG_FILE)
# Keep the 200-status rows and attach the 's_tag' label column to them.
l_df = set_suspicious_tag(df)
# print() renders the frame as truncated plain text (pandas elides the middle
# rows and columns with '...').
print(l_df)

              src_ip     userid  ... country       s_tag
0     14.139.187.130     hahiss  ...      FR      normal
1     14.139.187.130   ahuillet  ...      FR      normal
2     14.139.187.130    gtaylor  ...      BR      normal
3     68.180.228.229    terjesa  ...      FR      normal
4     68.180.228.229  smallpaul  ...      US      normal
...              ...        ...  ...     ...         ...
7206  54.234.104.161      yumpy  ...      FR  suspicious
7222  54.234.104.161      houle  ...      BR  suspicious
7225  85.159.196.242  peterhoeg  ...      US  suspicious
7226  85.159.196.242   wikinerd  ...      BR  suspicious
7228  163.172.65.131     munjal  ...      MX  suspicious

[3382 rows x 11 columns]
