# Fake Page View generator

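This notebook simulates website traffic by replaying a three-hour dataset of recorded page views (clicks). Events are sent with the same relative timing as in the original recording, either to a Kafka topic or to an HTTP endpoint.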

In [3]:
%%bash
# Install the third-party Python 3 packages this notebook imports:
#   kafka-python - provides KafkaProducer for the "kafka" send mode
#   requests     - used for the "http" send mode
python3 -m pip install kafka-python requests



In [4]:
# Unpack the zipped click dataset into the current working directory
import zipfile

# Zipped dataset shipped alongside the notebook.
ARCHIVEF = "20180113-6-9-with-nulls.json.zip"
# File the send loop reads line by line (presumably the archive's member
# once extracted -- verify against the archive contents).
CLICKSF = "20180113-6-9-with-nulls.json"

with zipfile.ZipFile(ARCHIVEF, mode="r") as archive:
    # Extract every member of the archive; existing files are overwritten.
    archive.extractall(path=".")

In [6]:
# Transport used by the send loop to deliver each click.
# "kafka" publishes to the Kafka producer; any other value (e.g. "http")
# makes the loop POST the click to the HTTP endpoint instead.
SENDMODE = "kafka"
# Uncomment to send over HTTP instead of Kafka:
# SENDMODE = "http"

In [7]:
# Send click data
#
# Replays the clicks stored in CLICKSF (one JSON document per line) with the
# same relative timing as the recording, rewriting each `ts_ingest` so the
# replayed events start "now". Delivery goes to Kafka or HTTP depending on
# SENDMODE.
import json
import logging
import time
from datetime import datetime

import requests
from kafka import KafkaProducer
from kafka.errors import KafkaError
from IPython.display import clear_output

log = logging.getLogger(__name__)

KAFKA_BOOTSTRAP_SERVERS = ['localhost:9092']
KAFKA_TOPIC = 'clicks'
HTTP_ENDPOINT = "http://localhost:5000/clicks"
SEND_TIMEOUT_S = 10          # upper bound for one Kafka ack / HTTP round trip
CLEAR_OUTPUT_EVERY = 1000    # keep the cell output from growing without bound

# Only connect to Kafka when it is the selected transport, so the HTTP mode
# does not require a reachable broker.
producer = None
if SENDMODE == "kafka":
    producer = KafkaProducer(bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS,
                             value_serializer=lambda v: json.dumps(v).encode('utf-8'))


def _send_click(click):
    """Deliver one click over the transport selected by SENDMODE.

    Args:
        click: the decoded JSON record to send.

    Returns:
        True when the click was acknowledged (Kafka) or answered with a
        non-error HTTP status; False when delivery failed. Failures are
        logged with their traceback and never raised, so a single bad send
        does not abort the replay.
    """
    try:
        if SENDMODE == "kafka":
            # Block until the broker acknowledges, so failures surface here.
            producer.send(KAFKA_TOPIC, click).get(timeout=SEND_TIMEOUT_S)
        else:
            response = requests.post(HTTP_ENDPOINT,
                                     data=json.dumps(click).encode("utf-8"),
                                     headers={'Content-Type': 'application/json'},
                                     timeout=SEND_TIMEOUT_S)
            response.raise_for_status()
    except (KafkaError, requests.RequestException):
        log.exception("Failed to send click via %s", SENDMODE)
        return False
    return True


orig_start = None                   # ts_ingest (ms) of the first timestamped click
replay_start = time.time() * 1000   # wall-clock start of this replay, in ms

i = 0

print("Simulating website traffic...")

try:
    with open(CLICKSF, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            click = json.loads(line)

            # A missing or null ts_ingest is treated as "no timestamp".
            orig_event = click.get("ts_ingest") or 0

            if orig_event:
                curr_ts = time.time() * 1000

                if orig_start is None:
                    orig_start = orig_event

                offset_ms = orig_event - orig_start

                # How far ahead of the recording's pace we currently are.
                difference = offset_ms - (curr_ts - replay_start)

                if difference > 0:
                    print("sleeping {0:.2f} seconds".format(difference/1000.0))
                    time.sleep(difference/1000.0)

                # Shift the event onto the replay timeline. Records without a
                # timestamp are forwarded unchanged rather than being given a
                # meaningless value derived from a zero timestamp.
                click['ts_ingest'] = round(offset_ms + replay_start)

            if _send_click(click):
                if orig_event:
                    print("✓ {}".format(datetime.fromtimestamp(orig_event/1000.0)))
                else:
                    print("✓ (no ts_ingest)")

            if i > CLEAR_OUTPUT_EVERY:
                clear_output()
                i = 0
            i = i+1
finally:
    # Release the producer's network resources even when the replay is
    # interrupted (e.g. by stopping the cell).
    if producer is not None:
        producer.close()


sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.622000
✓ 2018-01-13 05:50:42.625000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.631000
✓ 2018-01-13 05:50:42.632000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.646000
✓ 2018-01-13 05:50:42.648000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.656000
sleeping 0.01 seconds
✓ 2018-01-13 05:50:42.665000
sleeping 0.01 seconds


✓ 2018-01-13 05:50:42.678000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.687000
sleeping 0.01 seconds
✓ 2018-01-13 05:50:42.705000
sleeping 0.01 seconds
✓ 2018-01-13 05:50:42.719000
sleeping 0.01 seconds
✓ 2018-01-13 05:50:42.731000
✓ 2018-01-13 05:50:42.733000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.741000
✓ 2018-01-13 05:50:42.744000
sleeping 0.02 seconds
✓ 2018-01-13 05:50:42.771000
✓ 2018-01-13 05:50:42.774000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.779000
✓ 1970-01-01 00:00:00
✓ 2018-01-13 05:50:42.782000
✓ 2018-01-13 05:50:42.787000
sleeping 0.04 seconds
✓ 2018-01-13 05:50:42.829000
sleeping 0.01 seconds
✓ 2018-01-13 05:50:42.840000
✓ 2018-01-13 05:50:42.841000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.853000
sleeping 0.01 seconds
✓ 2018-01-13 05:50:42.866000
sleeping 0.01 seconds
✓ 2018-01-13 05:50:42.879000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.882000
sleeping 0.00 seconds
✓ 2018-01-13 05:50:42.886000
sleeping 0.01 seconds
✓ 2018-01-13 05:50:42.903000
sleeping 0

KeyboardInterrupt: 