# Fake Page View generator

This notebook generates page views based on a three-hour dataset.

In [1]:
%%bash
# Install the required Python 3 dependencies.
# kafka-python provides the Kafka producer; requests is imported by the
# click-sending cell for the HTTP send mode, so it has to be present as well.
python3 -m pip install kafka-python requests  # type: ignore



In [2]:
# Extract archive of click data
import zipfile
import os

# CLICKSF is the click file the sending cell reads; ARCHIVEF is the zip archive
# that is unpacked when that file is not present yet.
CLICKSF = "20180113-6-9-with-nulls.json"
ARCHIVEF = "20180113-6-9-with-nulls.json.zip"

# Unpack into the current directory only when the click file is missing,
# so re-running this cell does not extract the archive again.
if not os.path.isfile(CLICKSF):
    with zipfile.ZipFile(ARCHIVEF, mode="r") as archive:
        archive.extractall(path=".")

In [4]:
# SENDMODE selects how the click-sending cell delivers each event.
# Only the value "kafka" selects the Kafka path; any other value is sent over HTTP.
#
# "http": send the clicks to the endpoint in order to test your implementation of the endpoint.
# SENDMODE = "http"

# "kafka": send the clicks straight to Kafka in order to simulate a working endpoint.
# This allows you to test cleaning code without a working endpoint.
SENDMODE = "kafka"


In [5]:
# Send click data
import json
import time
from datetime import datetime
import itertools

import requests
from kafka import KafkaProducer
from kafka.errors import KafkaError
from IPython.display import clear_output

# Replay the recorded clicks, shifting their timestamps so the replay starts "now".
# CLICKSF (path of the click file) and SENDMODE ("kafka" or anything else for
# HTTP) are defined in earlier cells.

KAFKA_BOOTSTRAP_SERVERS = ['localhost:9092']
KAFKA_TOPIC = 'clicks'
CLICKS_ENDPOINT = "http://localhost:5000/clicks"
MAX_CLICKS = 1633           # number of lines read from CLICKSF
SEND_TIMEOUT_S = 10         # seconds to wait for one Kafka acknowledgement / HTTP response
CLEAR_OUTPUT_AFTER = 1000   # clear the cell output once more lines than this were printed

# Only connect to Kafka when it is the selected delivery channel; in HTTP mode
# no producer is needed.
producer = None
if SENDMODE == "kafka":
    producer = KafkaProducer(bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)

orig_start = 0                      # first non-null ts_ingest found in the file (ms)
replay_start = time.time() * 1000   # wall-clock time at which this replay started (ms)

i = 0  # lines printed since the output was last cleared

print("Simulating website traffic...")

try:
    with open(CLICKSF, 'r', encoding='utf-8') as f:
        for line in itertools.islice(f, MAX_CLICKS):
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # crashing in json.loads.
            if not line.strip():
                continue

            click = json.loads(line)

            # A missing or null ts_ingest is treated as 0.
            orig_event = click.get("ts_ingest") or 0

            # The first non-null timestamp anchors the original timeline.
            if orig_event and not orig_start:
                orig_start = orig_event

            # Shift the event by the same offset it had from the first event.
            # Events without a timestamp end up at replay_start - orig_start.
            # NOTE(review): confirm whether null timestamps should instead be
            # forwarded unchanged; the existing rewrite is kept as-is here.
            click['ts_ingest'] = round((orig_event - orig_start) + replay_start)

            payload = json.dumps(click).encode('utf-8')

            if SENDMODE == "kafka":
                # Block until the broker acknowledges the record; a failure
                # or timeout raises and stops the replay.
                producer.send(KAFKA_TOPIC, payload).get(timeout=SEND_TIMEOUT_S)
                delivered = True
            else:
                response = requests.post(
                    CLICKS_ENDPOINT,
                    data=payload,
                    headers={'Content-Type': 'application/json'},
                    timeout=SEND_TIMEOUT_S,  # never hang forever on a stuck endpoint
                )
                delivered = response.ok

            # Events without a timestamp are shown as the Unix epoch.
            event_time = datetime.fromtimestamp(orig_event / 1000.0)
            if delivered:
                print("✓ {}".format(event_time))
            else:
                # Do not report success when the endpoint rejected the click,
                # but keep replaying the remaining events.
                print("✗ {} (HTTP {})".format(event_time, response.status_code))

            # Keep the notebook output from growing without bound.
            if i > CLEAR_OUTPUT_AFTER:
                clear_output()
                i = 0
            i = i + 1
finally:
    # Release the Kafka connection and its background thread, even when the
    # replay is interrupted or fails.
    if producer is not None:
        producer.close()


✓ 2018-01-13 05:04:22.366000
✓ 2018-01-13 05:04:22.402000
✓ 2018-01-13 05:04:22.422000
✓ 2018-01-13 05:04:22.468000
✓ 2018-01-13 05:04:22.478000
✓ 1970-01-01 00:00:00
✓ 2018-01-13 05:04:22.542000
✓ 2018-01-13 05:04:22.560000
✓ 2018-01-13 05:04:22.584000
✓ 2018-01-13 05:04:22.632000
✓ 2018-01-13 05:04:22.668000
✓ 2018-01-13 05:04:22.676000
✓ 2018-01-13 05:04:22.706000
✓ 2018-01-13 05:04:22.728000
✓ 2018-01-13 05:04:22.754000
✓ 2018-01-13 05:04:22.772000
✓ 2018-01-13 05:04:22.783000
✓ 2018-01-13 05:04:22.806000
✓ 2018-01-13 05:04:22.830000
✓ 2018-01-13 05:04:22.863000
✓ 2018-01-13 05:04:22.874000
✓ 2018-01-13 05:04:22.875000
✓ 2018-01-13 05:04:22.883000
✓ 2018-01-13 05:04:22.924000
✓ 2018-01-13 05:04:22.934000
✓ 2018-01-13 05:04:22.986000
✓ 2018-01-13 05:04:22.988000
✓ 2018-01-13 05:04:22.997000
✓ 2018-01-13 05:04:23.001000
✓ 2018-01-13 05:04:23.003000
✓ 2018-01-13 05:04:23.013000
✓ 2018-01-13 05:04:23.024000
✓ 2018-01-13 05:04:23.043000
✓ 2018-01-13 05:04:23.056000
✓ 2018-01-13 05:04:23