# Fake Page View generator

This notebook generates page views based on a three-hour dataset.

In [2]:
%%bash
python3 -m pip install kafka-python



In [3]:
# Initialize Kafka Topics
from kafka import KafkaClient
from kafka.admin import KafkaAdminClient, NewTopic

TOPICS = ["clicks"]

kafka_client = KafkaClient("localhost:9092")
existing_topics = kafka_client.topic_partitions

admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092")
topic_list = []
for topicname in TOPICS:
    if topicname not in existing_topics:
        print("Creating topic: {}".format(topicname))
        topic_list.append(NewTopic(name=topicname, num_partitions=1, replication_factor=1))
admin_client.create_topics(new_topics=topic_list, validate_only=False)

CreateTopicsResponse_v0(topic_errors=[])

In [4]:
# Extract archive of click data
import zipfile

ARCHIVEF = "20180113-6-9-with-nulls.json.zip"
CLICKSF = "20180113-6-9-with-nulls.json"

with zipfile.ZipFile(ARCHIVEF,"r") as zip_ref:
    zip_ref.extractall(".")

In [5]:
# SENDMODE = "kafka"
SENDMODE = "http"

In [6]:
# Send click data
import json
import time
from datetime import datetime

import requests
from kafka import KafkaProducer
from kafka.errors import KafkaError
from IPython.display import clear_output

producer = KafkaProducer(bootstrap_servers=['localhost:9092'])

orig_start = 0
replay_start = time.time() * 1000

counter = 0
i = 0

print("Simulating website traffic...")

with open(CLICKSF, 'r') as f:
    first = True
    start_ts = 0
    for line in f:
        click = json.loads(line)

        orig_event = click["ts_ingest"] or 0
        
        if orig_event:
            curr_ts = time.time() * 1000

            if first:
                first = False
                orig_start = click["ts_ingest"]

            difference = (orig_event - orig_start) - (curr_ts - replay_start)

            if difference > 0:
                print("sleeping {0:.2f} seconds".format(difference/1000.0))
                time.sleep(difference/1000.0)

        if SENDMODE == "kafka":
            future = producer.send('clicks', line.encode())
            try:
                record_metadata = future.get(timeout=10)
            except KafkaError:
                log.exception()
                pass
        else:
            #print(f"sending {line}")
            requests.post("http://localhost:5000/clicks", data=line.encode("utf-8"), headers={'Content-Type':'application/json'})
        
        print("✓ {}".format(datetime.fromtimestamp(orig_event/1000.0)))
        if i > 1000:
            clear_output()
            i = 0
        i = i+1


sleeping 0.01 seconds
✓ 2018-01-13 05:26:58.363000
sleeping 0.01 seconds
✓ 2018-01-13 05:26:58.373000
sleeping 0.00 seconds
✓ 2018-01-13 05:26:58.379000
sleeping 0.00 seconds
✓ 2018-01-13 05:26:58.385000
sleeping 0.06 seconds
✓ 2018-01-13 05:26:58.446000
sleeping 0.00 seconds
✓ 2018-01-13 05:26:58.456000
sleeping 0.01 seconds
✓ 2018-01-13 05:26:58.469000
sleeping 0.03 seconds
✓ 2018-01-13 05:26:58.502000
✓ 2018-01-13 05:26:58.504000
sleeping 0.05 seconds
✓ 2018-01-13 05:26:58.566000
sleeping 0.01 seconds
✓ 2018-01-13 05:26:58.580000
sleeping 0.00 seconds
✓ 2018-01-13 05:26:58.590000
✓ 2018-01-13 05:26:58.591000
sleeping 0.02 seconds
✓ 2018-01-13 05:26:58.619000
sleeping 0.02 seconds
✓ 2018-01-13 05:26:58.643000
sleeping 0.02 seconds
✓ 2018-01-13 05:26:58.665000
sleeping 0.00 seconds
✓ 2018-01-13 05:26:58.672000
✓ 2018-01-13 05:26:58.675000
sleeping 0.03 seconds
✓ 2018-01-13 05:26:58.710000
sleeping 0.03 seconds
✓ 2018-01-13 05:26:58.747000
✓ 2018-01-13 05:26:58.750000
✓ 2018-01-13 05:2

✓ 2018-01-13 05:27:02.170000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:02.188000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:02.214000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:02.240000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:02.250000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:02.261000
✓ 1970-01-01 00:00:00
sleeping 0.01 seconds
✓ 2018-01-13 05:27:02.277000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:02.289000
✓ 2018-01-13 05:27:02.293000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:02.302000
sleeping 0.06 seconds
✓ 2018-01-13 05:27:02.370000
✓ 2018-01-13 05:27:02.371000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:02.387000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:02.393000
sleeping 0.06 seconds
✓ 2018-01-13 05:27:02.454000
sleeping 0.04 seconds
✓ 2018-01-13 05:27:02.498000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:02.516000
sleeping 0.03 seconds
✓ 2018-01-13 05:27:02.556000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:02.583000
sleeping 0.08 seconds
✓ 2018-01-13 05:27:02.665000
sleeping 0.02 seconds
✓ 

✓ 2018-01-13 05:27:05.483000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:05.493000
✓ 2018-01-13 05:27:05.496000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:05.505000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.525000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.546000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.566000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.583000
✓ 2018-01-13 05:27:05.585000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.599000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:05.606000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.617000
✓ 2018-01-13 05:27:05.617000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.644000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:05.653000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:05.677000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.692000
sleeping 0.01 seconds
✓ 2018-01-13 05:27:05.706000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:05.715000
✓ 2018-01-13 05:27:05.719000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:05.746000
sleeping 0.00 seconds
✓ 2018-01-13 05:2

✓ 2018-01-13 05:27:08.672000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:08.682000
✓ 2018-01-13 05:27:08.686000
✓ 2018-01-13 05:27:08.695000
✓ 2018-01-13 05:27:08.696000
✓ 2018-01-13 05:27:08.704000
✓ 2018-01-13 05:27:08.716000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:08.740000
✓ 2018-01-13 05:27:08.741000
✓ 2018-01-13 05:27:08.749000
✓ 2018-01-13 05:27:08.751000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:08.785000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:08.792000
sleeping 0.04 seconds
✓ 2018-01-13 05:27:08.836000
sleeping 0.04 seconds
✓ 2018-01-13 05:27:08.883000
sleeping 0.06 seconds
✓ 2018-01-13 05:27:08.953000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:08.964000
✓ 1970-01-01 00:00:00
✓ 2018-01-13 05:27:08.971000
✓ 1970-01-01 00:00:00
sleeping 0.06 seconds
✓ 2018-01-13 05:27:09.044000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:09.075000
sleeping 0.00 seconds
✓ 2018-01-13 05:27:09.085000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:09.106000
sleeping 0.02 seconds
✓ 2018-01-13 05:27:09.128000
sle

ConnectionError: ('Connection aborted.', BrokenPipeError(32, 'Broken pipe'))