# Data Pipelines - Website Visitor Counts

### Website Visitor Counts Exercise (Part 2) - Querying database

In [None]:
import sqlite3
from datetime import datetime

### Function to fetch data from database

In [None]:
# Fetch rows added after a certain time

def fetch_rows(start_time):
    """Return the log rows whose ``record_created_at`` is later than *start_time*.

    Args:
        start_time: Lower bound (exclusive) bound to the query parameter.
            A ``datetime`` is converted to its ``isoformat(" ")`` text
            before binding; any other value is passed through unchanged.

    Returns:
        A list of ``(local_time, ip_address, browser)`` tuples read from
        the ``web_traffic_logs`` table of ``web_traffic.db``.
    """
    # sqlite3's implicit datetime adapter is deprecated since Python 3.12;
    # produce the same text form explicitly instead of relying on it.
    if isinstance(start_time, datetime):
        start_time = start_time.isoformat(" ")

    query = "SELECT local_time, ip_address, browser FROM web_traffic_logs WHERE record_created_at > ?"

    # Connect to the SQLite database
    conn = sqlite3.connect("web_traffic.db")
    try:
        cursor = conn.cursor()
        cursor.execute(query, (start_time,))
        return cursor.fetchall()
    finally:
        # The connection is local to this call, so it must be released here;
        # nothing outside the function holds a reference to it.
        conn.close()

### Extract relevant fields

In [None]:
def extract_data(rows):
    """Split fetched rows into three parallel lists.

    Each row is indexed positionally: element 0 is parsed as a timestamp
    in the ``[%d/%b/%Y:%H:%M:%S %z]`` format, element 1 is taken as the IP
    address and element 2 as the browser.

    Returns:
        A ``(times, ips, browsers)`` tuple of lists, all empty when *rows*
        is empty.
    """
    timestamp_format = '[%d/%b/%Y:%H:%M:%S %z]'

    # Build one (time, ip, browser) record per row in a single pass.
    records = [
        (datetime.strptime(record[0], timestamp_format), record[1], record[2])
        for record in rows
    ]
    if not records:
        return [], [], []

    # Transpose the records into one list per column.
    times, ips, browsers = (list(column) for column in zip(*records))
    return times, ips, browsers

### Counting

In [None]:
# Per-day aggregates, keyed by the day formatted as "%d-%m-%Y".
unique_ips = {}      # day -> set of IP addresses seen that day
browser_counts = {}  # day -> {browser: number of rows}
counts = {}          # day -> number of unique IP addresses

# Arbitrary start time
start_time = datetime(year=2023, month=1, day=1)

rows = fetch_rows(start_time)
times, ips, browsers = extract_data(rows)

if times:
    # New data to analyze!
    # Set the start time to the last fetched time to prevent future duplicate data.
    # NOTE(review): this value comes from local_time while the query filters
    # on record_created_at — confirm the two are comparable.
    start_time = times[-1]

for ip, time, browser in zip(ips, times, browsers):
    unique_day = time.strftime("%d-%m-%Y")
    unique_ips.setdefault(unique_day, set()).add(ip)

    # Count browsers used per day
    day_browsers = browser_counts.setdefault(unique_day, {})
    day_browsers[browser] = day_browsers.get(browser, 0) + 1

# Aggregate once after all rows are processed, not once per row.
for day, unique_ip_set in unique_ips.items():
    counts[day] = len(unique_ip_set)

# Creates a list of tuples: (day, unique_ip_count) sorted by day.
# The key parses the day back into a date so the order is chronological;
# comparing the "%d-%m-%Y" strings directly would order by day-of-month first.
# Defined even when no rows were fetched, so the report loop below is safe.
count_list = sorted(
    counts.items(),
    key=lambda item: datetime.strptime(item[0], "%d-%m-%Y"),
)

for day, unique_count in count_list:
    print(f"Day: {day}, Unique visitors: {unique_count}")
    print("Breakdown by browser:")
    for browser, count in browser_counts[day].items():
        print(f"\t{browser} : {count}")

In [None]:
# Close the database connection
# NOTE(review): the cells visible here define no module-level `conn`; the
# connection opened inside fetch_rows() is local to that function. Unless
# `conn` is created elsewhere, this line raises NameError — confirm.
conn.close()