# Data Pipelines - Website Visitor Counts

### Website Visitor Counts Exercise (Part 1) - Saving log data to database

In [None]:
from datetime import datetime
import sqlite3

In [None]:
# Location of the raw web-server access log that will be ingested
file_name = "web_traffic_logs.txt"

# Open the SQLite database file and obtain a cursor for running SQL statements
conn = sqlite3.connect(database="web_traffic.db")
cursor = conn.cursor()

In [None]:
# Load every non-empty line of the access log into the web_traffic_logs table.
# Each line is split on single spaces and the columns are taken by position.
# NOTE(review): the positions presumably match an nginx/Apache "combined"
# log format -- confirm against the actual log file.
# Nothing is committed here; the commit happens in a later cell.

# Create the table once, up front, instead of re-issuing the DDL per log line.
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS web_traffic_logs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        record_created_at DATETIME,
        ip_address TEXT,
        local_time TEXT,
        request_type TEXT,
        request_address TEXT,
        request_status INTEGER,
        body_bytes_sent INTEGER,
        http_referrer TEXT,
        browser TEXT,
        raw_log TEXT UNIQUE
    )
    """
)

# raw_log is UNIQUE, so a plain INSERT raises sqlite3.IntegrityError when the
# same line appears twice or when this cell is re-run on the same file.
# OR IGNORE keeps the first copy and skips the duplicate, making the load
# safe to repeat.
insert_sql = """
    INSERT OR IGNORE INTO web_traffic_logs (
        record_created_at,
        ip_address,
        local_time,
        request_type,
        request_address,
        request_status,
        body_bytes_sent,
        http_referrer,
        browser,
        raw_log
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

# Lines with too few fields to parse are counted and reported, not fatal.
malformed_line_count = 0

with open(file_name, 'r') as file:
    for line in file:
        line = line.strip()  # Remove leading and trailing whitespace
        if not line:
            continue  # Skip blank lines

        fields = line.split(" ")
        # The highest index read below is fields[10]; shorter lines cannot be
        # parsed, so skip them rather than abort the whole load.
        if len(fields) < 11:
            malformed_line_count += 1
            continue

        record_created_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        ip_address = fields[0]
        local_time = fields[3] + " " + fields[4]
        request_type = fields[5].strip('"')
        request_address = fields[6]
        request_status = fields[8]
        body_bytes_sent = fields[9]
        # Page the client was on before sending the current request
        http_referrer = fields[10].strip('"')

        # Browser heuristic: among the user-agent tokens (index 11 onward),
        # take the name before the '/' of a "name/version" token whose version
        # has at least three dot-separated parts (e.g. Chrome/41.0.2228.0).
        # The last matching token wins. Reset per line so a line without a
        # match stores NULL instead of reusing the previous line's browser
        # (or raising NameError on the very first line).
        browser = None
        for token in fields[11:]:
            parts = token.split('/')
            if len(parts) > 1 and parts[1].count('.') >= 2:
                # Strip the quote that opens the user-agent string in case the
                # first token is the one that matches.
                browser = parts[0].strip('"')

        cursor.execute(
            insert_sql,
            (
                record_created_at,
                ip_address,
                local_time,
                request_type,
                request_address,
                request_status,
                body_bytes_sent,
                http_referrer,
                browser,
                line,
            ),
        )

if malformed_line_count:
    print(f"Skipped {malformed_line_count} malformed log line(s)")

In [None]:
# Persist the inserted rows, then release the database connection.
# The log file needs no explicit close here: the `with open(...)` block that
# read it already closed the handle when it exited.
conn.commit()
conn.close()