In [1]:
import mysql.connector

# Connect to the MySQL server without selecting a schema, so the database
# can be created before anything tries to use it.
connection = mysql.connector.connect(
    host="localhost",
    user="your_username",
    password="your_password"
)

try:
    cursor = connection.cursor()
    try:
        # IF NOT EXISTS keeps this step re-runnable: a plain CREATE DATABASE
        # fails with MySQL error 1007 once the database already exists.
        cursor.execute("CREATE DATABASE IF NOT EXISTS website_traffic")
    finally:
        cursor.close()
finally:
    # Release the bootstrap connection; `connection` is rebound to a new,
    # database-scoped connection later in this cell.
    connection.close()

import mysql.connector
import random
from datetime import datetime, timedelta

# Connect to the website_traffic database
connection = mysql.connector.connect(
    host="localhost",
    user="your_username",
    password="your_password",
    database="website_traffic"
)

try:
    cursor = connection.cursor()

    # Create the traffic table on first run; later runs reuse the existing one.
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS traffic (
        id INT AUTO_INCREMENT PRIMARY KEY,
        timestamp DATETIME,
        pageviews INT,
        unique_visitors INT
    )
""")

    # Build 100 synthetic samples: a timestamp 1-60 minutes in the past,
    # 100-1000 pageviews and 50-200 unique visitors per sample.
    # NOTE: every run of this cell appends another 100 rows.
    rows = [
        (
            datetime.now() - timedelta(minutes=random.randint(1, 60)),
            random.randint(100, 1000),
            random.randint(50, 200),
        )
        for _ in range(100)
    ]

    # Hand all rows to the driver in a single call instead of 100 separate
    # execute() calls; values stay parameterized.
    cursor.executemany(
        "INSERT INTO traffic (timestamp, pageviews, unique_visitors) VALUES (%s, %s, %s)",
        rows,
    )

    # Persist the inserts
    connection.commit()
finally:
    # Always release the connection, even if a statement above failed.
    connection.close()

In [2]:
# Install Apache Airflow into the environment backing this kernel.
# The explicit %pip magic does not depend on IPython's automagic setting.
%pip install apache-airflow

In [3]:
# Shell commands that bootstrap Airflow (these are not Python statements).
# NOTE(review): `initdb` is the Airflow 1.x spelling; Airflow 2.x renamed it
# to `airflow db init` -- confirm against the installed version.
airflow initdb
# Start the web server on port 8080.
# NOTE(review): presumably a long-running foreground process, in which case
# the scheduler line below is never reached from the same shell -- confirm
# whether these two are meant to run in separate terminals.
airflow webserver -p 8080
# Start the scheduler.
airflow scheduler

In [4]:
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from datetime import datetime, timedelta
import mysql.connector
import random
from datetime import datetime, timedelta

# Defaults handed to the DAG via default_args: one retry, five minutes
# after a failure, and no dependence on the previous run.
default_args = dict(
    owner='your_name',
    depends_on_past=False,
    start_date=datetime(2023, 1, 1),
    retries=1,
    retry_delay=timedelta(minutes=5),
)

# The ETL DAG itself: scheduled every 15 minutes, with catchup disabled.
dag = DAG(
    dag_id='website_traffic_etl',
    description='ETL process for website traffic data',
    default_args=default_args,
    schedule_interval=timedelta(minutes=15),
    catchup=False,
)

# ETL Function
def etl():
    """Insert one synthetic traffic sample into the ``traffic`` table.

    Opens a fresh connection to the ``website_traffic`` MySQL database,
    inserts a single row stamped with ``datetime.now()`` and random
    ``pageviews`` (100-1000) / ``unique_visitors`` (50-200) values, then
    commits.

    The cursor and connection are released even when the insert or commit
    raises, so a failing (and retried) task run does not leak connections.
    Any exception propagates to the caller unchanged.
    """
    connection = mysql.connector.connect(
        host="localhost",
        user="your_username",
        password="your_password",
        database="website_traffic"
    )
    try:
        cursor = connection.cursor()
        try:
            # Generate and insert synthetic data
            timestamp = datetime.now()
            pageviews = random.randint(100, 1000)
            unique_visitors = random.randint(50, 200)
            cursor.execute("INSERT INTO traffic (timestamp, pageviews, unique_visitors) VALUES (%s, %s, %s)",
                           (timestamp, pageviews, unique_visitors))

            # Persist the row; skipped automatically if the insert raised.
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()

# Wrap etl() in a PythonOperator attached to the DAG
etl_task = PythonOperator(task_id='etl_task', python_callable=etl, dag=dag)

# No task dependencies are wired up here. If more tasks are added:
# etl_task.set_upstream(...)

# Hand control to the DAG's command-line interface when run as a script
if __name__ == "__main__":
    dag.cli()

In [5]:
# Install Apache Superset into the environment backing this kernel.
# The explicit %pip magic does not depend on IPython's automagic setting.
%pip install apache-superset

In [6]:
# Shell command (not Python): run Superset's `db upgrade` step, which
# presumably applies its metadata-database migrations -- confirm.
superset db upgrade

In [7]:
# Shell command (not Python): create the Superset admin user via the
# `fab` command group.
# NOTE(review): this presumably prompts for credentials interactively --
# confirm it is run from a terminal.
superset fab create-admin

In [8]:
# Shell command (not Python): load Superset's example data.
superset load_examples

In [9]:
# NOTE(review): an admin user is already created by `superset fab create-admin`
# in cell In [7]; this bare `create-admin` form looks redundant -- confirm it
# exists in the installed Superset CLI, otherwise drop this cell.
superset create-admin

In [10]:
# Shell command (not Python): start the Superset server on port 8088.
# NOTE(review): presumably runs in the foreground until stopped -- confirm
# it is meant for a terminal rather than a notebook cell.
superset run -p 8088

In [12]:
from superset import app, db
from superset.connectors.connector_registry import ConnectorRegistry
from superset.models.core import Database
from superset.models.slice import Slice
from sqlalchemy.orm import sessionmaker
import datetime

def create_superset_chart():
    """Register the traffic database in Superset and add a line chart for it.

    Steps, in order:
      1. Set the Flask app's ``SQLALCHEMY_DATABASE_URI`` to the
         ``website_traffic`` MySQL URI.
      2. Add a ``Database`` row named ``website_traffic`` and commit it.
      3. Add a ``Slice`` (chart) named ``Real-Time Traffic`` and commit it.

    The two commits are separate, so the database entry persists even if
    creating the chart fails.

    Raises:
        Exception: any error raised while building or committing the rows is
            re-raised after the session's pending work has been rolled back.
    """
    import json  # local import: only this function needs it

    # Single definition of the URI used for both the app config and the
    # Database row, so the two cannot drift apart.
    traffic_db_uri = 'mysql://your_username:your_password@localhost/website_traffic'

    # NOTE(review): this overrides the app-wide SQLAlchemy URI, so `db.engine`
    # presumably binds to the traffic database rather than Superset's own
    # metadata store -- confirm that is intended.
    app.config.from_mapping(
        SQLALCHEMY_DATABASE_URI=traffic_db_uri,
    )
    with app.app_context():
        db.init_app(app)
        session = sessionmaker(autocommit=False, autoflush=False, bind=db.engine)()
        try:
            # Create a new database instance
            database = Database(
                database_name='website_traffic',
                sqlalchemy_uri=traffic_db_uri,
            )
            session.add(database)
            session.commit()

            # Create a new chart. Slice.params is stored as a JSON-encoded
            # string, so the form data is serialized rather than passed as
            # a dict.
            chart = Slice(
                slice_name='Real-Time Traffic',
                datasource_type='table',
                datasource_id=1,  # placeholder: replace with your table's ID
                viz_type='line',
                params=json.dumps({
                    'time_range_endpoints': ['inclusive', 'exclusive'],
                    'granularity_sqla': 'timestamp',
                    'since': '7 days ago',
                    'until': 'now',
                    'metrics': ['count'],
                    'groupby': ['timestamp'],
                }),
            )
            session.add(chart)
            session.commit()
        except Exception:
            # Leave the session clean before surfacing the error.
            session.rollback()
            raise
        finally:
            session.close()


if __name__ == '__main__':
    create_superset_chart()