In [None]:
import json
import math
import os
import time
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from naivebayes import NaiveBayesTextClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from sqlalchemy import create_engine, text

In [None]:
# Load variables from a local .env file (if one is found) into the process
# environment so the os.getenv lookups in the next cell can see them.
load_dotenv()

In [None]:
# Snowflake connection settings, read from the process environment.
# Each value is None when the corresponding variable is not set.
username = os.environ.get('SNOWFLAKE_USER')
password = os.environ.get('SNOWFLAKE_PASSWORD')
account = os.environ.get('SNOWFLAKE_ACCOUNT')
warehouse = os.environ.get('SNOWFLAKE_WAREHOUSE')
# Database and schema use the *_TPC variants of the variable names.
database = os.environ.get('SNOWFLAKE_DATABASE_TPC')
schema = os.environ.get('SNOWFLAKE_SCHEMA_TPC')

In [None]:
# Fail fast with a readable message instead of building a URL that contains
# the literal text "None" and only failing later, at connect time.
connection_settings = {
    'SNOWFLAKE_USER': username,
    'SNOWFLAKE_PASSWORD': password,
    'SNOWFLAKE_ACCOUNT': account,
    'SNOWFLAKE_WAREHOUSE': warehouse,
    'SNOWFLAKE_DATABASE_TPC': database,
    'SNOWFLAKE_SCHEMA_TPC': schema,
}
missing_settings = [name for name, value in connection_settings.items() if not value]
if missing_settings:
    raise ValueError(
        "Missing Snowflake connection settings (environment variables): "
        + ", ".join(missing_settings)
    )

# Credentials are URL-encoded because characters such as '@', ':', '/' or '%'
# in a user name or password would otherwise be parsed as URL syntax.
encoded_username = quote_plus(username)
encoded_password = quote_plus(password)

# SQLAlchemy engine shared by every query in this notebook.
engine = create_engine(
    f'snowflake://{encoded_username}:{encoded_password}@{account}/{database}/{schema}?warehouse={warehouse}'
)

In [None]:
# Summary aggregation over `lineitem`: for every (l_returnflag, l_linestatus)
# pair it reports summed quantity, base price, discounted price and charge
# (discounted price with tax applied), the average quantity, price and
# discount, and a row count. Only rows whose l_shipdate is on or before the
# date 90 days earlier than 1998-12-01 are included; output is ordered by the
# two grouping columns.
# NOTE(review): table and column names look like TPC-H query 1 — confirm
# against the benchmark specification.
Query1 = """
SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity) AS sum_qty,
    SUM(l_extendedprice) AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity) AS avg_qty,
    AVG(l_extendedprice) AS avg_price,
    AVG(l_discount) AS avg_disc,
    COUNT(*) AS count_order
FROM
    lineitem
WHERE
    l_shipdate <= DATEADD(DAY, -90, DATE '1998-12-01')
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus;
"""


In [None]:
# Revenue per nation: sums l_extendedprice * (1 - l_discount) grouped by
# n_name. customer, orders, lineitem, supplier, nation and region are combined
# with implicit (comma) joins whose conditions live in the WHERE clause.
# Only rows where the customer and the supplier share a nation, that nation
# belongs to the region named 'ASIA', and the order date falls in
# [1994-01-01, 1995-01-01) contribute. Output is sorted by revenue, highest
# first.
# NOTE(review): table and column names look like TPC-H query 5 — confirm
# against the benchmark specification.
Query5 = """
SELECT
    n_name,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue
FROM
    customer,
    orders,
    lineitem,
    supplier,
    nation,
    region
WHERE
    c_custkey = o_custkey
    AND l_orderkey = o_orderkey
    AND l_suppkey = s_suppkey
    AND c_nationkey = s_nationkey
    AND s_nationkey = n_nationkey
    AND n_regionkey = r_regionkey
    AND r_name = 'ASIA'
    AND o_orderdate >= DATE '1994-01-01'
    AND o_orderdate < DATEADD(YEAR, 1, DATE '1994-01-01')
GROUP BY
    n_name
ORDER BY
    revenue DESC;
"""


In [None]:
# Large-order listing: the IN sub-query selects the order keys whose line
# items add up to more than 300 units of l_quantity. For those orders the
# outer query returns the customer name and key, the order key, date and total
# price, and the summed line-item quantity, sorted by o_totalprice descending
# and then by o_orderdate.
# NOTE(review): table and column names look like TPC-H query 18 — confirm
# against the benchmark specification.
Query18 = """
SELECT
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice,
    SUM(l_quantity) AS total_quantity
FROM
    customer,
    orders,
    lineitem
WHERE
    o_orderkey IN (
        SELECT
            l_orderkey
        FROM
            lineitem
        GROUP BY
            l_orderkey
        HAVING
            SUM(l_quantity) > 300
    )
    AND c_custkey = o_custkey
    AND o_orderkey = l_orderkey
GROUP BY
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice
ORDER BY
    o_totalprice DESC,
    o_orderdate;
"""


In [None]:
def query_run(query, number, iterations=100):
    """Time repeated executions of a SQL query and save the timings to disk.

    The query is executed ``iterations`` times over a single connection taken
    from the module-level ``engine``. Each measurement covers both the
    ``execute`` call and fetching all rows. The timings are printed as they
    are collected and then written, one per line with four decimal places, to
    ``results/query_<number>/execution_times.txt`` (relative to the current
    working directory).

    Args:
        query: SQL text to execute; it is wrapped in ``sqlalchemy.text``.
        number: Label used to build the output folder name ``query_<number>``.
        iterations: How many times to execute the query. Defaults to 100.

    Returns:
        None. If connecting or executing raises, the error is printed and the
        function returns without writing the timings file (the output folder
        has already been created at that point). A failure while writing the
        file is likewise printed rather than raised.
    """
    profiling_folder = os.path.join('results', f'query_{number}')
    os.makedirs(profiling_folder, exist_ok=True)
    execution_times = []

    # NOTE(review): the identical statement is re-run on one session; if the
    # warehouse serves repeats from a result cache, later iterations measure
    # cache retrieval rather than execution — confirm this is intended.
    try:
        with engine.connect() as connection:
            for i in range(iterations):
                # perf_counter is a monotonic, high-resolution clock meant for
                # measuring intervals; time.time() can jump when the system
                # clock is adjusted and would distort the measurement.
                start_time = time.perf_counter()
                # Rows are fetched (and discarded) inside the timed region so
                # the measurement includes retrieving the full result set.
                connection.execute(text(query)).fetchall()
                elapsed_time = time.perf_counter() - start_time
                execution_times.append(elapsed_time)
                print(f"Iteration {i + 1}: Query executed in {elapsed_time:.4f} seconds.")
    except Exception as e:
        print(f"Error executing query: {e}")
        return  # Timings collected so far are intentionally not saved.

    file_path = os.path.join(profiling_folder, "execution_times.txt")
    try:
        with open(file_path, "w") as file:
            file.writelines(f"{exec_time:.4f}\n" for exec_time in execution_times)
        print(f"Execution times saved to {file_path}")
    except Exception as e:
        print(f"Error saving execution times to file: {e}")


In [None]:
# Benchmark Query1; timings are written under results/query_1.
query_run(query=Query1, number='1')

In [None]:
# Benchmark Query5; timings are written under results/query_5.
query_run(query=Query5, number='5')

In [None]:
# Benchmark Query18; timings are written under results/query_18.
query_run(query=Query18, number='18')