# Testing performance of queries

In [36]:
import os
import time

import mysql.connector
import numpy as np
import pandas as pd

# Connection settings are read from the environment when present so that real
# credentials do not have to be stored in the notebook. The fallbacks are the
# original local-development values, so behavior is unchanged when no
# MYSQL_* variable is set.
client = mysql.connector.connect(
    user=os.environ.get("MYSQL_USER", "admin"),
    password=os.environ.get("MYSQL_PASSWORD", "admin"),
    host=os.environ.get("MYSQL_HOST", "localhost"),
    port=int(os.environ.get("MYSQL_PORT", "3306")),
    database=os.environ.get("MYSQL_DATABASE", "mysql"),
)
# Single shared cursor used by every helper function in this notebook.
mycursor = client.cursor()

# Number of timed repetitions; passed as `num` to sql_query_avg_time by the
# "Average time" cells.
x = 10
# Module-level list. sql_query_avg_time builds its own local list of the same
# name, so nothing visible in this notebook reads this one; kept in case other
# cells depend on it.
times = []

In [37]:
def sql_query_result(query, query_name):
    """Run ``query`` on the shared cursor and print its rows and row count.

    Args:
        query: SQL text handed to ``mycursor.execute``.
        query_name: Label shown in the final summary line.

    Prints one line per fetched row, followed by a line reporting how many
    rows were fetched. Returns None.
    """
    mycursor.execute(query)
    rows = mycursor.fetchall()
    for row in rows:
        print(row)
    print("Length of the result for", query_name, ": ", len(rows))

In [38]:
def sql_query_explain(query):
    """Print the EXPLAIN output for ``query``, one row per line.

    Args:
        query: SQL text; it is prefixed with ``EXPLAIN `` and executed on the
            shared ``mycursor``.

    Returns None.
    """
    mycursor.execute("EXPLAIN {}".format(query))
    for plan_row in mycursor.fetchall():
        print(plan_row)

In [39]:
def sql_query_avg_time(query,num, query_name):
    """Execute ``query`` ``num`` times and print the mean elapsed time per run.

    Each run is timed from just before ``mycursor.execute`` until
    ``mycursor.fetchall`` has returned, so fetching the result set is part of
    the measurement.

    Args:
        query: SQL text to execute on the shared ``mycursor``.
        num: Number of timed repetitions; must be at least 1.
        query_name: Label included in the printed summary line.

    Raises:
        ValueError: If ``num`` is less than 1 (no run to average).

    Returns None; the average (in seconds) is printed.
    """
    if num < 1:
        raise ValueError("num must be at least 1")

    durations = []
    for _ in range(num):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring short intervals.
        started = time.perf_counter()
        mycursor.execute(query)
        mycursor.fetchall()  # drain the result set; the rows themselves are not needed
        durations.append(time.perf_counter() - started)

    # Average over the runs actually timed. Dividing by the notebook-level
    # global `x` gave a wrong result whenever num != x.
    avgtime = sum(durations) / len(durations)
    print('No optimization:  avg total time SQL-Query',query_name ,' = ', avgtime)

## Query 3a - 1

Esta consulta calcula a média do salário máximo (max_salary) para cada salary_id da tabela salaries e, em seguida, ordena os resultados por ordem decrescente dessa média.

In [40]:
# Average of max_salary per salary_id, highest average first.
query1 = (
    "SELECT salary_id, AVG(max_salary) AS avg_max_salary "
    "FROM salaries "
    "GROUP BY salary_id "
    "ORDER BY avg_max_salary DESC"
)

#### Result:

In [41]:
# Print every row returned by query1, then the row count.
sql_query_result(query=query1, query_name="3a-1")

(4514, 500000.0)
(6612, 450000.0)
(7163, 400000.0)
(4537, 345000.0)
(7174, 271200.0)
(4536, 264000.0)
(1425, 250000.0)
(4549, 250000.0)
(1438, 243300.0)
(1440, 243300.0)
(1416, 234563.0)
(7137, 220000.0)
(7165, 220000.0)
(6628, 218500.0)
(4554, 215000.0)
(6655, 210000.0)
(6643, 205500.0)
(6625, 200000.0)
(7162, 194400.0)
(6654, 191000.0)
(4531, 190000.0)
(4516, 185300.0)
(4545, 185300.0)
(4597, 185300.0)
(4598, 185300.0)
(4599, 185300.0)
(1397, 175200.0)
(1422, 175125.0)
(4520, 170443.40625)
(6657, 165000.0)
(1419, 162250.0)
(1420, 162250.0)
(1421, 162250.0)
(7127, 160000.0)
(4517, 157500.0)
(4526, 157500.0)
(4527, 157500.0)
(4547, 157500.0)
(4566, 157500.0)
(4567, 157500.0)
(4568, 157500.0)
(4600, 157500.0)
(4601, 157500.0)
(4602, 157500.0)
(4603, 157500.0)
(4604, 157500.0)
(7147, 155000.0)
(6623, 151800.0)
(1426, 150000.0)
(7126, 150000.0)
(7141, 145000.0)
(4575, 144811.0)
(7130, 140000.0)
(6645, 139800.0)
(6646, 139000.0)
(4570, 135000.0)
(4504, 133120.0)
(4546, 132800.0)
(4565, 132

#### Explained query

In [42]:
# Show the EXPLAIN rows for query1.
sql_query_explain(query=query1)

(1, 'SIMPLE', 'salaries', None, 'index', 'PRIMARY,job_id', 'PRIMARY', '8', None, 259, 100.0, 'Using temporary; Using filesort')


#### Average time

In [43]:
# Time query1 over x repetitions and print the average.
sql_query_avg_time(query=query1, num=x, query_name="3a-1")

No optimization:  avg total time SQL-Query 3a-1  =  0.003800702095031738


## Query 3a - 2

Esta consulta utiliza uma expressão de tabela comum (CTE) e a função de janela ROW_NUMBER() para classificar as empresas com base no número de ofertas de emprego em cada localização. O resultado final inclui apenas a empresa com o maior número de ofertas de emprego em cada local; em caso de empate, ROW_NUMBER() seleciona apenas uma delas.

In [44]:
# CTE ranks (company_id, location) groups by posting count within each
# location; the outer SELECT keeps only the rank-1 row per location.
query2 = (
    "WITH ranked_postings AS ("
    "SELECT company_id, location, "
    "ROW_NUMBER() OVER (PARTITION BY location ORDER BY COUNT(*) DESC) AS posting_rank "
    "FROM job_postings "
    "GROUP BY company_id, location)"
    "SELECT company_id, location "
    "FROM ranked_postings "
    "WHERE posting_rank = 1;"
)

#### Result:

In [45]:
# Print every row returned by query2, then the row count.
sql_query_result(query=query2, query_name="3a-2")

(28615633, 'Aberdeen Proving Ground, MD')
(1013110, 'Aberdeen, MD')
(3706049, 'Aberdeen, WA')
(17226, 'Abilene, TX')
(2700167, 'Abingdon, VA')
(3625, 'Abo, MO')
(115116, 'Acampo, CA')
(25567, 'Acton, MA')
(3570660, 'Ada, MI')
(7064, 'Ada, OK')
(1482552, 'Addison, IL')
(77301, 'Addison, TX')
(27180004, 'Adrian, MI')
(581824, 'Agoura Hills, CA')
(1386950, 'Aiken, SC')
(23182, 'Akron, OH')
(5323150, 'Alabama, United States')
(18506580, 'Alabaster, AL')
(98488356, 'Alachua, FL')
(8437, 'Alamance, NC')
(18924711, 'Alameda County, CA')
(808034, 'Alameda, CA')
(15472, 'Alamo Heights, TX')
(2646, 'Alamosa, CO')
(3558, 'Albany, GA')
(28409, 'Albany, New York Metropolitan Area')
(1302410, 'Albany, NY')
(14542824, 'Albany, OR')
(18009221, 'Albemarle, NC')
(2943678, 'Albuquerque-Santa Fe Metropolitan Area')
(1945473, 'Albuquerque, NM')
(5755, 'Alburtis, PA')
(10854676, 'Alden, NY')
(82153, 'Aledo, TX')
(13309, 'Alexandria City County, VA')
(6062532, 'Alexandria, LA')
(1403, 'Alexandria, VA')
(7730

#### Explained query

In [46]:
# Show the EXPLAIN rows for query2.
sql_query_explain(query=query2)

(1, 'PRIMARY', '<derived2>', None, 'ref', '<auto_key0>', '<auto_key0>', '8', 'const', 10, 100.0, None)
(2, 'DERIVED', 'job_postings', None, 'ALL', None, None, None, None, 15830, 100.0, 'Using temporary; Using filesort')


#### Average time

In [47]:
# Time query2 over x repetitions and print the average.
sql_query_avg_time(query=query2, num=x, query_name="3a-2")

No optimization:  avg total time SQL-Query 3a-2  =  0.22435481548309327


## Query 3b - 1

Esta consulta recupera informações sobre empresas, incluindo o seu ID, nome, número de empregados e o número de ofertas de emprego cujo título termina em "er" (LIKE '%er'). Também filtra as empresas com mais de 5 ofertas de emprego e ordena os resultados pela contagem de ofertas por ordem decrescente. A utilização de LEFT JOINs garante que as ofertas de emprego sem entradas correspondentes nas tabelas companies, benefits ou employee_counts continuam a ser incluídas nos resultados.

In [48]:
# Job postings whose title ends in "er", left-joined to companies, benefits
# and employee_counts; grouped by company id, name and employee_count, keeping
# groups with more than 5 joined rows, largest count first.
query3 = (
    "SELECT c.company_id, c.name as company_name, ec.employee_count, COUNT(*) as job_count "
    "FROM job_postings jp "
    "LEFT JOIN companies c ON jp.company_id = c.company_id "
    "LEFT JOIN benefits b ON jp.job_id = b.job_id "
    "LEFT JOIN employee_counts ec ON c.company_id = ec.company_id "
    "WHERE jp.title LIKE '%er' "
    "GROUP BY c.company_id, c.name, ec.employee_count "
    "HAVING job_count > 5 "
    "ORDER BY job_count DESC;"
)

#### Result:

In [49]:
# Print every row returned by query3, then the row count.
sql_query_result(query=query3, query_name="3b-1")

(163139, 'Cogent Communications', 2258, 60)
(163139, 'Cogent Communications', 2255, 60)
(163139, 'Cogent Communications', 2250, 60)
(10420321, 'The Mom Project', 897, 43)
(10420321, 'The Mom Project', 902, 43)
(11056, 'Insight Global', 13096, 33)
(11056, 'Insight Global', 13698, 33)
(11056, 'Insight Global', 13787, 33)
(11056, 'Insight Global', 13801, 33)
(11056, 'Insight Global', 13804, 33)
(18860134, 'Jobot', 2148, 31)
(18860134, 'Jobot', 2781, 31)
(18860134, 'Jobot', 2783, 31)
(18860134, 'Jobot', 2774, 31)
(49042, 'Five Star Senior Living', 2882, 26)
(49042, 'Five Star Senior Living', 2905, 26)
(49042, 'Five Star Senior Living', 2904, 26)
(7795, 'Petco', 15497, 25)
(7795, 'Petco', 15848, 25)
(7795, 'Petco', 15841, 25)
(1403, 'Booz Allen Hamilton', 38277, 24)
(1403, 'Booz Allen Hamilton', 35963, 24)
(1403, 'Booz Allen Hamilton', 35961, 24)
(1403, 'Booz Allen Hamilton', 35400, 24)
(77301, "Raising Cane's Chicken Fingers", 10158, 24)
(77301, "Raising Cane's Chicken Fingers", 10861, 24)

#### Explained query

In [50]:
# Show the EXPLAIN rows for query3.
sql_query_explain(query=query3)

(1, 'SIMPLE', 'jp', None, 'ALL', None, None, None, None, 15830, 11.11, 'Using where; Using temporary; Using filesort')
(1, 'SIMPLE', 'c', None, 'eq_ref', 'PRIMARY', 'PRIMARY', '8', 'mysql.jp.company_id', 1, 100.0, None)
(1, 'SIMPLE', 'b', None, 'ref', 'job_id', 'job_id', '9', 'mysql.jp.job_id', 2, 100.0, 'Using index')
(1, 'SIMPLE', 'ec', None, 'ref', 'company_id', 'company_id', '9', 'mysql.c.company_id', 1, 100.0, None)


#### Average time

In [51]:
# Time query3 over x repetitions and print the average.
sql_query_avg_time(query=query3, num=x, query_name="3b-1")

No optimization:  avg total time SQL-Query 3b-1  =  0.11321542263031006


## Query 3b - 2

Essa consulta calcula os valores médio, mínimo e máximo da coluna max_salary da tabela job_postings, considerando apenas os anúncios de emprego com max_salary maior que 5000. O RIGHT JOIN garante que todas as linhas da tabela job_postings sejam incluídas; quando não há correspondência na tabela salaries, as colunas de salaries ficam com valores NULL.

In [52]:
# Average, minimum and maximum of job_postings.max_salary for postings with
# max_salary above 5000; salaries is right-joined on job_id.
query4 = (
    "SELECT avg(jp.max_salary), min(jp.max_salary), max(jp.max_salary) "
    "FROM salaries s "
    "RIGHT JOIN job_postings jp on s.job_id = jp.job_id "
    "WHERE jp.max_salary > 5000;"
)

#### Result:

In [53]:
# Print every row returned by query4, then the row count.
sql_query_result(query=query4, query_name="3b-2")

(135784.2010912389, 5474.0, 1100000.0)
Length of the result for 3b-2 :  1


#### Explained query

In [54]:
# Show the EXPLAIN rows for query4.
sql_query_explain(query=query4)

(1, 'SIMPLE', 'jp', None, 'ALL', None, None, None, None, 15830, 33.33, 'Using where')
(1, 'SIMPLE', 's', None, 'ref', 'job_id', 'job_id', '9', 'mysql.jp.job_id', 1, 100.0, 'Using index')


#### Average time

In [55]:
# Time query4 over x repetitions and print the average.
sql_query_avg_time(query=query4, num=x, query_name="3b-2")

No optimization:  avg total time SQL-Query 3b-2  =  0.028089284896850586
