# Testing performance of queries

In [3]:
import mysql.connector
import numpy as np
import pandas as pd
import time
from pymongo import MongoClient
import pprint


mongo_client = MongoClient('mongodb://root:rootpassword@localhost:27017/admin')
db = mongo_client.admin
salaries_collection = db.salaries
jobs_collection = db.job_postings


client = mysql.connector.connect(user='admin', password='admin', host='localhost', port=3306, database='mysql')
mycursor = client.cursor()


x = 10
times = []

In [4]:
def sql_query_result(query,query_name):
    time_i = time.time()
    mycursor.execute(query)
    time_f = time.time()
    myresult1 = mycursor.fetchall()
    for r in myresult1:
        print(r)
    print("Length of the result for",query_name, ": ", myresult1.__len__())
    print("SQL Execution time:", (time_f-time_i))
    
def mongo_query_result(query,collection,query_name):
    time_i = time.time()
    doc = collection.aggregate(query)
    time_f = time.time()
    result = list(doc)
    for row in result:
        print(row)
    print("Length of the result for",query_name, ": ", result.__len__())
    print("Mongo Execution time:", (time_f-time_i))

In [5]:
def sql_query_explain(query):
    explain_query = f"EXPLAIN {query}"
    mycursor.execute(explain_query)
    explanation_result = mycursor.fetchall()
    for row in explanation_result:
        print(row)
        
def mongo_query_explain(query,collection):
    pprint.pprint(collection.aggregate(query).explain())

In [6]:
def sql_query_avg_time(query,num, query_name):
    times = []
    for i in range(num):
        time_i = time.time()
        mycursor.execute(query)
        mycursor.fetchall()
        time_f = time.time()
        times.append(time_f-time_i)
        
    avgtime = sum(times)/x
    print('No optimization:  avg total time SQL-Query',query_name ,' = ', avgtime)
    
def mongo_query_avg_time(query,collection,num,query_name):
    times = []
    for i in range(num):
        time_i = time.time()
        doc = collection.aggregate(query)
        time_f = time.time()
        times.append(time_f-time_i)
        
    avgtime = sum(times)/x
    print('No optimization:  avg total time Mongo-Query',query_name ,' = ', avgtime)
        

## Query 3a - 1

Esta consulta calcula o salário máximo médio para cada empresa e, em seguida, ordena os resultados por ordem decrescente do salário máximo médio.

In [7]:
name = "3a-1"

sql_query1 = ("SELECT salary_id, AVG(max_salary) AS avg_max_salary "
          "FROM salaries GROUP BY salary_id ORDER BY avg_max_salary DESC")

mongo_query1 = [{"$group": {
    "_id": "$salary_id",
    "avg_max_salary": {"$avg": "$max_salary"}
}
},
    {"$sort": {"avg_max_salary": -1}
     }]

#### Result:

In [8]:
sql_query_result(sql_query1,name)

(4514, 500000.0)
(6612, 450000.0)
(7163, 400000.0)
(4537, 345000.0)
(7174, 271200.0)
(4536, 264000.0)
(1425, 250000.0)
(4549, 250000.0)
(1438, 243300.0)
(1440, 243300.0)
(1416, 234563.0)
(7137, 220000.0)
(7165, 220000.0)
(6628, 218500.0)
(4554, 215000.0)
(6655, 210000.0)
(6643, 205500.0)
(6625, 200000.0)
(7162, 194400.0)
(6654, 191000.0)
(4531, 190000.0)
(4516, 185300.0)
(4545, 185300.0)
(4597, 185300.0)
(4598, 185300.0)
(4599, 185300.0)
(1397, 175200.0)
(1422, 175125.0)
(4520, 170443.40625)
(6657, 165000.0)
(1419, 162250.0)
(1420, 162250.0)
(1421, 162250.0)
(7127, 160000.0)
(4517, 157500.0)
(4526, 157500.0)
(4527, 157500.0)
(4547, 157500.0)
(4566, 157500.0)
(4567, 157500.0)
(4568, 157500.0)
(4600, 157500.0)
(4601, 157500.0)
(4602, 157500.0)
(4603, 157500.0)
(4604, 157500.0)
(7147, 155000.0)
(6623, 151800.0)
(1426, 150000.0)
(7126, 150000.0)
(7141, 145000.0)
(4575, 144811.0)
(7130, 140000.0)
(6645, 139800.0)
(6646, 139000.0)
(4570, 135000.0)
(4504, 133120.0)
(4546, 132800.0)
(4565, 132

In [9]:
mongo_query_result(mongo_query1,salaries_collection,name)

{'_id': 4514, 'avg_max_salary': 500000.0}
{'_id': 6612, 'avg_max_salary': 450000.0}
{'_id': 7163, 'avg_max_salary': 400000.0}
{'_id': 4537, 'avg_max_salary': 345000.0}
{'_id': 7174, 'avg_max_salary': 271200.0}
{'_id': 4536, 'avg_max_salary': 264000.0}
{'_id': 1425, 'avg_max_salary': 250000.0}
{'_id': 4549, 'avg_max_salary': 250000.0}
{'_id': 1440, 'avg_max_salary': 243300.0}
{'_id': 1438, 'avg_max_salary': 243300.0}
{'_id': 1416, 'avg_max_salary': 234563.0}
{'_id': 7137, 'avg_max_salary': 220000.0}
{'_id': 7165, 'avg_max_salary': 220000.0}
{'_id': 6628, 'avg_max_salary': 218500.0}
{'_id': 4554, 'avg_max_salary': 215000.0}
{'_id': 6655, 'avg_max_salary': 210000.0}
{'_id': 6643, 'avg_max_salary': 205500.0}
{'_id': 6625, 'avg_max_salary': 200000.0}
{'_id': 7162, 'avg_max_salary': 194400.0}
{'_id': 6654, 'avg_max_salary': 191000.0}
{'_id': 4531, 'avg_max_salary': 190000.0}
{'_id': 4597, 'avg_max_salary': 185300.0}
{'_id': 4516, 'avg_max_salary': 185300.0}
{'_id': 4599, 'avg_max_salary': 18

#### Explained query

In [10]:
sql_query_explain(sql_query1)

(1, 'SIMPLE', 'salaries', None, 'index', 'PRIMARY,job_id', 'PRIMARY', '8', None, 259, 100.0, 'Using temporary; Using filesort')


In [11]:
#mongo_query_explain(mongo_query1,salaries_collection)

#### Average time

In [12]:
sql_query_avg_time(sql_query1,x,name)

No optimization:  avg total time SQL-Query 3a-1  =  0.0018000602722167969


In [13]:
mongo_query_avg_time(mongo_query1,salaries_collection,x,name)

No optimization:  avg total time Mongo-Query 3a-1  =  0.010264849662780762


## Query 3a - 2

Esta consulta utiliza uma expressão de tabela comum e a função de janela ROW_NUMBER() para classificar as empresas com base no número de ofertas de emprego em cada localização. O resultado final inclui apenas as empresas com o maior número de ofertas de emprego em cada local.

In [14]:
name = "3a-2"
sql_query2 = ("WITH ranked_postings AS ("
                     "SELECT company_id, location, ROW_NUMBER() OVER (PARTITION BY location ORDER BY COUNT(*) DESC) AS posting_rank "
                     "FROM job_postings GROUP BY company_id, location)"
                     "SELECT company_id, location FROM ranked_postings WHERE posting_rank = 1;")


mongo_query2 = [
    {"$group": {"_id": {"company_id": "$company_id", "location": "$location"},
                "count": {"$sum": 1}
                }
     },
    {"$sort": {"_id.location": 1, "count": -1}},
    {"$group": {"_id": "$_id.location",
                "topCompany": {"$first": "$_id.company_id"}}
     },
    {"$project": {"_id": 0, "company_id": "$topCompany", "location": "$_id"}}
]

#### Result:

In [None]:
sql_query_result(sql_query2, name)

In [None]:
mongo_query_result(mongo_query2,jobs_collection,name)

#### Explained query

In [2]:
sql_query_explain(sql_query2)


sql_query2_index_query = "CREATE INDEX idx_company_location ON job_postings (company_id, location)"
sql_query_result(sql_query2_index_query, name)

sql_query_explain(sql_query2)

NameError: name 'sql_query_explain' is not defined

In [None]:
#mongo_query_explain(mongo_query2,jobs_collection)

#### Average time

In [None]:
sql_query_avg_time(sql_query2,x,name)

No optimization:  avg total time SQL-Query 3a-2  =  0.29535412788391113


In [None]:
mongo_query_avg_time(mongo_query2,jobs_collection,x,name)

No optimization:  avg total time Mongo-Query 3a-2  =  0.06385385990142822


## Query 3b - 1

Esta consulta recupera informações sobre empresas, incluindo o seu ID, nome, número de empregados e o número de ofertas de emprego em que o título contém "er". Também filtra as empresas com mais de 5 ofertas de emprego e ordena os resultados pela contagem de ofertas por ordem decrescente. A utilização de LEFT JOINs garante que as empresas sem entradas correspondentes nas tabelas benefits ou employee_counts continuam a ser incluídas nos resultados.

In [None]:
name ="3b-1"

sql_query3 = ("SELECT c.company_id, c.name as company_name, ec.employee_count, COUNT(*) as job_count "
                     "FROM job_postings jp "
                     "LEFT JOIN companies c ON jp.company_id = c.company_id "
                     "LEFT JOIN benefits b ON jp.job_id = b.job_id "
                     "LEFT JOIN employee_counts ec ON c.company_id = ec.company_id "
                     "WHERE jp.title LIKE '%er' GROUP BY c.company_id, c.name, ec.employee_count "
                     "HAVING job_count > 5 ORDER BY job_count DESC;")

mongo_query3 = [
    {"$match": {'title': {'$regex': 'er$'}}},
    {'$lookup': {'from': "companies",
                 'localField': "company_id",
                 'foreignField': "company_id",
                 'as': "company"}},
    {'$unwind': "$company"},
    {'$lookup': {'from': "benefits",
                 'localField': "job_id",
                 'foreignField': "job_id",
                 'as': "benefits"}},
    {'$lookup': {'from': "employee_counts",
                 'localField': "company.company_id",
                 'foreignField': "company_id",
                 'as': "employee_counts"}},
    {'$group': {'_id': {'company_id': "$company.company_id",
                        'company_name': "$company.name",
                        'employee_count': {'$ifNull': ["$employee_counts.employee_count", 0]}
                        },
                'job_count': {'$sum': 1}}},
    {'$match': {'job_count': {'$gt': 5}}},
    {'$sort': {'job_count': -1}},
    {'$project': {'_id': 0,
                  'company_id': "$_id.company_id",
                  'company_name': "$_id.company_name",
                  'employee_count': "$_id.employee_count",
                  'job_count': "$job_count"}}
]

#### Result:

In [None]:
sql_query_result(sql_query3,name)

(163139, 'Cogent Communications', 2258, 60)
(163139, 'Cogent Communications', 2255, 60)
(163139, 'Cogent Communications', 2250, 60)
(10420321, 'The Mom Project', 897, 43)
(10420321, 'The Mom Project', 902, 43)
(11056, 'Insight Global', 13096, 33)
(11056, 'Insight Global', 13698, 33)
(11056, 'Insight Global', 13787, 33)
(11056, 'Insight Global', 13801, 33)
(11056, 'Insight Global', 13804, 33)
(18860134, 'Jobot', 2148, 31)
(18860134, 'Jobot', 2781, 31)
(18860134, 'Jobot', 2783, 31)
(18860134, 'Jobot', 2774, 31)
(49042, 'Five Star Senior Living', 2882, 26)
(49042, 'Five Star Senior Living', 2905, 26)
(49042, 'Five Star Senior Living', 2904, 26)
(7795, 'Petco', 15497, 25)
(7795, 'Petco', 15848, 25)
(7795, 'Petco', 15841, 25)
(1403, 'Booz Allen Hamilton', 38277, 24)
(1403, 'Booz Allen Hamilton', 35963, 24)
(1403, 'Booz Allen Hamilton', 35961, 24)
(1403, 'Booz Allen Hamilton', 35400, 24)
(77301, "Raising Cane's Chicken Fingers", 10158, 24)
(77301, "Raising Cane's Chicken Fingers", 10861, 24)

In [None]:
mongo_query_result(mongo_query3,jobs_collection,name)

{'company_id': 163139, 'company_name': 'Cogent Communications', 'employee_count': [2258, 2255, 2250], 'job_count': 59}
{'company_id': 10420321, 'company_name': 'The Mom Project', 'employee_count': [897, 902], 'job_count': 35}
{'company_id': 11056, 'company_name': 'Insight Global', 'employee_count': [13096, 13698, 13787, 13801, 13804], 'job_count': 30}
{'company_id': 18860134, 'company_name': 'Jobot', 'employee_count': [2148, 2781, 2783, 2774], 'job_count': 27}
{'company_id': 49042, 'company_name': 'Five Star Senior Living', 'employee_count': [2882, 2905, 2904], 'job_count': 26}
{'company_id': 7795, 'company_name': 'Petco', 'employee_count': [15497, 15848, 15841], 'job_count': 25}
{'company_id': 77301, 'company_name': "Raising Cane's Chicken Fingers", 'employee_count': [10158, 10861, 10863], 'job_count': 24}
{'company_id': 1344, 'company_name': 'Honeywell', 'employee_count': [127293, 128397, 128388, 127620], 'job_count': 23}
{'company_id': 1103, 'company_name': 'Verizon', 'employee_coun

#### Explained query

In [None]:
sql_query_explain(sql_query3)

(1, 'SIMPLE', 'jp', None, 'ALL', None, None, None, None, 17047, 11.11, 'Using where; Using temporary; Using filesort')
(1, 'SIMPLE', 'c', None, 'eq_ref', 'PRIMARY', 'PRIMARY', '8', 'mysql.jp.company_id', 1, 100.0, None)
(1, 'SIMPLE', 'b', None, 'ref', 'job_id', 'job_id', '9', 'mysql.jp.job_id', 2, 100.0, 'Using index')
(1, 'SIMPLE', 'ec', None, 'ref', 'company_id', 'company_id', '9', 'mysql.c.company_id', 1, 100.0, None)


In [None]:
mongo_query_explain(mongo_query3,jobs_collection)

KeyboardInterrupt: 

#### Average time

In [None]:
sql_query_avg_time(sql_query3,x,name)

No optimization:  avg total time SQL-Query 3b-1  =  0.13926100730895996


In [None]:
# WARNING! TAKES A LONG TIME TO COMPUTE 
mongo_query_avg_time(mongo_query3,jobs_collection,x,name)

## Query 3b - 2

Essa consulta calcula os valores médio, mínimo e máximo da coluna max_salary da tabela salaries, considerando apenas as linhas em que os anúncios de emprego correspondentes têm um max_salary maior que 5000. O RIGHT JOIN garante que todas as linhas da tabela job_postings sejam incluídas, e as linhas correspondentes da tabela salaries sejam incluídas com valores NULL se não houver correspondência.

In [None]:
name = "3b-2"
sql_query4 = ("SELECT avg(jp.max_salary), min(jp.max_salary), max(jp.max_salary) FROM salaries s "
                     "RIGHT JOIN job_postings jp on s.job_id = jp.job_id "
                     "WHERE jp.max_salary > 5000;")

mongo_query4 = [
    {'$lookup': {
            'from': "job_postings",
            'localField': "job_id",
            'foreignField': "job_id",
            'as': "job_posting"
                }
    },
    {'$unwind': "$job_posting"},
    {'$match': { "job_posting.max_salary": { '$gt': 5000 } }},
    {'$group': {
            '_id': None,
            'avg_max_salary': { '$avg': "$max_salary" },
            'min_max_salary': { '$min': "$max_salary" },
            'max_max_salary': { '$max': "$max_salary" }
                }
    }]

#### Result:

In [None]:
sql_query_result(sql_query4,name)

(135784.2010912389, 5474.0, 1100000.0)
Length of the result for 3b-2 :  1
Execution time: 0.038593292236328125


In [None]:
#mongo_query_result(mongo_query4,salaries_collection,name)

NameError: name 'mongo_query_result' is not defined

#### Explained query

In [None]:
sql_query_explain(sql_query4)

(1, 'SIMPLE', 'jp', None, 'ALL', None, None, None, None, 15830, 33.33, 'Using where')
(1, 'SIMPLE', 's', None, 'ref', 'job_id', 'job_id', '9', 'mysql.jp.job_id', 1, 100.0, 'Using index')


In [None]:
mongo_query_explain(mongo_query4,salaries_collection)

#### Average time

In [None]:
sql_query_avg_time(sql_query4,x,name)

No optimization:  avg total time SQL-Query 3b-2  =  0.01907923221588135


In [None]:
mongo_query_avg_time(mongo_query4,salaries_collection,x,name)

In [None]:
#FOR TESTING !

query_aux = ("SELECT * FROM job_postings jp "
             "LEFT JOIN benefits b ON jp.job_id = b.job_id "
             "WHERE jp.company_id = 10420321 and jp.title LIKE '%er' ")

sql_query_result(query_aux,"aux")

(3697384953, 10420321, 'Quality Assurance Engineer', 85.29, 0.0, 78.68, 'HOURLY', 'Contract', 'Atlanta, GA', 34, 1690000000000.0, 0, 124, 'https://www.linkedin.com/jobs/view/3697384953/?trk=jobs_biz_prem_srch', 'https://themomproject.com/projects/quality-assurance-engineer-8f9883d7ca?utm_campaign=quality-assurance-engineer-8f9883d7ca&utm_medium=posting&utm_source=LinkedIn', 'OffsiteApply', 1700000000000, 0, 'Entry level', '0', 1690000000000, 'themomproject.com', 0, 'CONTRACT', 'USD', 'BASE_SALARY', '1', 3697384953, 1, 'Commuter benefits')
(3697384966, 10420321, 'Clinical Nurse Manager', 0.0, 0.0, 0.0, '0', 'Contract', 'Kearny, NJ', 0, 1690000000000.0, 0, 4, 'https://www.linkedin.com/jobs/view/3697384966/?trk=jobs_biz_prem_srch', 'https://themomproject.com/projects/clinical-nurse-manager-cf4e02a6db?utm_campaign=clinical-nurse-manager-cf4e02a6db&utm_medium=posting&utm_source=LinkedIn', 'OffsiteApply', 1700000000000, 0, 'Mid-Senior level', '0', 1690000000000, 'themomproject.com', 0, 'CONT

### UPDATE

Esta consulta atualiza até 10 linhas na tabela benefits, alterando o valor da coluna type de 'Medical insurance' para 'test'. O LIMIT 10 garante que apenas um máximo de 10 linhas sejam atualizadas, mesmo que haja mais linhas que satisfaçam a condição.

In [None]:
sql_update = "UPDATE benefits SET type = 'test' WHERE type = 'Medical insurance' LIMIT 10;"
time_i = time.time()
mycursor.execute(sql_update)
resu = mycursor.fetchall()
time_f = time.time()

print("SQL Update time:", (time_f-time_i))

time_i = time.time()
mongo_update = db.benefits.updateMany({ type: 'Medical insurance' },
                                       {'$set': { type: 'test' } })
time_f = time.time()

print("Mongo Update time:", (time_f-time_i))


### INSERTION

In [None]:
sql_insertion = ("INSERT INTO companies(company_id, name, description, company_size, state, country, city, zip_code, address, url) "
                 "VALUES (1, 'Empresas Empresas', 'Fazemos tudo e mais alguma coisa', 15, 'CA', 'USA', 'Los Angeles', '2625-136', 'Rua 29 de Fevereiro', 'https://www.example.com');")

mongo_insertion = db.companies.insertOne({
    'company_id': 1,
    'name': 'Empresas Empresas',
    'description': 'Fazemos tudo e mais alguma coisa',
    'company_size': 15,
    'state': 'CA',
    'country': 'USA',
    'city': 'Los Angeles',
    'zip_code': '2625-136',
    'address': 'Rua 29 de Fevereiro',
    'url': 'https://www.example.com'
})