# Testing performance of queries

In [None]:
import mysql.connector
import numpy as np
import pandas as pd
import time
from pymongo import MongoClient
import pprint


# MongoDB connection.
# NOTE(review): credentials are hard-coded in the URI; consider reading them
# from the environment before sharing this notebook.
mongo_client = MongoClient('mongodb://root:rootpassword@localhost:27017/admin')
# The benchmark collections are read from the `admin` database.
db = mongo_client.admin
salaries_collection = db.salaries
jobs_collection = db.job_postings


# MySQL connection and the single cursor shared by the SQL helper functions.
client = mysql.connector.connect(user='admin', password='admin', host='localhost', port=3306, database='mysql')
mycursor = client.cursor()


# Number of repetitions passed to the *_avg_time helpers.
x = 10
# NOTE(review): appears unused at module level; the timing helpers build
# their own local `times` list.
times = []

In [None]:
def sql_query_result(query, query_name):
    """Run ``query`` on the shared MySQL cursor and print rows and timing.

    Only the ``execute`` call is timed; the rows are fetched after the clock
    has been stopped.

    Args:
        query: SQL statement to execute.
        query_name: Label used in the printed summary line.
    """
    started_at = time.time()
    mycursor.execute(query)
    elapsed = time.time() - started_at

    rows = mycursor.fetchall()
    for record in rows:
        print(record)

    print("Length of the result for", query_name, ": ", len(rows))
    print("SQL Execution time:", elapsed)
    
def mongo_query_result(query, collection, query_name):
    """Run an aggregation pipeline and print its documents and timing.

    Only the ``aggregate`` call is timed; the returned cursor is drained into
    a list after the clock has been stopped.

    Args:
        query: Aggregation pipeline (list of stage dicts).
        collection: Collection object exposing ``aggregate``.
        query_name: Label used in the printed summary line.
    """
    started_at = time.time()
    cursor = collection.aggregate(query)
    elapsed = time.time() - started_at

    documents = list(cursor)
    for document in documents:
        print(document)

    print("Length of the result for", query_name, ": ", len(documents))
    print("Mongo Execution time:", elapsed)

In [None]:
def sql_query_explain(query):
    """Print the MySQL ``EXPLAIN`` output for ``query``, one row per line.

    Args:
        query: SQL statement to prefix with ``EXPLAIN`` and execute on the
            shared cursor.
    """
    mycursor.execute("EXPLAIN {}".format(query))
    plan_rows = mycursor.fetchall()
    for plan_row in plan_rows:
        print(plan_row)
        
def mongo_query_explain(query, collection):
    """Pretty-print the server's explain plan for an aggregation pipeline.

    ``collection.aggregate`` returns a cursor object that does not offer an
    ``explain()`` method, so the plan is requested by sending the
    ``aggregate`` database command with ``explain=True`` instead.

    Args:
        query: Aggregation pipeline (list of stage dicts).
        collection: Collection the pipeline targets; its ``database`` and
            ``name`` attributes are used to issue the command.
    """
    plan = collection.database.command(
        "aggregate", collection.name, pipeline=query, explain=True
    )
    pprint.pprint(plan)

In [None]:
def sql_query_avg_time(query, num, query_name):
    """Execute ``query`` ``num`` times and print the mean time per run.

    Each sample covers both ``execute`` and ``fetchall`` on the shared MySQL
    cursor.

    Args:
        query: SQL statement to execute.
        num: Number of timed repetitions.
        query_name: Label used in the printed summary line.
    """
    times = []
    for _ in range(num):
        time_i = time.time()
        mycursor.execute(query)
        mycursor.fetchall()
        times.append(time.time() - time_i)

    # Average over the samples actually taken; dividing by the notebook-level
    # `x` gave a wrong mean whenever `num` differed from it.
    avgtime = sum(times) / len(times) if times else 0.0
    print('No optimization:  avg total time SQL-Query', query_name, ' = ', avgtime)
    
def mongo_query_avg_time(query, collection, num, query_name):
    """Run an aggregation ``num`` times and print the mean time per run.

    Each sample covers the ``aggregate`` call and draining the returned
    cursor, so it measures the same work (execute + fetch) as
    ``sql_query_avg_time``.

    Args:
        query: Aggregation pipeline (list of stage dicts).
        collection: Collection object exposing ``aggregate``.
        num: Number of timed repetitions.
        query_name: Label used in the printed summary line.
    """
    times = []
    for _ in range(num):
        time_i = time.time()
        # Drain the cursor inside the timed region; otherwise only the
        # initial round trip would be measured.
        list(collection.aggregate(query))
        times.append(time.time() - time_i)

    # Average over the samples actually taken; dividing by the notebook-level
    # `x` gave a wrong mean whenever `num` differed from it.
    avgtime = sum(times) / len(times) if times else 0.0
    print('No optimization:  avg total time Mongo-Query', query_name, ' = ', avgtime)
        

## Query 3a - 1

Esta consulta calcula a média do salário máximo para cada `salary_id` (a tabela salaries é agrupada por `salary_id`) e, em seguida, ordena os resultados por ordem decrescente dessa média.

In [None]:
name = "3a-1"

# MySQL: average of max_salary per salary_id, highest average first.
sql_query1 = (
    "SELECT salary_id, AVG(max_salary) AS avg_max_salary "
    "FROM salaries GROUP BY salary_id ORDER BY avg_max_salary DESC"
)

# MongoDB: the same grouping and ordering as an aggregation pipeline.
mongo_query1 = [
    {
        "$group": {
            "_id": "$salary_id",
            "avg_max_salary": {"$avg": "$max_salary"},
        }
    },
    {"$sort": {"avg_max_salary": -1}},
]

# Statements that create and drop the MySQL index used for the
# before/after comparison.
sqlquery_index = "CREATE index index_salary ON salaries(salary_id,max_salary)"
drop_index = "DROP INDEX index_salary ON salaries;"

# A matching MongoDB index is not created here; it would be:
#   salaries_collection.create_index([("salary_id", 1), ("max_salary", 1)])

#### Result:

In [None]:
#ONLY RUN IF INDEX EXISTS
# Executes the DROP INDEX statement for index_salary on the salaries table,
# presumably so the following cells measure the query without the index.
mycursor.execute(drop_index)

In [None]:
sql_query_result(sql_query1,name)
sql_query_explain(sql_query1)

In [None]:
# Blank line to separate this cell's output.
print()
# Create index_salary on salaries(salary_id, max_salary), then repeat the
# same query and EXPLAIN for comparison with the un-indexed run.
mycursor.execute(sqlquery_index)
print("After optimization")
sql_query_result(sql_query1,name)
sql_query_explain(sql_query1)

In [None]:
mongo_query_result(mongo_query1,salaries_collection,name)

#### Explained query

In [None]:
sql_query_explain(sql_query1)

In [None]:
#mongo_query_explain(mongo_query1,salaries_collection)

#### Average time

In [None]:
sql_query_avg_time(sql_query1,x,name)

In [None]:
mongo_query_avg_time(mongo_query1,salaries_collection,x,name)

In [None]:

sqlShowIndexes = "show index from salaries"
mycursor.execute(sqlShowIndexes)
indexList = mycursor.fetchall()

# Create index_salary only when it is missing: an earlier cell may already
# have created it, and CREATE INDEX with an existing name is rejected.
# Key_name is the third column of SHOW INDEX output.
if all(index_row[2] != "index_salary" for index_row in indexList):
    mycursor.execute(sqlquery_index)
    mycursor.execute(sqlShowIndexes)
    indexList = mycursor.fetchall()

# Print the list of indexes on the salaries table (previously fetched but
# never displayed).
print()
for index_row in indexList:
    print(index_row)


In [None]:
sql_query_result(sql_query1,name)
sql_query_explain(sql_query1)

## Query 3a - 2

Esta consulta utiliza uma expressão de tabela comum e a função de janela ROW_NUMBER() para classificar as empresas com base no número de ofertas de emprego em cada localização. O resultado final inclui apenas as empresas com o maior número de ofertas de emprego em cada local.

In [16]:
name = "3a-2"

# MySQL: rank companies per location by number of postings (CTE +
# ROW_NUMBER) and keep only the top-ranked company of each location.
sql_query2 = (
    "WITH ranked_postings AS ("
    "SELECT company_id, location, "
    "ROW_NUMBER() OVER (PARTITION BY location ORDER BY COUNT(*) DESC) AS posting_rank "
    "FROM job_postings GROUP BY company_id, location)"
    "SELECT company_id, location FROM ranked_postings WHERE posting_rank = 1;"
)

# Prefix index on the first 255 characters of job_postings.location.
sql_query2_index = "CREATE INDEX location_index ON job_postings (location(255));"


# mongo

# Compound index on the job postings collection: location ascending,
# count descending.
# NOTE(review): `count` is only produced by the $group stage of the
# aggregation pipeline; it does not look like a stored field of the
# collection, so this index may not help the query — confirm with an
# explain plan.
index_definition = [
    ("location", 1),
    ("count", -1)
]
jobs_collection.create_index(index_definition)

# Pipeline: count postings per (company, location), order by location and
# descending count, keep the first company seen for each location, then
# reshape the output to {company_id, location}.
mongo_query2 = [
    {
        "$group": {
            "_id": {"company_id": "$company_id", "location": "$location"},
            "count": {"$sum": 1},
        }
    },
    {"$sort": {"_id.location": 1, "count": -1}},
    {
        "$group": {
            "_id": "$_id.location",
            "topCompany": {"$first": "$_id.company_id"},
        }
    },
    {"$project": {"_id": 0, "company_id": "$topCompany", "location": "$_id"}},
]

#### Result:

In [None]:
sql_query_result(sql_query2,name)

In [None]:
mongo_query_result(mongo_query2,jobs_collection,name)

#### Explained query

In [None]:
sql_query_explain(sql_query2)

In [17]:
# Create location_index on job_postings, then re-run query 3a-2 and its
# EXPLAIN so the output can be compared with the run without the index.
mycursor.execute(sql_query2_index)
print("After optimization")
sql_query_result(sql_query2,name)
sql_query_explain(sql_query2)

ProgrammingError: 1072 (42000): Key column 'count' doesn't exist in table

In [None]:
#mongo_query_explain(mongo_query2,jobs_collection)

#### Average time

In [None]:
sql_query_avg_time(sql_query2,x,name)

In [None]:
mongo_query_avg_time(mongo_query2,jobs_collection,x,name)

## Query 3b - 1

Esta consulta recupera informações sobre empresas, incluindo o seu ID, nome, número de empregados e o número de ofertas de emprego em que o título termina em "er". Também filtra as empresas com mais de 5 ofertas de emprego e ordena os resultados pela contagem de ofertas por ordem decrescente. A utilização de LEFT JOINs garante que as empresas sem entradas correspondentes nas tabelas benefits ou employee_counts continuam a ser incluídas nos resultados.

In [None]:
name = "3b-1"

# MySQL: per company (and employee count), number of postings whose title
# ends in "er"; keep groups with more than 5 postings, most postings first.
sql_query3 = ("SELECT c.company_id, c.name as company_name, ec.employee_count, COUNT(*) as job_count "
                     "FROM job_postings jp "
                     "LEFT JOIN companies c ON jp.company_id = c.company_id "
                     "LEFT JOIN employee_counts ec ON c.company_id = ec.company_id "
                     "WHERE jp.title LIKE '%er' GROUP BY c.company_id, c.name, ec.employee_count "
                     "HAVING job_count > 5 ORDER BY job_count DESC;")

# MongoDB counterpart, run against the job postings collection.
# The former $lookup on "benefits" was dropped: no later stage read its
# output, so it only added cost without affecting the result.
# NOTE(review): $unwind on "$company" discards postings without a matching
# company, whereas the SQL LEFT JOIN keeps them; and employee_counts is not
# unwound, so employee_count is grouped as an array — confirm whether the
# two queries are meant to return identical rows.
mongo_query3 = [
    {"$match": {'title': {'$regex': 'er$'}}},
    {'$lookup': {'from': "companies",
                 'localField': "company_id",
                 'foreignField': "company_id",
                 'as': "company"}},
    {'$unwind': "$company"},
    {'$lookup': {'from': "employee_counts",
                 'localField': "company.company_id",
                 'foreignField': "company_id",
                 'as': "employee_counts"}},
    {'$group': {'_id': {'company_id': "$company.company_id",
                        'company_name': "$company.name",
                        'employee_count': {'$ifNull': ["$employee_counts.employee_count", 0]}
                        },
                'job_count': {'$sum': 1}}},
    {'$match': {'job_count': {'$gt': 5}}},
    {'$sort': {'job_count': -1}},
    {'$project': {'_id': 0,
                  'company_id': "$_id.company_id",
                  'company_name': "$_id.company_name",
                  'employee_count': "$_id.employee_count",
                  'job_count': "$job_count"}}
]

#### Result:

In [None]:
sql_query_result(sql_query3,name)

In [None]:
mongo_query_result(mongo_query3,jobs_collection,name)

#### Explained query

In [None]:
sql_query_explain(sql_query3)

In [None]:
#mongo_query_explain(mongo_query3,jobs_collection)

#### Average time

In [None]:
sql_query_avg_time(sql_query3,x,name)

In [None]:
# WARNING! TAKES A LONG TIME TO COMPUTE 
mongo_query_avg_time(mongo_query3,jobs_collection,x,name)

## Query 3b - 2

Essa consulta calcula os valores médio, mínimo e máximo da coluna max_salary da tabela salaries, considerando apenas as linhas em que os anúncios de emprego correspondentes têm um max_salary maior que 5000. O RIGHT JOIN garante que todas as linhas da tabela job_postings sejam incluídas, e as linhas correspondentes da tabela salaries sejam incluídas com valores NULL se não houver correspondência.

In [None]:
name = "3b-2"

# MySQL: avg/min/max of salaries.max_salary for rows whose job posting has
# max_salary > 5000. The aggregates read `s.max_salary` so the statement
# measures the same column as the MongoDB pipeline below (which aggregates
# the salaries document's own max_salary); previously they read
# `jp.max_salary`. Rows without a matching salaries entry yield NULL and are
# ignored by the aggregate functions.
sql_query4 = ("SELECT avg(s.max_salary), min(s.max_salary), max(s.max_salary) FROM salaries s "
                     "RIGHT JOIN job_postings jp on s.job_id = jp.job_id "
                     "WHERE jp.max_salary > 5000;")

# MongoDB: join each salaries document with its job posting, keep postings
# with max_salary > 5000, then aggregate the salaries max_salary into a
# single result document.
mongo_query4 = [
    {'$lookup': {
            'from': "job_postings",
            'localField': "job_id",
            'foreignField': "job_id",
            'as': "job_posting"
                }
    },
    {'$unwind': "$job_posting"},
    {'$match': { "job_posting.max_salary": { '$gt': 5000 } }},
    {'$group': {
            '_id': None,
            'avg_max_salary': { '$avg': "$max_salary" },
            'min_max_salary': { '$min': "$max_salary" },
            'max_max_salary': { '$max': "$max_salary" }
                }
    }]

#### Result:

In [None]:
sql_query_result(sql_query4,name)

In [None]:
mongo_query_result(mongo_query4,salaries_collection,name)

#### Explained query

In [None]:
sql_query_explain(sql_query4)

In [None]:
#mongo_query_explain(mongo_query4,salaries_collection)

#### Average time

In [None]:
sql_query_avg_time(sql_query4,x,name)

In [None]:
mongo_query_avg_time(mongo_query4,salaries_collection,x,name)

In [None]:
#FOR TESTING !

# Ad-hoc check: postings of one company whose title ends in "er",
# left-joined with their benefits rows, printed via sql_query_result.
query_aux = ("SELECT * FROM job_postings jp "
             "LEFT JOIN benefits b ON jp.job_id = b.job_id "
             "WHERE jp.company_id = 10420321 and jp.title LIKE '%er' ")

sql_query_result(query_aux,"aux")

### UPDATE

Esta consulta atualiza até 10 linhas na tabela benefits, alterando o valor da coluna type de 'Medical insurance' para 'test'. O LIMIT 10 garante que apenas um máximo de 10 linhas sejam atualizadas, mesmo que haja mais linhas que satisfaçam a condição.

In [None]:
sql_update = "UPDATE benefits SET type = 'test' WHERE type = 'Medical insurance' LIMIT 10;"
time_i = time.time()
mycursor.execute(sql_update)
# An UPDATE returns no result set, so there is nothing to fetch; the number
# of affected rows is read from the cursor instead.
# NOTE(review): the change is not committed here — call client.commit() if
# the update is meant to persist.
resu = mycursor.rowcount
time_f = time.time()

print("SQL Update time:", (time_f-time_i))

time_i = time.time()
# PyMongo's method is update_many and dict keys must be strings ('type', not
# the builtin `type`). update_many has no LIMIT, so up to 10 matching _ids
# are selected first to mirror the SQL statement's LIMIT 10.
mongo_update_ids = [
    doc['_id']
    for doc in db.benefits.find({'type': 'Medical insurance'}, {'_id': 1}).limit(10)
]
mongo_update = db.benefits.update_many({'_id': {'$in': mongo_update_ids}},
                                       {'$set': {'type': 'test'}})
time_f = time.time()

print("Mongo Update time:", (time_f-time_i))


### INSERTION

In [None]:
# SQL statement inserting one example company row.
# NOTE(review): this string is only defined in this cell, not executed.
sql_insertion = ("INSERT INTO companies(company_id, name, description, company_size, state, country, city, zip_code, address, url) "
                 "VALUES (1, 'Empresas Empresas', 'Fazemos tudo e mais alguma coisa', 15, 'CA', 'USA', 'Los Angeles', '2625-136', 'Rua 29 de Fevereiro', 'https://www.example.com');")

# Insert the same company as a MongoDB document. PyMongo names this method
# insert_one (insertOne is the mongo shell spelling and is not callable).
mongo_insertion = db.companies.insert_one({
    'company_id': 1,
    'name': 'Empresas Empresas',
    'description': 'Fazemos tudo e mais alguma coisa',
    'company_size': 15,
    'state': 'CA',
    'country': 'USA',
    'city': 'Los Angeles',
    'zip_code': '2625-136',
    'address': 'Rua 29 de Fevereiro',
    'url': 'https://www.example.com'
})