In [12]:
#========================= Loading Libraries =========================#

from google.cloud import bigquery
from statistics import mean
import time
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'credentials.json'

# BigQuery Client
client = bigquery.Client()

In [2]:
#========================= Queries =========================#

query_list = [
# Query 1 - Crime Counts by Major Category and Year
"""
SELECT major_category, year, SUM(value) AS total_crimes
FROM `bigquery-public-data.london_crime.crime_by_lsoa`
GROUP BY major_category, year
ORDER BY major_category, year;
""",
# Query 2 - Crime Counts by Borough in the Year 2016
"""
SELECT borough, SUM(value) AS total_crimes
FROM `bigquery-public-data.london_crime.crime_by_lsoa`
WHERE year = 2016
GROUP BY borough
ORDER BY total_crimes DESC;
""",
# Query 3 - Monthly Crime Trend for Westminster (borough) in 2013
"""
SELECT borough, year, month, SUM(value) AS total_crimes
FROM `bigquery-public-data.london_crime.crime_by_lsoa`
WHERE borough = 'Westminster' AND year = 2013
GROUP BY borough, year, month
ORDER BY year, month;
""",
# Query 4 -  Crime Counts per Minor Category in Camden (borough)
"""
SELECT borough, minor_category, SUM(value) AS total_crimes
FROM `bigquery-public-data.london_crime.crime_by_lsoa`
WHERE borough = 'Camden'
GROUP BY borough, minor_category
ORDER BY total_crimes DESC;
""",
# Query 5 - Top 10 Boroughs with the Highest Crime Counts for Major Category 'Violence Against the Person'
"""
SELECT major_category, borough, SUM(value) AS total_crimes
FROM `bigquery-public-data.london_crime.crime_by_lsoa`
WHERE major_category = 'Violence Against the Person'
GROUP BY major_category, borough
ORDER BY total_crimes DESC
LIMIT 10;
""",
# Query 6 - Percentage Distribution of Crimes Across Major Categories
"""
SELECT major_category, SUM(value) AS total_crimes,
  (SUM(value) * 100.0) / SUM(SUM(value)) OVER() AS percentage
FROM `bigquery-public-data.london_crime.crime_by_lsoa`
GROUP BY major_category
ORDER BY total_crimes DESC;
""" 
]

In [3]:
#========================= Experiment =========================#

# List to store the average querying times for all queries
avg_query_times = []
q = 0

# Iterating on the list of queries
for query in query_list:
    q = q+1
    # List to store querying times for multiple runs
    query_times = []
    
    # Executing the query 10 times and tracking querying time
    for i in range(10):
        start_time = time.time()
        df_result = client.query(query).to_dataframe()
        query_times.append(time.time() - start_time)
    
    avg_query_times.append(mean(query_times))
    
    print("#========================= Query", q, "=========================#\n")
    
    print("Average Query Time:", round(mean(query_times),2), "seconds")
    print("Sample Result:\n", df_result.head(),"\n")


Average Query Time: 1.15 seconds
Sample Result:
   major_category  year  total_crimes
0       Burglary  2008         88092
1       Burglary  2009         90619
2       Burglary  2010         86826
3       Burglary  2011         93315
4       Burglary  2012         93392 


Average Query Time: 1.21 seconds
Sample Result:
          borough  total_crimes
0    Westminster         48330
1        Lambeth         34071
2      Southwark         31636
3         Newham         30090
4  Tower Hamlets         29253 


Average Query Time: 1.19 seconds
Sample Result:
        borough  year  month  total_crimes
0  Westminster  2013      1          4749
1  Westminster  2013      2          4431
2  Westminster  2013      3          4577
3  Westminster  2013      4          4458
4  Westminster  2013      5          4309 


Average Query Time: 1.19 seconds
Sample Result:
   borough            minor_category  total_crimes
0  Camden               Other Theft         64265
1  Camden        Other Theft Perso

In [16]:
#========================= Queries =========================#

query_list2 = [
# Query 1 - Crime Counts by Major Category and Year
"""
SELECT major_category, year, SUM(value) AS total_crimes
FROM `bigdata-407822.mydb.london_crime_small`
GROUP BY major_category, year
ORDER BY major_category, year;
""",
# Query 2 - Crime Counts by Borough in the Year 2016
"""
SELECT borough, SUM(value) AS total_crimes
FROM `bigdata-407822.mydb.london_crime_small`
WHERE year = 2016
GROUP BY borough
ORDER BY total_crimes DESC;
""",
# Query 3 - Monthly Crime Trend for Westminster (borough) in 2013
"""
SELECT borough, year, month, SUM(value) AS total_crimes
FROM `bigdata-407822.mydb.london_crime_small`
WHERE borough = 'Westminster' AND year = 2013
GROUP BY borough, year, month
ORDER BY year, month;
""",
# Query 4 -  Crime Counts per Minor Category in Camden (borough)
"""
SELECT borough, minor_category, SUM(value) AS total_crimes
FROM `bigdata-407822.mydb.london_crime_small`
WHERE borough = 'Camden'
GROUP BY borough, minor_category
ORDER BY total_crimes DESC;
""",
# Query 5 - Top 10 Boroughs with the Highest Crime Counts for Major Category 'Violence Against the Person'
"""
SELECT major_category, borough, SUM(value) AS total_crimes
FROM `bigdata-407822.mydb.london_crime_small`
WHERE major_category = 'Violence Against the Person'
GROUP BY major_category, borough
ORDER BY total_crimes DESC
LIMIT 10;
""",
# Query 6 - Percentage Distribution of Crimes Across Major Categories
"""
SELECT major_category, SUM(value) AS total_crimes,
  (SUM(value) * 100.0) / SUM(SUM(value)) OVER() AS percentage
FROM `bigdata-407822.mydb.london_crime_small`
GROUP BY major_category
ORDER BY total_crimes DESC;
""" 
]

In [17]:
#========================= Experiment =========================#

# List to store the average querying times for all queries
avg_query_times = []
q = 0

# Iterating on the list of queries
for query in query_list2:
    q = q+1
    # List to store querying times for multiple runs
    query_times = []
    
    # Executing the query 10 times and tracking querying time
    for i in range(10):
        start_time = time.time()
        df_result = client.query(query).to_dataframe()
        query_times.append(time.time() - start_time)
    
    avg_query_times.append(mean(query_times))
    
    print("#========================= Query", q, "=========================#\n")
    
    print("Average Query Time:", round(mean(query_times),2), "seconds")
    print("Sample Result:\n", df_result.head(),"\n")


Average Query Time: 1.7 seconds
Sample Result:
   major_category  year  total_crimes
0          Drugs  2008            26
1          Drugs  2009            26
2          Drugs  2010            30
3          Drugs  2011            23
4          Drugs  2012            22 


Average Query Time: 1.4 seconds
Sample Result:
          borough  total_crimes
0    Westminster          1178
1        Lambeth           581
2  Tower Hamlets           567
3         Ealing           531
4        Hackney           519 


Average Query Time: 1.48 seconds
Sample Result:
        borough  year  month  total_crimes
0  Westminster  2013      1            34
1  Westminster  2013      2            97
2  Westminster  2013      3            18
3  Westminster  2013      4           153
4  Westminster  2013      5           168 


Average Query Time: 1.45 seconds
Sample Result:
   borough minor_category  total_crimes
0  Camden    Other Theft          4028
1  Camden     Harassment          1231
2  Camden    Other 