Skip to content

Commit

Permalink
Merge pull request ofek#107 from tswast/issue64-query-optimize
Browse files Browse the repository at this point in the history
  • Loading branch information
hugovk committed Jan 19, 2021
2 parents 64439d8 + ed33cad commit 9d22a7f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 28 deletions.
27 changes: 13 additions & 14 deletions pypinfo/core.py
Expand Up @@ -9,15 +9,8 @@

from pypinfo.fields import AGGREGATES, Downloads

- FROM = """\
- FROM
- TABLE_DATE_RANGE(
- [the-psf:pypi.downloads],
- {},
- {}
- )
- """
- DATE_ADD = 'DATE_ADD(CURRENT_TIMESTAMP(), {}, "day")'
+ FROM = 'FROM `the-psf.pypi.file_downloads`'
+ DATE_ADD = 'DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL {} DAY)'
START_TIMESTAMP = 'TIMESTAMP("{} 00:00:00")'
END_TIMESTAMP = 'TIMESTAMP("{} 23:59:59")'
START_DATE = '-31'
Expand All @@ -27,7 +20,7 @@

def create_config():
config = QueryJobConfig()
- config.use_legacy_sql = True
+ config.use_legacy_sql = False
return config


Expand Down Expand Up @@ -136,26 +129,32 @@ def build_query(
for field in fields:
query += f' {field.data} as {field.name},\n'

- query += FROM.format(start_date, end_date)
+ query += FROM

+ query += f'\nWHERE timestamp BETWEEN {start_date} AND {end_date}\n'
if where:
- query += f'WHERE\n {where}\n'
+ query += f' AND {where}\n'
else:
conditions = []
if project:
conditions.append(f'file.project = "{project}"\n')
if pip:
conditions.append('details.installer.name = "pip"\n')
if conditions:
- query += 'WHERE\n ' + ' AND '.join(conditions)
+ query += ' AND '
+ query += ' AND '.join(conditions)

if len(fields) > 1:
gb = 'GROUP BY\n'
initial_length = len(gb)

+ non_aggregate_fields = []
for field in fields[:-1]:
if field not in AGGREGATES:
- gb += f' {field.name},\n'
+ non_aggregate_fields.append(field.name)
+ gb += ' '
+ gb += ', '.join(non_aggregate_fields)
+ gb += '\n'

if len(gb) > initial_length:
query += gb
Expand Down
6 changes: 3 additions & 3 deletions pypinfo/fields.py
Expand Up @@ -2,9 +2,9 @@

Field = namedtuple('Field', ('name', 'data'))
Downloads = Field('download_count', 'COUNT(*)')
- Date = Field('download_date', 'STRFTIME_UTC_USEC(timestamp, "%Y-%m-%d")')
- Month = Field('download_month', 'STRFTIME_UTC_USEC(timestamp, "%Y-%m")')
- Year = Field('download_year', 'STRFTIME_UTC_USEC(timestamp, "%Y")')
+ Date = Field('download_date', 'FORMAT_TIMESTAMP("%Y-%m-%d", timestamp)')
+ Month = Field('download_month', 'FORMAT_TIMESTAMP("%Y-%m", timestamp)')
+ Year = Field('download_year', 'FORMAT_TIMESTAMP("%Y", timestamp)')
Country = Field('country', 'country_code')
Project = Field('project', 'file.project')
Version = Field('version', 'file.version')
Expand Down
17 changes: 6 additions & 11 deletions tests/test_core.py
Expand Up @@ -24,7 +24,7 @@ def test_create_config():
config = core.create_config()

# Assert
- assert config.use_legacy_sql
+ assert not config.use_legacy_sql


@pytest.mark.parametrize(
Expand Down Expand Up @@ -88,7 +88,7 @@ def test_format_date_negative_number():
date = core.format_date("-1", dummy_format)

# Assert
- assert date == 'DATE_ADD(CURRENT_TIMESTAMP(), -1, "day")'
+ assert date == 'DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 DAY)'


def test_format_date_yyy_mm_dd():
Expand Down Expand Up @@ -137,17 +137,12 @@ def test_build_query():
SELECT
REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") as python_version,
COUNT(*) as download_count,
- FROM
- TABLE_DATE_RANGE(
- [the-psf:pypi.downloads],
- TIMESTAMP("2017-10-01 00:00:00"),
- TIMESTAMP("2017-10-31 23:59:59")
- )
- WHERE
- file.project = "pycodestyle"
+ FROM `the-psf.pypi.file_downloads`
+ WHERE timestamp BETWEEN TIMESTAMP("2017-10-01 00:00:00") AND TIMESTAMP("2017-10-31 23:59:59")
+ AND file.project = "pycodestyle"
AND details.installer.name = "pip"
GROUP BY
- python_version,
+ python_version
ORDER BY
download_count DESC
LIMIT 100
Expand Down

0 comments on commit 9d22a7f

Please sign in to comment.