From fed20a40756c2bfac5e9ec1796fff16bb1e25038 Mon Sep 17 00:00:00 2001 From: Jamie Diprose <5715104+jdddog@users.noreply.github.com> Date: Mon, 12 Jun 2023 17:23:28 +1200 Subject: [PATCH] Remove percentiles --- .../database/sql/create_aggregate.sql.jinja2 | 42 +++---------------- .../sql/export_access_types.sql.jinja2 | 1 - .../sql/export_disciplines.sql.jinja2 | 1 - 3 files changed, 6 insertions(+), 38 deletions(-) diff --git a/academic_observatory_workflows/database/sql/create_aggregate.sql.jinja2 b/academic_observatory_workflows/database/sql/create_aggregate.sql.jinja2 index f6e6d871..32ab6958 100644 --- a/academic_observatory_workflows/database/sql/create_aggregate.sql.jinja2 +++ b/academic_observatory_workflows/database/sql/create_aggregate.sql.jinja2 @@ -13,23 +13,6 @@ # limitations under the License. # Author: Richard Hosking #} -# Helper Function: Processing Output Types -{# -Output Schema: -per_25th FLOAT NULLABLE -median FLOAT NULLABLE -per_90th FLOAT NULLABLE -per_95th FLOAT NULLABLE -#} -CREATE TEMP FUNCTION compute_percentiles(counts ARRAY) AS ( - (SELECT as STRUCT - ROUND(PERCENTILE_CONT(count, 0.25) OVER(), 2) as per_25th, - ROUND(PERCENTILE_CONT(count, 0.50) OVER(), 2) as median, - ROUND(PERCENTILE_CONT(count, 0.90) OVER(), 2) as per_90th, - ROUND(PERCENTILE_CONT(count, 0.95) OVER(), 2) as per_95th - FROM UNNEST(counts) as count LIMIT 1) -); - # Helper Function: Counting Access Types {# Output Schema: @@ -215,13 +198,10 @@ outputs_without_citations INTEGER NULLABLE citations RECORD NULLABLE citations.openalex RECORD NULLABLE citations.openalex.total_citations INTEGER NULLABLE -citations.openalex.percentiles RECORD NULLABLE citations.open_citations RECORD NULLABLE citations.open_citations.total_citations INTEGER NULLABLE -citations.open_citations.percentiles RECORD NULLABLE citations.crossref RECORD NULLABLE citations.crossref.total_citations INTEGER NULLABLE -citations.crossref.percentiles RECORD NULLABLE *percetiles schema captured above #} @@ -243,16 +223,13 @@ CREATE TEMP FUNCTION compute_conditional_citations( COUNTIF( (citations.crossref IS NULL OR citations.crossref = 0) AND (citations.open_citations IS NULL OR citations.open_citations = 0) AND (citations.openalex IS NULL OR citations.openalex = 0)) as outputs_without_citations, STRUCT( STRUCT( - SUM(citations.openalex) as total_citations, - compute_percentiles(ARRAY_AGG(citations.openalex)) as percentiles + SUM(citations.openalex) as total_citations ) as openalex, STRUCT( - SUM(citations.open_citations) as total_citations, - compute_percentiles(ARRAY_AGG(citations.open_citations)) as percentiles + SUM(citations.open_citations) as total_citations ) as open_citations, STRUCT( - SUM(citations.crossref) as total_citations, - compute_percentiles(ARRAY_AGG(citations.crossref)) as percentiles + SUM(citations.crossref) as total_citations ) as crossref ) as citations, @@ -504,15 +481,11 @@ sum_of_scores FLOAT NULLABLE citations RECORD NULLABLE openalex RECORD NULLABLE total_citations INTEGER NULLABLE -percentiles RECORD NULLABLE open_citations RECORD NULLABLE total_citations INTEGER NULLABLE -percentiles RECORD NULLABLE crossref RECORD NULLABLE total_citations INTEGER NULLABLE -percentiles RECORD NULLABLE total_citations INTEGER NULLABLE -percentiles RECORD NULLABLE num_oa_outputs INTEGER NULLABLE num_green_outputs INTEGER NULLABLE num_gold_outputs INTEGER NULLABLE @@ -543,16 +516,13 @@ CREATE TEMP FUNCTION compute_disciplines( SUM(Score) as sum_of_scores, STRUCT( STRUCT( - SUM(citations.openalex) as total_citations, - compute_percentiles(ARRAY_AGG(citations.openalex)) as percentiles + SUM(citations.openalex) as total_citations ) as openalex, STRUCT( - SUM(citations.open_citations) as total_citations, - compute_percentiles(ARRAY_AGG(citations.open_citations)) as percentiles + SUM(citations.open_citations) as total_citations ) as open_citations, STRUCT( - SUM(citations.crossref) as total_citations, - compute_percentiles(ARRAY_AGG(citations.crossref)) as percentiles + SUM(citations.crossref) as total_citations ) as crossref ) as citations, COUNTIF(is_oa) as num_oa_outputs, diff --git a/academic_observatory_workflows/database/sql/export_access_types.sql.jinja2 b/academic_observatory_workflows/database/sql/export_access_types.sql.jinja2 index 8583b711..5d1b6c0a 100644 --- a/academic_observatory_workflows/database/sql/export_access_types.sql.jinja2 +++ b/academic_observatory_workflows/database/sql/export_access_types.sql.jinja2 @@ -23,6 +23,5 @@ SELECT access_type.outputs_with_citations as access_types_outputs_with_citations, access_type.outputs_without_citations as access_types_outputs_without_citations, access_type.citations.openalex.total_citations as access_types_total_citations, - access_type.citations.openalex.percentiles.median as access_types_median_citations_per_output FROM `{{ table_id }}`, UNNEST( access_types.breakdown ) as access_type ORDER BY id, published_year ASC \ No newline at end of file diff --git a/academic_observatory_workflows/database/sql/export_disciplines.sql.jinja2 b/academic_observatory_workflows/database/sql/export_disciplines.sql.jinja2 index fd2ac6fb..9741fa76 100644 --- a/academic_observatory_workflows/database/sql/export_disciplines.sql.jinja2 +++ b/academic_observatory_workflows/database/sql/export_disciplines.sql.jinja2 @@ -30,7 +30,6 @@ SELECT ROUND(SAFE_DIVIDE( ( discipline.num_green_outputs ) * 100 , discipline.total_outputs ), 2) as disciplines_percent_green, ROUND(SAFE_DIVIDE( ( discipline.num_gold_outputs ) * 100 , discipline.total_outputs ), 2) as disciplines_percent_gold, discipline.citations.openalex.total_citations as disciplines_total_citations, - discipline.citations.openalex.percentiles.median as disciplines_median_citations_per_output, discipline.funding.total_funded_outputs as disciplines_total_funded_outputs, discipline.funding.num_international_outputs as disciplines_num_international_funded_outputs, discipline.funding.num_domestic_outputs as disciplines_num_domestic_funded_outputs,