Skip to content

Commit

Permalink
Remove percentiles
Browse files Browse the repository at this point in the history
  • Loading branch information
jdddog committed Jun 12, 2023
1 parent cd35da8 commit fed20a4
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,6 @@
# limitations under the License.
# Author: Richard Hosking #}

# Helper Function: Computing Percentiles
{#
Output Schema:
per_25th FLOAT NULLABLE
median FLOAT NULLABLE
per_90th FLOAT NULLABLE
per_95th FLOAT NULLABLE
#}
CREATE TEMP FUNCTION compute_percentiles(counts ARRAY<INT64>) AS (
  -- Returns a STRUCT with the 25th, 50th (median), 90th and 95th percentiles of
  -- `counts`, each rounded to 2 decimal places.
  --
  -- PERCENTILE_CONT is used here as an analytic function with an empty OVER ()
  -- clause, so it is evaluated across every element of the unnested array and
  -- each output row carries the same four values. LIMIT 1 keeps a single row so
  -- the subquery can be used as a scalar expression.
  (
    SELECT AS STRUCT
      ROUND(PERCENTILE_CONT(citation_count, 0.25) OVER (), 2) AS per_25th,
      ROUND(PERCENTILE_CONT(citation_count, 0.50) OVER (), 2) AS median,
      ROUND(PERCENTILE_CONT(citation_count, 0.90) OVER (), 2) AS per_90th,
      ROUND(PERCENTILE_CONT(citation_count, 0.95) OVER (), 2) AS per_95th
    FROM
      UNNEST(counts) AS citation_count
    LIMIT 1
  )
);

# Helper Function: Counting Access Types
{#
Output Schema:
Expand Down Expand Up @@ -215,13 +198,10 @@ outputs_without_citations INTEGER NULLABLE
citations RECORD NULLABLE
citations.openalex RECORD NULLABLE
citations.openalex.total_citations INTEGER NULLABLE
citations.openalex.percentiles RECORD NULLABLE
citations.open_citations RECORD NULLABLE
citations.open_citations.total_citations INTEGER NULLABLE
citations.open_citations.percentiles RECORD NULLABLE
citations.crossref RECORD NULLABLE
citations.crossref.total_citations INTEGER NULLABLE
citations.crossref.percentiles RECORD NULLABLE
*percentiles schema captured above
#}
Expand All @@ -243,16 +223,13 @@ CREATE TEMP FUNCTION compute_conditional_citations(
COUNTIF( (citations.crossref IS NULL OR citations.crossref = 0) AND (citations.open_citations IS NULL OR citations.open_citations = 0) AND (citations.openalex IS NULL OR citations.openalex = 0)) as outputs_without_citations,
STRUCT(
STRUCT(
SUM(citations.openalex) as total_citations,
compute_percentiles(ARRAY_AGG(citations.openalex)) as percentiles
SUM(citations.openalex) as total_citations
) as openalex,
STRUCT(
SUM(citations.open_citations) as total_citations,
compute_percentiles(ARRAY_AGG(citations.open_citations)) as percentiles
SUM(citations.open_citations) as total_citations
) as open_citations,
STRUCT(
SUM(citations.crossref) as total_citations,
compute_percentiles(ARRAY_AGG(citations.crossref)) as percentiles
SUM(citations.crossref) as total_citations
) as crossref
) as citations,

Expand Down Expand Up @@ -504,15 +481,11 @@ sum_of_scores FLOAT NULLABLE
citations RECORD NULLABLE
openalex RECORD NULLABLE
total_citations INTEGER NULLABLE
percentiles RECORD NULLABLE
open_citations RECORD NULLABLE
total_citations INTEGER NULLABLE
percentiles RECORD NULLABLE
crossref RECORD NULLABLE
total_citations INTEGER NULLABLE
percentiles RECORD NULLABLE
total_citations INTEGER NULLABLE
percentiles RECORD NULLABLE
num_oa_outputs INTEGER NULLABLE
num_green_outputs INTEGER NULLABLE
num_gold_outputs INTEGER NULLABLE
Expand Down Expand Up @@ -543,16 +516,13 @@ CREATE TEMP FUNCTION compute_disciplines(
SUM(Score) as sum_of_scores,
STRUCT(
STRUCT(
SUM(citations.openalex) as total_citations,
compute_percentiles(ARRAY_AGG(citations.openalex)) as percentiles
SUM(citations.openalex) as total_citations
) as openalex,
STRUCT(
SUM(citations.open_citations) as total_citations,
compute_percentiles(ARRAY_AGG(citations.open_citations)) as percentiles
SUM(citations.open_citations) as total_citations
) as open_citations,
STRUCT(
SUM(citations.crossref) as total_citations,
compute_percentiles(ARRAY_AGG(citations.crossref)) as percentiles
SUM(citations.crossref) as total_citations
) as crossref
) as citations,
COUNTIF(is_oa) as num_oa_outputs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,5 @@ SELECT
access_type.outputs_with_citations as access_types_outputs_with_citations,
access_type.outputs_without_citations as access_types_outputs_without_citations,
access_type.citations.openalex.total_citations as access_types_total_citations,
access_type.citations.openalex.percentiles.median as access_types_median_citations_per_output
FROM `{{ table_id }}`, UNNEST( access_types.breakdown ) as access_type
ORDER BY id, published_year ASC
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ SELECT
ROUND(SAFE_DIVIDE( ( discipline.num_green_outputs ) * 100 , discipline.total_outputs ), 2) as disciplines_percent_green,
ROUND(SAFE_DIVIDE( ( discipline.num_gold_outputs ) * 100 , discipline.total_outputs ), 2) as disciplines_percent_gold,
discipline.citations.openalex.total_citations as disciplines_total_citations,
discipline.citations.openalex.percentiles.median as disciplines_median_citations_per_output,
discipline.funding.total_funded_outputs as disciplines_total_funded_outputs,
discipline.funding.num_international_outputs as disciplines_num_international_funded_outputs,
discipline.funding.num_domestic_outputs as disciplines_num_domestic_funded_outputs,
Expand Down

0 comments on commit fed20a4

Please sign in to comment.