In [None]:
%%bigquery
-- Rebuilds the staging table `status_changes_plot4`.
--
-- One output row per combination of fiscal year, campus state, campus LMA,
-- education level, STEM flag, NSF broad field, country of birth and school.
-- For each combination it stores:
--   completed_count      distinct SEVIS_IDs in the group
--   completed_req_count  distinct SEVIS_IDs in the group that have at least
--                        one row with a non-NULL Requested_Status
--   completed_req_frac   completed_req_count / completed_count (NULL if 0)
--
-- NOTE(review): counts are distinct *within* a group only. A SEVIS_ID that
-- appears in more than one group (e.g. under two schools) is counted once in
-- each, so summing these columns across groups can count it more than once.
CREATE OR REPLACE TABLE `sevis-beta.sevis_staging.status_changes_plot4` AS
WITH eligible AS (
  -- Keep only records whose Program_End_Date falls between 1 October of the
  -- previous calendar year and 30 September of `Year` (both bounds inclusive).
  -- SAFE_CAST yields NULL for unparseable dates, which drops those rows here.
  SELECT
    `Year` AS fiscal_year,
    TRIM(Campus_State) AS Campus_State,
    CAMPUS_LMA,
    Student_Edu_Level_Desc,
    IS_STEM,
    NSF_SUBJ_FIELD_BROAD,
    Country_of_Birth,
    School_Name,
    Requested_Status,
    SEVIS_ID
  FROM `sevis-beta.sevis_raw.sevis_f1_cleaned_master`
  WHERE SAFE_CAST(Program_End_Date AS DATE)
        BETWEEN DATE(`Year` - 1, 10, 1) AND DATE(`Year`, 9, 30)
)
SELECT
  fiscal_year,
  Campus_State,
  CAMPUS_LMA,
  Student_Edu_Level_Desc,
  IS_STEM,
  NSF_SUBJ_FIELD_BROAD,
  Country_of_Birth,
  School_Name,
  COUNT(DISTINCT SEVIS_ID)                                                   AS completed_count,
  COUNT(DISTINCT IF(Requested_Status IS NOT NULL, SEVIS_ID, NULL))           AS completed_req_count,
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(Requested_Status IS NOT NULL, SEVIS_ID, NULL)),
    COUNT(DISTINCT SEVIS_ID)
  )                                                                          AS completed_req_frac
FROM eligible
GROUP BY
  fiscal_year,
  Campus_State,
  CAMPUS_LMA,
  Student_Edu_Level_Desc,
  IS_STEM,
  NSF_SUBJ_FIELD_BROAD,
  Country_of_Birth,
  School_Name;
-- No ORDER BY: a table has no stored row order, so sorting the CTAS result
-- only adds a global sort over every group. Readers that need a particular
-- order should apply their own ORDER BY when querying the table.

Query is running:   0%|          |

In [None]:
%%bigquery
/* ───── 1. Declare filter variables ───── */
-- Every filter is optional: a NULL value means "do not filter on this
-- dimension" (see the `<var> IS NULL OR ...` predicates in the WHERE clause).
DECLARE fys             ARRAY<INT64>;
DECLARE campus_states   ARRAY<STRING>;
DECLARE campus_lmas     ARRAY<STRING>;
DECLARE edu_levels      ARRAY<STRING>;
-- Deliberately NOT named `is_stem`: identifiers are case-insensitive and, when
-- a script variable and a column share a name, the column wins. A variable
-- called is_stem would make `IS_STEM = is_stem` compare the column with
-- itself, so the STEM filter would silently never apply.
-- NOTE(review): assumes the IS_STEM column is BOOL — confirm against the table.
DECLARE stem_filter     BOOL;
DECLARE birth_countries ARRAY<STRING>;
DECLARE nsf_fields      ARRAY<STRING>;
DECLARE schools         ARRAY<STRING>;

/* ───── 2. Assign test values ───── */
SET fys             = [2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022];
SET campus_states   = NULL;
SET campus_lmas     = NULL;
SET edu_levels      = NULL;
SET stem_filter     = NULL;
SET birth_countries = NULL;
SET nsf_fields      = NULL;
SET schools         = NULL;

/* ───── 3. Share of completers with / without a requested status ───── */
-- Returns one row per (year, degree level) plus one roll-up row per year
-- labelled 'All Degrees'. Fractions are computed from the summed counts of
-- the staging rows that pass the filters; SAFE_DIVIDE yields NULL when the
-- denominator is 0.
SELECT
  fiscal_year AS year,
  -- The (fiscal_year) grouping set leaves Student_Edu_Level_Desc NULL; label
  -- that roll-up row.
  -- NOTE(review): a genuinely NULL Student_Edu_Level_Desc in the data would
  -- also be labelled 'All Degrees'; GROUPING(Student_Edu_Level_Desc) could
  -- tell the two cases apart if such rows exist.
  COALESCE(Student_Edu_Level_Desc, 'All Degrees') AS degree_level,
  SAFE_DIVIDE(SUM(completed_req_count), SUM(completed_count)) AS frac_req_status,
  1 - SAFE_DIVIDE(SUM(completed_req_count), SUM(completed_count)) AS frac_not_req_status
FROM `sevis-beta.sevis_staging.status_changes_plot4`
WHERE
      (fys IS NULL OR fiscal_year IN UNNEST(fys))
  AND (campus_states IS NULL OR Campus_State IN UNNEST(campus_states))
  -- Filter the LMA column (previously compared Campus_State to the LMA list).
  AND (campus_lmas IS NULL OR CAMPUS_LMA IN UNNEST(campus_lmas))
  AND (edu_levels IS NULL OR Student_Edu_Level_Desc IN UNNEST(edu_levels))
  AND (stem_filter IS NULL OR IS_STEM = stem_filter)
  AND (nsf_fields IS NULL OR NSF_SUBJ_FIELD_BROAD IN UNNEST(nsf_fields))
  AND (birth_countries IS NULL OR Country_of_Birth IN UNNEST(birth_countries))
  AND (schools IS NULL OR School_Name IN UNNEST(schools))
GROUP BY GROUPING SETS (
  (fiscal_year, Student_Edu_Level_Desc),  -- Individual degree levels
  (fiscal_year)                           -- Aggregate (all degrees)
)
ORDER BY year, degree_level;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,year,degree_level,frac_req_status,frac_not_req_status
0,2010,All Degrees,0.240019,0.759981
1,2010,bachelors,0.113445,0.886555
2,2010,doctorate,0.336676,0.663324
3,2010,masters,0.294906,0.705094
4,2011,All Degrees,0.243252,0.756748
5,2011,bachelors,0.11033,0.88967
6,2011,doctorate,0.34599,0.65401
7,2011,masters,0.306782,0.693218
8,2012,All Degrees,0.236107,0.763893
9,2012,bachelors,0.110188,0.889812


In [None]:
# /* ───── 3. Query to generate a table for the first plot ───── */
# In each given year, the bottom bar should be the grads_opt_total and the top one should be the non_opt_total.
# SELECT
#     fiscal_year                             AS year,
#     Student_Edu_Level_Desc,
#     -- raw totals, not to be included, we're going to plot shares --
#     # SUM(completed_count)           AS completed_student_count_total,
#     # SUM(completed_req_count)        AS completed_req_stat_count_total
#     SAFE_DIVIDE(SUM(completed_req_count), SUM(completed_count)) AS frac_req_status,
#     1 - SAFE_DIVIDE(SUM(completed_req_count), SUM(completed_count)) AS frac_not_req_status
# FROM  `sevis-beta.sevis_staging.status_changes_plot4`
# WHERE
#       (fys       IS NULL OR fiscal_year            IN UNNEST(fys))
#   AND (campus_states IS NULL OR Campus_State           IN UNNEST(campus_states))
#   AND (campus_lmas IS NULL OR CAMPUS_LMA             IN UNNEST(campus_lmas))
#   AND (edu_levels  IS NULL OR Student_Edu_Level_Desc IN UNNEST(edu_levels))
#   AND (is_stem     IS NULL OR IS_STEM                =  is_stem)
#   AND (nsf_fields IS NULL OR NSF_SUBJ_FIELD_BROAD IN UNNEST(nsf_fields))
#   AND (birth_countries    IS NULL OR Country_of_Birth IN UNNEST(birth_countries))
#   AND (schools IS NULL OR School_Name IN UNNEST(schools))
# GROUP BY year, Student_Edu_Level_Desc
# ORDER BY year;