In [None]:
%%bigquery
-- Builds the graduate-cohort table used for the graduates -> OPT plot.
--
-- Output: one row per (fiscal_year, SEVIS_ID) for students with at least one
-- record whose program end date falls inside that fiscal-year window
-- (Oct 1 of the prior calendar year through Sep 30 of `Year`), plus:
--   is_graduate  always 1 in the output (non-graduates are filtered out)
--   used_opt     1 when any record in that person-year shows post-completion
--                or STEM OPT authorized within 180 days of the program end date
--
-- Dimension columns keep their source names because the downstream filter
-- queries reference them verbatim.
CREATE OR REPLACE TABLE
  `sevis-beta.sevis_staging.grad_cohort_opt_plot1` AS
  WITH base AS (
    -- Row-level projection: fiscal-year window, filter dimensions, and
    -- normalized employment / date fields.
    SELECT
      `Year` AS fiscal_year,
      DATE(`Year` - 1, 10, 1) AS fy_start,
      DATE(`Year`, 9, 30) AS fy_end,

      -- Keep original column names to match filter system expectations
      Campus_State,
      CAMPUS_LMA,
      Student_Edu_Level_Desc,
      IS_STEM,
      NSF_SUBJ_FIELD_BROAD,
      Country_of_Birth,
      School_Name,

      -- Employment columns, lower-cased and trimmed so the literal
      -- comparisons below are insensitive to case and padding
      LOWER(TRIM(Employment_Description)) AS emp_desc,
      LOWER(TRIM(Employment_OPT_Type)) AS opt_type,

      -- Date columns; SAFE_CAST yields NULL instead of failing on bad values
      SAFE_CAST(Program_End_Date AS DATE) AS ped,
      SAFE_CAST(Authorization_Start_Date AS DATE) AS auth_start_date,
      SAFE_CAST(Program_Start_Date AS DATE) AS psd,

      SEVIS_ID
    FROM `sevis-beta.sevis_raw.sevis_f1_cleaned_master`
  ),

  person_year_summary AS (
    -- Collapse all records of a student within a fiscal year into one row.
    SELECT
      fiscal_year,
      SEVIS_ID,

      -- ANY_VALUE picks an arbitrary row's value; this assumes the
      -- dimensions are consistent within a person-year.
      ANY_VALUE(Campus_State) AS Campus_State,
      ANY_VALUE(CAMPUS_LMA) AS CAMPUS_LMA,
      ANY_VALUE(Student_Edu_Level_Desc) AS Student_Edu_Level_Desc,
      ANY_VALUE(IS_STEM) AS IS_STEM,
      ANY_VALUE(NSF_SUBJ_FIELD_BROAD) AS NSF_SUBJ_FIELD_BROAD,
      ANY_VALUE(Country_of_Birth) AS Country_of_Birth,
      ANY_VALUE(School_Name) AS School_Name,

      -- LMA_NAME is NULL for graduates without an OPT record.
      -- NOTE(review): this is described as the employer metro, but the value
      -- is taken from CAMPUS_LMA on an OPT row -- confirm whether an
      -- employer LMA column should be used here instead.
      ANY_VALUE(CASE WHEN emp_desc = 'opt' THEN CAMPUS_LMA END) AS LMA_NAME,

      -- Graduation flag: any record whose program end date lies in the
      -- fiscal year. A NULL ped makes the predicate NULL, which COUNTIF
      -- does not count.
      IF(COUNTIF(ped BETWEEN fy_start AND fy_end) > 0, 1, 0) AS is_graduate,

      -- OPT flag: graduated this fiscal year AND holds post-completion or
      -- STEM OPT that was authorized after the program started and within
      -- 180 days of the program end date.
      -- NOTE(review): ABS() makes the window two-sided, so an authorization
      -- starting up to 180 days BEFORE the program end date also counts --
      -- confirm this is intended.
      IF(
        COUNTIF(
          ped BETWEEN fy_start AND fy_end
          AND emp_desc = 'opt'
          AND opt_type IN ('post-completion', 'stem')
          AND auth_start_date > psd
          AND ABS(DATE_DIFF(auth_start_date, ped, DAY)) <= 180
        ) > 0,
        1,
        0
      ) AS used_opt
    FROM base
    -- fy_start / fy_end are derived from fiscal_year and only used inside
    -- aggregates, so they do not need to be grouping keys.
    GROUP BY fiscal_year, SEVIS_ID
  )

  -- No ORDER BY: rows in a stored table have no guaranteed order, so sorting
  -- here would only add cost. Consumers should sort when they query.
  SELECT
    fiscal_year,
    Campus_State,
    CAMPUS_LMA,
    Student_Edu_Level_Desc,
    IS_STEM,
    NSF_SUBJ_FIELD_BROAD,
    Country_of_Birth,
    School_Name,
    LMA_NAME,
    SEVIS_ID,
    is_graduate,
    used_opt
  FROM person_year_summary
  WHERE is_graduate = 1;  -- Only keep actual graduates

Query is running:   0%|          |

In [None]:
%%bigquery
-- Yearly graduate counts (total, moved to OPT, did not move to OPT) from the
-- graduate-cohort table, restricted by the optional filters declared below.
-- A filter left as NULL is not applied.
DECLARE fys             ARRAY<INT64>;
DECLARE birth_countries ARRAY<STRING>;
DECLARE campus_states   ARRAY<STRING>;
DECLARE campus_lmas     ARRAY<STRING>;
DECLARE edu_levels      ARRAY<STRING>;
-- Deliberately NOT named is_stem: inside a query a column takes precedence
-- over a script variable with the same (case-insensitive) name, so `is_stem`
-- would resolve to the IS_STEM column and the STEM filter would never apply.
DECLARE stem_filter     BOOL;
DECLARE nsf_fields      ARRAY<STRING>;
DECLARE schools         ARRAY<STRING>;

SET fys = [2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022];
SET campus_states   = NULL;  -- e.g. ['california']
SET campus_lmas     = NULL;  -- keep NULL when unused
SET edu_levels      = NULL;
SET stem_filter     = NULL;  -- NULL -> both STEM & non-STEM
SET nsf_fields      = NULL;
SET birth_countries = NULL;
SET schools         = ['loras college'];  -- set to NULL for all schools

SELECT
  fiscal_year AS year,
  COUNT(DISTINCT SEVIS_ID) AS total_graduates,
  COUNT(DISTINCT CASE WHEN used_opt = 1 THEN SEVIS_ID END) AS grads_to_opt_total,
  COUNT(DISTINCT CASE WHEN used_opt = 0 THEN SEVIS_ID END) AS non_opt_total
FROM `sevis-beta.sevis_staging.grad_cohort_opt_plot1`
WHERE
  -- Each predicate is skipped when its filter variable is NULL.
  (fys IS NULL OR fiscal_year IN UNNEST(fys))
  AND (campus_states IS NULL OR Campus_State IN UNNEST(campus_states))
  AND (campus_lmas IS NULL OR CAMPUS_LMA IN UNNEST(campus_lmas))
  AND (edu_levels IS NULL OR Student_Edu_Level_Desc IN UNNEST(edu_levels))
  -- assumes the IS_STEM column is BOOL, matching stem_filter -- TODO confirm
  AND (stem_filter IS NULL OR IS_STEM = stem_filter)
  AND (birth_countries IS NULL OR Country_of_Birth IN UNNEST(birth_countries))
  AND (nsf_fields IS NULL OR NSF_SUBJ_FIELD_BROAD IN UNNEST(nsf_fields))
  AND (schools IS NULL OR School_Name IN UNNEST(schools))
GROUP BY fiscal_year
ORDER BY fiscal_year;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,year,total_graduates,grads_to_opt_total,non_opt_total
0,2010,9,6,3
1,2011,9,6,3
2,2012,19,14,5
3,2013,15,8,7
4,2014,14,11,3
5,2015,13,11,2
6,2016,6,5,1
7,2017,3,3,0
8,2018,13,3,10
9,2019,14,1,13
