In [None]:
# Import python packages
import streamlit as st
import pandas as pd

# We can also use Snowpark for our analyses!
# get_active_session() is imported from snowflake.snowpark.context; its result is
# kept in the notebook-level name `session`.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


In [None]:
-- Welcome to Snowflake Notebooks!
-- Sample data from a SQL cell: one row per day with its chance of snow.
SELECT
    'FRIDAY' AS SNOWDAY,
    0.2      AS CHANCE_OF_SNOW
UNION ALL
SELECT 'SATURDAY', 0.5
UNION ALL
SELECT 'SUNDAY', 0.9;

In [None]:
# `cell2` is the Python name of an earlier cell's result (presumably the SQL cell
# above); convert it to a pandas DataFrame.
my_df = cell2.to_pandas()

# Plot CHANCE_OF_SNOW against SNOWDAY under its own heading.
st.subheader("Chance of SNOW ❄️")
st.line_chart(data=my_df, x="SNOWDAY", y="CHANCE_OF_SNOW")

# Closing prompt for the reader.
st.subheader("Try it out yourself and show off your skills 🥇")

# Resume Screening

## Create database, schema, stage and warehouses for the project

In [None]:

-- Environment setup for the resume screening project.
-- Every CREATE uses IF NOT EXISTS so that re-running this cell (e.g. "Run all")
-- neither drops the database, the schema, or the stage holding the uploaded resumes,
-- nor fails on the account-level role that already exists from a previous run.

USE ROLE accountadmin;

-- Step 1: Create the database, schema, warehouses and stage
CREATE DATABASE IF NOT EXISTS RESUME_SCREENING_DB;

-- Fully qualified so the schema never lands in whatever database happens to be current.
CREATE SCHEMA IF NOT EXISTS RESUME_SCREENING_DB.RESUME_AI_SCHEMA;

CREATE WAREHOUSE IF NOT EXISTS RESUME_AI_WH;

CREATE WAREHOUSE IF NOT EXISTS RESUME_AI_MEDIUM
WAREHOUSE_SIZE = 'MEDIUM'
AUTO_SUSPEND = 60;

CREATE WAREHOUSE IF NOT EXISTS RESUME_AI_LARGE
WAREHOUSE_SIZE = 'LARGE'
AUTO_SUSPEND = 60;

-- Creating a warehouse makes it the session's current warehouse, so the original
-- cell always ended up on RESUME_AI_LARGE. State that explicitly so first runs and
-- re-runs behave the same; switch to a smaller warehouse here if LARGE is not needed.
USE WAREHOUSE RESUME_AI_LARGE;

USE DATABASE resume_screening_db;

USE SCHEMA resume_ai_schema;

-- Internal stage for the resume documents; the directory table is enabled because
-- later cells read the file list through DIRECTORY(@RESUME_DOCS).
CREATE STAGE IF NOT EXISTS RESUME_DOCS
    DIRECTORY = ( ENABLE = TRUE )
    ENCRYPTION = ( TYPE = 'SNOWFLAKE_SSE' );

CREATE ROLE IF NOT EXISTS doc_ai_role;

-- Step 2: Grant required privileges
GRANT USAGE ON WAREHOUSE resume_ai_wh TO ROLE doc_ai_role;
GRANT USAGE ON WAREHOUSE resume_ai_medium TO ROLE doc_ai_role;
GRANT USAGE ON WAREHOUSE resume_ai_large TO ROLE doc_ai_role;

GRANT USAGE ON DATABASE resume_screening_db TO ROLE doc_ai_role;
GRANT USAGE ON SCHEMA resume_screening_db.resume_ai_schema TO ROLE doc_ai_role;
-- Left disabled as in the original; the object names now point at this project's schema.
-- GRANT CREATE SNOWFLAKE.ML.DOCUMENT_INTELLIGENCE ON SCHEMA resume_screening_db.resume_ai_schema TO ROLE doc_ai_role;
-- GRANT CREATE MODEL ON SCHEMA resume_screening_db.resume_ai_schema TO ROLE doc_ai_role;

-- Step 3: Grant the role to a specific user
-- NOTE(review): the user name is hard-coded; change it to the intended user before running.
GRANT ROLE doc_ai_role TO USER MENGTHONG01;

-- Show which role the rest of the notebook will run as.
SELECT CURRENT_ROLE();


## Resume Prediction

### Create a table to store the JSON output of the prediction

In [None]:

-- Store the raw model output, one row per file listed in the RESUME_DOCS stage
-- directory. PREDICT receives a presigned URL for the file and the literal 2 as its
-- second argument.
CREATE OR REPLACE TABLE resume_prediction_json AS
SELECT
    RESUME_SCREENING_DB.RESUME_AI_SCHEMA.RESUME_SCREENING!PREDICT(
        GET_PRESIGNED_URL(
            @RESUME_SCREENING_DB.RESUME_AI_SCHEMA.RESUME_DOCS,
            RELATIVE_PATH
        ),
        2
    ) AS resume_prediction
FROM DIRECTORY(@RESUME_SCREENING_DB.RESUME_AI_SCHEMA.RESUME_DOCS);

In [None]:
-- Inspect the raw JSON predictions.
SELECT * FROM resume_prediction_json;


### Create a semi-structured table of the prediction result 

In [None]:
-- Create a table with all values and scores.
-- Fields read with [0]:value are taken from the first list entry and cast to a scalar;
-- the list-style fields are kept uncast (VARIANT) so they can be flattened later.
CREATE OR REPLACE TABLE resume_prediction AS
WITH temp AS (
    -- Apply the model to every file in the stage directory and keep the file
    -- metadata next to the JSON result.
    SELECT
        relative_path AS file_name,
        size AS file_size,
        last_modified,
        file_url AS snowflake_file_url,
        -- VERIFY THAT BELOW IS USING THE SAME NAME AND NUMBER AS THE MODEL INSTRUCTIONS YOU COPIED IN THE PREVIOUS STEP!
        RESUME_SCREENING_DB.RESUME_AI_SCHEMA.RESUME_SCREENING!PREDICT(
            GET_PRESIGNED_URL(@RESUME_SCREENING_DB.RESUME_AI_SCHEMA.RESUME_DOCS, relative_path),
            2
        ) AS resume_prediction
    -- Fully qualified so the file list always comes from the same stage the presigned
    -- URLs are generated for, whatever the session's current schema is.
    FROM DIRECTORY(@RESUME_SCREENING_DB.RESUME_AI_SCHEMA.RESUME_DOCS)
)

SELECT
    file_name,
    file_size,
    last_modified,
    snowflake_file_url,
    resume_prediction:__documentMetadata.ocrScore::FLOAT AS ocrScore,
    resume_prediction:"full_name"[0]:value::STRING AS full_name,
    resume_prediction:"phone_number"[0]:value::STRING AS phone_number,
    resume_prediction:"email"[0]:value::STRING AS email,
    resume_prediction:"location"[0]:value::STRING AS location,
    resume_prediction:"job_title"[0]:value::STRING AS job_title,
    resume_prediction:"work_experience_roles" AS work_experience_roles,
    resume_prediction:"recent_work_experience" AS recent_work_experience,
    resume_prediction:"technical_skills" AS technical_skills,
    resume_prediction:"year_of_experience"[0]:value::STRING AS year_of_experiences,
    resume_prediction:"field_of_study"[0]:value::STRING AS field_of_study,
    -- NOTE(review): the key spelling "univeristy_name" is kept on purpose; it presumably
    -- matches the value name defined in the model, and later cells use this column name.
    resume_prediction:"univeristy_name"[0]:value::STRING AS univeristy_name,
    resume_prediction:"certification" AS certifications,
    resume_prediction:"languages_spoken" AS languages_spoken  -- no dangling comma before FROM
FROM temp;

In [None]:
-- View the semi-structured resume prediction table.
SELECT * FROM resume_prediction;

### Transform the VARIANT JSON columns into a structured format

In [None]:
-- Build a flat version of resume_prediction in which every list-style VARIANT column
-- is collapsed into a single ' | '-separated string.
--
-- Each CTE flattens one list column and re-aggregates its entries per resume:
--   * keyed on file_name (one row per staged file) rather than full_name, so two
--     candidates with the same name are not merged and a NULL name cannot break the join;
--   * ordered by the FLATTEN index so the joined string keeps the list's own order.
-- The final SELECT is driven from resume_prediction itself, so a resume whose list is
-- missing or empty still appears, with NULL in that column.
CREATE OR REPLACE TABLE resume_prediction_clean AS
WITH role_flattened AS (
  SELECT
    p.file_name,
    ARRAY_AGG(item.value:value::STRING) WITHIN GROUP (ORDER BY item.index) AS role_list
  FROM resume_prediction p,
    LATERAL FLATTEN(input => p.work_experience_roles) item
  GROUP BY p.file_name
),
experience_flattened AS (
  SELECT
    p.file_name,
    ARRAY_AGG(item.value:value::STRING) WITHIN GROUP (ORDER BY item.index) AS experience_list
  FROM resume_prediction p,
    LATERAL FLATTEN(input => p.recent_work_experience) item
  GROUP BY p.file_name
),
technical_skills_flattened AS (
  SELECT
    p.file_name,
    ARRAY_AGG(item.value:value::STRING) WITHIN GROUP (ORDER BY item.index) AS skill_list
  FROM resume_prediction p,
    LATERAL FLATTEN(input => p.technical_skills) item
  GROUP BY p.file_name
),
certifications_flattened AS (
  SELECT
    p.file_name,
    ARRAY_AGG(item.value:value::STRING) WITHIN GROUP (ORDER BY item.index) AS certification_list
  FROM resume_prediction p,
    LATERAL FLATTEN(input => p.certifications) item
  GROUP BY p.file_name
),
languages_spoken_flattened AS (
  SELECT
    p.file_name,
    ARRAY_AGG(item.value:value::STRING) WITHIN GROUP (ORDER BY item.index) AS language_list
  FROM resume_prediction p,
    LATERAL FLATTEN(input => p.languages_spoken) item
  GROUP BY p.file_name
)

-- Output column names and order are the same as before.
SELECT
  p.file_name,
  p.file_size,
  p.last_modified,
  p.snowflake_file_url,
  p.ocrScore,
  p.full_name,
  p.phone_number,
  p.email,
  p.location,
  p.job_title,
  ARRAY_TO_STRING(r.role_list, ' | ') AS work_experience_roles,
  ARRAY_TO_STRING(e.experience_list, ' | ') AS recent_work_experience,
  p.year_of_experiences,
  p.field_of_study,
  p.univeristy_name,
  ARRAY_TO_STRING(t.skill_list, ' | ') AS technical_skills,
  ARRAY_TO_STRING(c.certification_list, ' | ') AS certifications,
  ARRAY_TO_STRING(l.language_list, ' | ') AS languages_spoken
FROM resume_prediction p
LEFT JOIN role_flattened r ON p.file_name = r.file_name
LEFT JOIN experience_flattened e ON p.file_name = e.file_name
LEFT JOIN technical_skills_flattened t ON p.file_name = t.file_name
LEFT JOIN certifications_flattened c ON p.file_name = c.file_name
LEFT JOIN languages_spoken_flattened l ON p.file_name = l.file_name;


In [None]:
-- View the structured resume prediction table.
SELECT * FROM resume_prediction_clean;