### Initializations

In [ ]:
#Importing required libraries
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, greatest, from_json, sha2, concat_ws, isnull
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
from pyspark.sql import functions as F
from pyspark.sql.window import Window

from pyspark.sql import SparkSession

In [ ]:
%run OEA/modules/Ed-Fi/v0.8/src/utilities/edfi_v0_8_edfi_py

In [ ]:
from datetime import datetime

# Build the Ed-Fi OEA helper, then the error logger that is handed that helper.
# NOTE(review): `spark`, `logger`, `EdFiOEAChild` and `ErrorLogging` are not
# defined in this notebook; presumably they come from the session and the
# utilities notebook pulled in by %run - confirm.
oea = EdFiOEAChild()
error_logger = ErrorLogging(
    spark=spark,
    oea=oea,
    logger=logger,
)

### Global Parameters

In [ ]:
def assign_default_variable(variable_name, default_value):
    """Bind a module-level variable to a default when it is not yet defined.

    Args:
        variable_name: Name of the global variable to look up.
        default_value: Value assigned to that global when the name is absent.

    Returns:
        None. When the name already exists in ``globals()`` nothing is
        changed and nothing is logged.
    """
    namespace = globals()
    if variable_name in namespace:
        return

    # Assign first, then log, so the default is in place before the logger runs.
    namespace[variable_name] = default_value
    logger.info(f'{variable_name} not found - using system default')

In [ ]:
# Use 'sandbox2' as the workspace unless `workspace` is already defined as a
# global, then point the OEA helper at the chosen workspace.
assign_default_variable('workspace', 'sandbox2')
oea.set_workspace(workspace)

### Main Code

In [ ]:
def extract_substring_before(input_string, target_substring):
    """Return the text preceding ``target_substring`` in ``input_string``.

    The input only qualifies when it *ends with* the target, compared
    case-insensitively. For a qualifying input, the returned value is
    everything before the first (case-insensitive) occurrence of the target,
    with the input's original casing preserved.

    Args:
        input_string: String to inspect, e.g. a table name.
        target_substring: Suffix to look for, e.g. ``'descriptors'``.

    Returns:
        The leading part of ``input_string`` (possibly ``''``), or ``None``
        when ``input_string`` does not end with ``target_substring``.
    """
    # Local import: `re` is not imported at the top of this notebook.
    import re

    # Guard first so no regex work is done for non-matching names.
    if not input_string.lower().endswith(target_substring.lower()):
        return None

    # IGNORECASE keeps the regex consistent with the case-insensitive suffix
    # check above; without it a mixed-case input passed the check and then
    # failed on `None.group(1)`. DOTALL lets the prefix span newlines.
    found = re.search(
        rf'^(.*?){re.escape(target_substring)}',
        input_string,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if found is None:
        # Only reachable if lower() and regex case-folding disagree
        # (exotic Unicode); treat it as "no match" rather than crashing.
        return None
    return found.group(1)

def list_descriptor_tables(db_name):
    """Map descriptor table names in ``db_name`` to their name prefixes.

    Switches the Spark session to ``db_name`` with ``USE``, lists its tables
    with ``SHOW TABLES``, and keeps every table for which
    ``extract_substring_before(<lower-cased name>, 'descriptors')`` returns a
    value.

    Args:
        db_name: Database whose tables are listed.

    Returns:
        dict mapping the table name as reported by Spark to the lower-cased
        text that precedes ``'descriptors'`` in it.
    """
    spark.sql(f"USE {db_name}")
    table_rows = spark.sql("SHOW TABLES").collect()

    descriptor_tables = {}
    for table_row in table_rows:
        original_name = table_row['tableName']
        prefix = extract_substring_before(original_name.lower(), 'descriptors')
        if prefix is None:
            # Not a descriptor table - skip it.
            continue
        descriptor_tables[original_name] = prefix

    return descriptor_tables

In [ ]:
# When `parameterized` is True the descriptor tables come from the hard-coded
# mapping below; otherwise they are discovered from the stage-2 database.
parameterized = True
if not parameterized:
    descriptors_dict = list_descriptor_tables(stage2_db_name)
else:
    # Maps descriptor table name -> key prefix.
    # The repeated 'graduationplantypedescriptors' and
    # 'localeducationagencycategorydescriptors' entries were removed: a
    # repeated key in a dict literal is silently overwritten, and both
    # repeats carried the same value, so the resulting dict is unchanged.
    # Keys that differ only by letter case are distinct Python keys and are
    # kept as they were.
    descriptors_dict = {
        'operationalstatusdescriptors': 'operationalstatus',
        'schooltypedescriptors': 'schooltype',
        'charterstatusdescriptors': 'charterstatus',
        'titleipartaschooldesignationdescriptors': 'titleipartaschooldesignation',
        'administrativefundingcontroldescriptors': 'administrativefundingcontrol',
        'stateabbreviationdescriptors': 'stateabbreviation',
        'addresstypedescriptors': 'addresstype',
        'institutiontelephonenumbertypedescriptors': 'institutiontelephonenumbertype',
        'localeducationagencycategorydescriptors': 'localeducationagencycategory',
        'programtypedescriptors': 'programtype',
        'programsponsordescriptors': 'programsponsor',
        'racedescriptors': 'race',
        'electronicmailtypedescriptors': 'electronicmailtype',
        'attendanceeventcategorydescriptors': 'attendanceeventcategory',
        'educationalenvironmentdescriptors': 'educationalenvironment',
        'termdescriptors': 'term',
        'entrytypedescriptors': 'entrytype',
        'residencystatusdescriptors': 'residencystatus',
        'graduationplantypedescriptors': 'graduationplantype',
        'entrygradelevelreasondescriptors': 'entrygradelevelreason',
        #'entrygradeleveldescriptors':'entrygradelevel',
        'calendareventdescriptors': 'calendarevent',
        'academicsubjectdescriptors': 'academicsubject',
        'languagedescriptors': 'language',
        'LevelOfEducationdescriptors': 'LevelOfEducation',
        'sexdescriptors': 'sex',
        'credittypedescriptors': 'credittype',
        'gradeleveldescriptors': 'gradelevel',
        'ClassroomPositionDescriptors': 'ClassroomPositionDescriptors',
        'AcademicSubjectdescriptors': 'AcademicSubject',
        'TitleIPartAProgramServicedescriptors': 'TitleIPartAProgramService',
        'TitleIPartAParticipantdescriptors': 'TitleIPartAParticipant',
        'Servicedescriptors': 'Service',
        'ReasonExiteddescriptors': 'ReasonExited',
        'SpecialEducationSettingdescriptors': 'SpecialEducationSetting',
        'DisabilityDeterminationSourceTypedescriptors': 'DisabilityDeterminationSourceType',
        'Disabilitydescriptors': 'Disability',
        'SpecialEducationProgramServicedescriptors': 'SpecialEducationProgramService',
        'specialeducationprogramservicedescriptors': 'SpecialEducationProgramServiceProvider',
        'schoolfoodserviceprogramservicedescriptors': 'schoolfoodserviceprogramservice',
        'neglectedordelinquentprogramservicedescriptors': 'neglectedordelinquentprogramservice',
        'ProgressLevelDescriptors': 'ProgressLevel',
        'DisabilityDesignationDescriptors': 'DisabilityDesignationDescriptors',
        'PerformanceLevelDescriptors': 'PerformanceLevelDescriptors',
        'assessmentReportingMethodDescriptors': 'assessmentReportingMethodDescriptors',
        'assessmentperioddescriptors': 'assessmentperioddescriptors',
        'ResultDatatypetypeDescriptors': 'ResultDatatypetypeDescriptors',
        'AssessmentPeriodDescriptors': 'AssessmentPeriodDescriptors',
        'GradeTypeDescriptors': 'GradeTypeDescriptors',
        'CohortTypeDescriptors': 'CohortTypeDescriptors',
        'AssessmentItemResultDescriptors': 'AssessmentItemResultDescriptors',
        'GradingPeriodDescriptors': 'GradingPeriodDescriptors',
        'DisciplineDescriptors': 'DisciplineDescriptors',
        'CohortScopeDescriptors': 'CohortScopeDescriptors',
        'ResponseIndicatorDescriptors': 'ResponseIndicatorDescriptors',
        'IncidentLocationDescriptors': 'IncidentLocationDescriptors',
        'ReporterDescriptionDescriptors': 'ReporterDescriptionDescriptors',
        'BehaviorDescriptors': 'BehaviorDescriptors',
        'WeaponDescriptors': 'WeaponDescriptors',
        'StudentParticipationCodeDescriptors': 'StudentParticipationCodeDescriptors',
        'StaffClassificationDescriptors': 'StaffClassificationDescriptors',
    }

In [ ]:
def create_desc_session_view(db_name, 
                             table_name,
                             session_view_name,
                             key_prefix):
    """Create or replace ``{db_name}.{session_view_name}_view`` over a descriptor table.

    The view selects from ``{db_name}.{table_name}``, exposes ``LakeId`` as
    both ``DescriptorHKey`` and ``DescriptorLakeId``, passes through
    ``Namespace``, ``CodeValue``, ``ShortDescription`` and ``Description``,
    adds a NULL ``ShortDescriptionOrder`` and sets ``CreatedDateTime`` /
    ``ModifiedDateTime`` to ``CURRENT_TIMESTAMP()``.

    This is best-effort: a failure is logged and not re-raised, so a caller
    iterating over many tables continues with the next one.

    Args:
        db_name: Database holding the source table and receiving the view.
        table_name: Source descriptor table.
        session_view_name: Base name of the view; ``_view`` is appended.
        key_prefix: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        None.
    """
    try:
        spark.sql(f"""CREATE OR REPLACE VIEW {db_name}.{session_view_name}_view AS 
                    SELECT
                        LakeId AS DescriptorHKey,
                        LakeId as DescriptorLakeId,
                        Namespace,
                        CodeValue,
                        ShortDescription,
                        NULL as ShortDescriptionOrder,
                        Description,
                        CURRENT_TIMESTAMP() AS CreatedDateTime,
                        CURRENT_TIMESTAMP() AS ModifiedDateTime
                    FROM {db_name}.{table_name}                   
                  """)
    except Exception as e:
        # The original bare `except:` never bound `e`, so this log line itself
        # raised NameError and hid the real failure. Catching Exception (not
        # BaseException) also lets KeyboardInterrupt/SystemExit propagate.
        logger.info(f'Error Occured -{e}: {table_name} not created')

In [ ]:
# Create (or replace) one `<table>_view` per descriptor table in the stage-2
# database. The table name doubles as the base name of the view.
db_name = stage2_db_name
for table_name, key_prefix in descriptors_dict.items():
    logger.info(f"Create or replace view - {table_name}_view")
    create_desc_session_view(db_name, table_name, table_name, key_prefix)