In [101]:
import pandas as pd
from notebookutils import mssparkutils
from io import StringIO

### OEA Initializations

In [102]:
%run OEA/modules/Ed-Fi/v0.8/src/utilities/edfi_v0_8_edfi_py

In [103]:
# Create the OEA helper object and bind it to the target workspace.
# NOTE(review): `EdFiOEAChild` is presumably defined by the utilities notebook
# pulled in with %run above, and `workspace` is not assigned in any visible
# cell -- presumably a notebook/pipeline parameter; confirm.
oea = EdFiOEAChild()   
oea.set_workspace(workspace)

### Frequency-Based ETL Metadata

In [109]:
# Table names grouped under the `dbo` label: Dim* tables, the
# EducationOrganization* tables, then Fact* tables. Order is preserved because
# it determines row order in the frequency lookup built further down.
dbo_tables = [
    "DimAcademicSubject",
    "DimAssessment",
    "DimAssessmentAcademicSubject",
    "DimAssessmentAssessedGradeLevel",
    "DimAssessmentCategory",
    "DimAssessmentItem",
    "DimAssessmentPeriod",
    "DimAssessmentItemCategory",
    "DimAssessmentItemResult",
    "DimAssessmentPerformanceLevel",
    "DimAssessmentReportingMethod",
    "DimAssessmentSection",
    "DimAttendanceEventCategory",
    "DimBehavior",
    "DimCTEProgramService",
    "DimCalendarEventType",
    "DimCareerPathway",
    "DimClassPeriod",
    "DimClassroomPosition",
    "DimCohort",
    "DimContinuationOfServicesReason",
    "DimCourse",
    "DimCourseOffering",
    "DimDate",
    "DimDisability",
    "DimDisabilityDesignation",
    "DimDisabilityDeterminationSourceType",
    "DimDiscipline",
    "DimDisciplineIncident",
    "DimELAProgressLevel",
    "DimEducationOrganization",
    "DimEducationalEnvironment",
    "DimEntryGradeLevelReason",
    "DimEntryType",
    "DimFoodProgramService",
    "DimGradeLevel",
    "DimGradeType",
    "DimGradingPeriod",
    "DimGraduationPlan",
    "DimHomelessPrimaryNighttimeResidence",
    "DimHomelessProgramService",
    "DimIncidentLocationType",
    "DimLanguage",
    "DimLanguageInstructionProgramService",
    "DimLearningStandard",
    "DimLocalEducationAgency",
    "DimLocation",
    "DimMathematicsProgressLevel",
    "DimMigrantEducationProgramService",
    "DimMonitored",
    "DimNeglectedOrDelinquentProgram",
    "DimNeglectedOrDelinquentProgramService",
    "DimObjectiveAssessment",
    "DimObjectiveAssessmentPerformanceLevel",
    "DimParticipation",
    "DimPerformanceLevel",
    "DimProficiency",
    "DimProgram",
    "DimProgress",
    "DimReasonExited",
    "DimReporterDescription",
    "DimResidencyStatus",
    "DimResponseIndicator",
    "DimResultDataType",
    "DimSchool",
    "DimSchoolYear",
    "DimSection",
    "DimService",
    "DimSession",
    "DimSpecialEducationProgramService",
    "DimSpecialEducationSetting",
    "DimStaff",
    "DimStaffClassification",
    "DimStaffRace",
    "DimStudent",
    "DimStudentCharacteristic",
    "DimStudentParticipationCodeType",
    "DimStudentRace",
    "DimTechnicalSkillsAssessment",
    "DimTerm",
    "DimTitleIPartAParticipant",
    "DimTitleIPartAProgramService",
    "DimWeapon",
    "EducationOrganization",
    "EducationOrganizationAddress",
    "EducationOrganizationAddressPeriod",
    "EducationOrganizationCategories",
    "EducationOrganizationinstitutiontelephone",
    "FactEnrollment",
    "FactFeederSchool",
    "FactSchoolAttendance",
    "FactSchoolEnrollment",
    "FactSectionAttendance",
    "FactSectionEnrollment",
    "FactSectionGrade",
    "FactStaffAssignment",
    "FactStudentAssessment",
    "FactStudentAssessmentItemScore",
    "FactStudentAssessmentPerformance",
    "FactStudentAssessmentScore",
    "FactStudentCTEProgram",
    "FactStudentCharacteristics",
    "FactStudentCohort",
    "FactStudentDisability",
    "FactStudentDisciplineAction",
    "FactStudentDisciplineIncident",
    "FactStudentHomelessProgram",
    "FactStudentLanguageInstructionProgram",
    "FactStudentMigrantEducationProgram",
    "FactStudentNeglectedOrDelinquentProgram",
    "FactStudentObjectiveAssessmentPerformance",
    "FactStudentObjectiveAssessmentScore",
    "FactStudentProgram",
    "FactStudentSchoolFoodServiceProgram",
    "FactStudentSpecialEducationProgram",
    "FactStudentTitleIPartAProgram",
]

# Table names grouped under the `config` label.
config_tables = [
    "Descriptor",
    "DescriptorConstant",
    "DescriptorMap",
    "ExecutionAudit",
    "ExecutionDuration",
    "ExplicitStudentDataAuthorization",
    "Goals",
    "Parameter",
]

# Table names grouped under the `auth` label.
# NOTE(review): "ExplicitStudentDataAuthorization" is also listed in
# config_tables -- confirm that both groups are meant to carry it.
auth_tables = [
    "DataAuthorization",
    "ExplicitStudentDataAuthorization",
    "User",
]

In [110]:
# Tables that receive the "low" frequency code. The frequency_assignments
# comprehension tags every table found in this list as "low" and every other
# table as "high".
low_frequency_tables = [
    # Dim* tables
    "DimAcademicSubject",
    "DimAssessmentCategory",
    "DimAssessmentItemCategory",
    "DimAssessmentItemResult",
    "DimAssessmentReportingMethod",
    "DimAttendanceEventCategory",
    "DimBehavior",
    "DimCTEProgramService",
    "DimCalendarEventType",
    "DimCareerPathway",
    "DimClassroomPosition",
    "DimContinuationOfServicesReason",
    "DimDate",
    "DimDisability",
    "DimDisabilityDesignation",
    "DimDisabilityDeterminationSourceType",
    "DimDiscipline",
    "DimELAProgressLevel",
    "DimEducationalEnvironment",
    "DimEntryGradeLevelReason",
    "DimEntryType",
    "DimFoodProgramService",
    "DimGradeLevel",
    "DimGradeType",
    "DimHomelessPrimaryNighttimeResidence",
    "DimHomelessProgramService",
    "DimIncidentLocationType",
    "DimLanguage",
    "DimLanguageInstructionProgramService",
    "DimMathematicsProgressLevel",
    "DimMigrantEducationProgramService",
    "DimMonitored",
    "DimNeglectedOrDelinquentProgram",
    "DimNeglectedOrDelinquentProgramService",
    "DimParticipation",
    "DimPerformanceLevel",
    "DimProficiency",
    "DimProgress",
    "DimReasonExited",
    "DimReporterDescription",
    "DimResidencyStatus",
    "DimResponseIndicator",
    "DimResultDataType",
    "DimSchoolYear",
    "DimService",
    "DimSpecialEducationProgramService",
    "DimSpecialEducationSetting",
    "DimStaffClassification",
    "DimStudentCharacteristic",
    "DimStudentParticipationCodeType",
    "DimTechnicalSkillsAssessment",
    "DimTerm",
    "DimTitleIPartAParticipant",
    "DimTitleIPartAProgramService",
    "DimWeapon",
    # The same eight names, in the same order, as config_tables
    "Descriptor",
    "DescriptorConstant",
    "DescriptorMap",
    "ExecutionAudit",
    "ExecutionDuration",
    "ExplicitStudentDataAuthorization",
    "Goals",
    "Parameter"
]


In [111]:
# Full table inventory: dbo names first, then auth, then config.
# NOTE(review): "ExplicitStudentDataAuthorization" is present in both
# auth_tables and config_tables, so it appears twice in `tables` -- confirm
# that downstream consumers expect two rows with that name.
tables = [*dbo_tables, *auth_tables, *config_tables]
len(tables)

In [112]:
# One frequency code per entry of `tables`, positionally aligned with it:
# "low" when the name is listed in low_frequency_tables, otherwise "high".
frequency_assignments = [
    "low" if table_name in low_frequency_tables else "high"
    for table_name in tables
]

In [113]:
# Column-oriented lookup (one list per column, all the same length as
# `tables`). The two last-run columns start out as empty-list placeholders and
# are overwritten per row by the placeholder-fill loop further down.
entity_frequency_lookup = {
    'resource_full_name': tables,
    'resource_frequency_code': frequency_assignments,
    'lastrundatetime': [[] for _ in tables],
    'lastrundate': [[] for _ in tables],
}

In [115]:
# `timedelta` is needed by the placeholder arithmetic in the following cells;
# importing only `datetime` left it undefined on a fresh kernel (unless the
# %run'd utilities notebook happened to export it).
from datetime import datetime, timedelta

In [116]:
# Seed "last run" timestamp: 365 days before the moment this cell executes
# (naive local time, so the value differs on every run).
datetime_placeholder = datetime.now() - timedelta(days=365)

In [118]:
# Replace the empty last-run placeholders with a seed timestamp chosen by
# frequency code. Despite its name, datetime_oneYearBefore is 360 days (not a
# full year) earlier than datetime_placeholder.
datetime_oneYearBefore = datetime_placeholder - timedelta(days=360)
placeholder_by_code = {
    "low": datetime_oneYearBefore,
    "high": datetime_placeholder,
}

for position, code in enumerate(entity_frequency_lookup['resource_frequency_code']):
    seed_timestamp = placeholder_by_code.get(code)
    if seed_timestamp is None:
        # Any other code keeps its existing placeholder value untouched.
        continue
    entity_frequency_lookup['lastrundatetime'][position] = seed_timestamp
    entity_frequency_lookup['lastrundate'][position] = seed_timestamp.date()


In [119]:
# Tabular view of the lookup (one row per table name).
# NOTE(review): `df` is not referenced again in the visible cells; the write
# cell below builds its own DataFrame from the same dict.
df = pd.DataFrame(data=entity_frequency_lookup)

In [121]:
# Serialise the frequency lookup to CSV text (no index column) and write it to
# the stage1 metadata-assets location.
# NOTE(review): apiVersion, districtId and schoolYear are not assigned in any
# visible cell -- presumably notebook/pipeline parameters; confirm.
entity_frequency_lookup_df = pd.DataFrame(entity_frequency_lookup)
data_str = entity_frequency_lookup_df.to_csv(index=False)

destination_path = f'stage1/Transactional/Ed-Fi/{apiVersion}/DistrictId={districtId}/SchoolYear={schoolYear}/metadata-assets/edgraph_frequency_etl.csv'
destination_url = oea.to_url(destination_path)

# Third positional argument (True) asks mssparkutils to overwrite an existing file.
mssparkutils.fs.put(destination_url, data_str, True)