In [ ]:
# Expose each incoming parameter under every spelling used further down the
# notebook, one alias per line.
# NOTE(review): the right-hand names (InstanceId, ApiUrl, SchoolYear,
# districtID, batchLimit, prepareSAPMetadata, submissionsType) are not defined
# in any visible cell; presumably they are injected as notebook/pipeline
# parameters -- confirm.
instanceId = InstanceId
instance = InstanceId
apiUrl = ApiUrl
schoolYear = SchoolYear
districtId = districtID
DistrictID = districtID
DistrictId = districtID
apiLimit = batchLimit

prepareSAPMetaData = prepareSAPMetadata
sap_pipeline = submissionsType
zone = submissionsType

In [1]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException
from pyspark.sql.types import StructField, StructType
from pyspark.sql.functions import col, substring, regexp_extract, split, lit

import json
import os
import pandas as pd

In [ ]:
# FIXME: Do not pass unnecessary params.
# Parameter bundle assembled from values already present in the notebook
# namespace. Some values are deliberately exposed under more than one key
# spelling (kvName/kVName, districtId/districtID, apiUrl/edfi_url).
input_params = {
    "kvName": kvName,
    "kVName": kvName,  # same value as "kvName", alternate key casing
    "workspace": workspace,
    "apiUrl": apiUrl,
    "instanceId": instanceId,
    "moduleName": moduleName,
    "apiLimit": apiLimit,
    "minChangeVer": minChangeVer,
    "maxChangeVer": maxChangeVer,
    "sapVersion": sapVersion,
    "prepareSAPMetaData": prepareSAPMetaData,
    "submissions": submissions,
    "submissionsType": submissionsType,
    "schoolYear": schoolYear,
    "districtId": districtId,
    "districtID": districtId,  # same value as "districtId", alternate key casing
    "edfi_url": apiUrl,  # same value as "apiUrl"
    "pipelineExecutionId": pipelineExecutionId,
}

In [ ]:
%run OEA/modules/Ed-Fi/v0.7/src/utilities/edfi_v0_7_edfi_py

In [ ]:
# Select the workspace on the `oea` helper object.
# NOTE(review): `oea` is not defined in any visible cell; presumably it is
# provided by the utilities notebook pulled in via %run -- confirm.
oea.set_workspace(workspace)

### Column Mapping

In [ ]:
# Column-rename metadata, nested as:
#   column_mappings[<agency>][<submission>][<resource>] -> {source column: target field}
# The only agency key is "TEA"; submissions are "PEIMS_FALL", "PEIMS_MIDYR" and
# "TSDS_CLASS_ROSTER_FALL"; resources are "tx/..." names.
# NOTE(review): source keys look like SAP extract column names and target values
# like Ed-Fi field names -- confirm against the consuming notebook.
# The whole dict is serialized to the lake further down, so every key and value
# here is runtime data.
column_mappings = {
"TEA": {
"PEIMS_FALL": {
"tx/budgetExts":
   {"FISCYEAR": "fiscalYear",
    "AMOUNT": "budgetAmount",
    "PU_CI_SUB1": "budgetFunctionDescriptor",
    "PU_CI_SUB2": "budgetObjectDescriptor",
    "/BIC/ZPEIMFUND": "organization",
    "/BIC/ZPU_TEA": "budgetFundDescriptor", #"organization",
    "PU_FC_SUB2": "budgetProgramIntentDescriptor",
    "DATE0": "beginDate"
    },
"tx/payrollExts": {
        "FISCYEAR": "fiscalYear",
        "AMOUNT": "payrollAmount",
        "/BIC/ZPEIMFUND": "organization",#"payrollFundDescriptor", # FIXME: MAPPING ISSUE
        "PU_CI_SUB1": "payrollFunctionDescriptor",
        "PU_CI_SUB2": "payrollObjectDescriptor",
        "/BIC/ZPU_TEA": "payrollFundDescriptor",#"organization", # FIXME: MAPPING ISSUE
        "/BIC/ZHRPPICTX": "payrollProgramIntentDescriptor",
        "/BIC/ZHRPPYAYC": "payrollActivityDescriptor",
        '/BIC/ZHRPSTFID': 'staffUniqueId', #Not Sure / Added after
        '/BIC/ZHRPDISTR': 'localEducationAgencyId', #Not Sure / Added after
        '/BIC/ZBEGDA': 'beginDate',
        '/BIC/ZENDDA': 'endDate'
    },
"tx/staffEducationOrganizationEmploymentAssociations":  {
    '/BIC/ZHRPPDETX': 'percentDayEmployed',
    '/BIC/ZHRPNDETX': 'numberDaysEmployed',
    '/BIC/ZHRPAUXTX': 'auxiliaryRoleIdDescriptor',
    '/BIC/ZBEGDA'  : 'auxiliaryRoleIdSet_beginDate',
    '/BIC/ZENDDA': 'auxiliaryRoleIdSet_endDate',
    '/BIC/ZHRDATE10': 'hireDate',
    '/BIC/ZHRDATE21': 'endDate',
    '/BIC/ZHRPSTFID': 'staffUniqueId', #Not Sure / Added after
    '/BIC/ZHRPDISTR': 'localEducationAgencyId' #Not Sure / Added after
},
"tx/contractedInstructionalStaffFTEExts": {
    '/BIC/ZHRPDISTR': 'localEducationAgencyId',
    # NOTE(review): 'SchoolId' is capitalized here, while the assignment
    # associations mapping below uses 'schoolId' -- confirm which casing the
    # consumer expects.
    '/BIC/ZHRPORGID': 'SchoolId',
    '/BIC/ZHRPPICTX': 'ciStaffProgramIntentDescriptor',
    '/BIC/ZHRPISFTE': 'totalContractedInstrStaffFTE',
},
"tx/staffEducationOrganizationAssignmentAssociations": {
    '/BIC/ZHRPSTFID': 'staffUniqueId',
    '/BIC/ZHRPSRVID': 'staffServiceDescriptor',
    '/BIC/ZHRPSSAID': 'sharedServiceArrangementStaffDescriptor',
    '/BIC/ZHRPNOSIC': 'numberOfStudentInClass',
    '/BIC/ZHRPPOPSV': 'populationServedDescriptor',
    '/BIC/ZHRPORGID': 'schoolId',
    '/BIC/ZHRPMOMIN': 'monthlyMinutes',
    '/BIC/ZHRPCLTYC': 'classTypeDescriptor', # TODO: Review why this was added (absent in new file)
    '/BIC/ZHRPSRTYP': 'staffClassificationDescriptor',
    '/BIC/ZHRDATE10': 'beginDate',
    '/BIC/ZHRDATE21': 'endDate',
    '/BIC/ZHRDATE11': 'staffService_beginDate',
    '/BIC/ZENDDA': 'staffService_endDate',
    '/BIC/ZHRPDISTR': 'localEducationAgencyId'
},
# This 'tx/staffs' mapping is repeated entry-for-entry under
# "TSDS_CLASS_ROSTER_FALL" below; keep the two copies in sync when editing.
'tx/staffs': {
    '/BIC/ZHRSSNC': 'staffId',
    '/BIC/ZHRVORNA': 'firstName',
    '/BIC/ZHRNACHN': 'lastSurname',
    '/BIC/ZHRMIDNM': 'middleName',
    'DATEBIRTH': 'birthDate',
    '/BIC/ZHRPHDEGR': 'highestCompletedLevelOfEducationDescriptor',
    '/BIC/ZHRPETHIN': 'hispanicLatinoEthnicity',
    '/BIC/ZHRPRAC01': 'race1_Descriptor',
    '/BIC/ZHRPRAC02': 'race2_Descriptor',
    '/BIC/ZHRPRAC03': 'race3_Descriptor',
    '/BIC/ZHRPRAC04': 'race4_Descriptor',
    '/BIC/ZHRPRAC05': 'race5_Descriptor',
    '/BIC/ZHRPGNRTX': 'sexDescriptor',
    '/BIC/ZHRPGENCD': 'generationCodeDescriptor',
    '/BIC/ZHRTALEXP': 'totalYearsProfExperience',
    '/BIC/ZHRTHSDEX': 'yearsExperienceInDistrict',
    '/BIC/ZHRPSTFID': 'staffUniqueId',
    '/BIC/ZHRPSTFCD': 'staffTypeDescriptor',
    '/BIC/ZHRDATE10': 'staffTypeSet_beginDate',
    '/BIC/ZHRDATE21': 'staffTypeSet_endDate',
    '/BIC/ZHRPPKREQ':'pkTeacherRequirementDescriptor',
    '/BIC/ZHRPPCERT' : 'paraprofessionalCertification',
    '/BIC/ZBEGDA': 'paraprofessional_beginDate',
    '/BIC/ZENDDA': 'paraprofessional_endDate',
    '/BIC/ZHRPNOTRP': 'staffDoNotReportTSDS'
},
},
# "PEIMS_MIDYR" carries a single resource whose source columns match those of
# "tx/budgetExts" above, with "actual*" target names in place of "budget*".
"PEIMS_MIDYR":
{
    "tx/actualExts":
   {"FISCYEAR": "fiscalYear",
    "AMOUNT": "actualAmount",
    "PU_CI_SUB1": "actualFunctionDescriptor",
    "PU_CI_SUB2": "actualObjectDescriptor",
    "/BIC/ZPEIMFUND": "organization",
    "/BIC/ZPU_TEA": "actualFundDescriptor",
    "PU_FC_SUB2": "actualProgramIntentDescriptor",
    "DATE0": "beginDate"
    }
},
# Copy of the "PEIMS_FALL" 'tx/staffs' mapping (same keys and values).
"TSDS_CLASS_ROSTER_FALL": {'tx/staffs': {
    '/BIC/ZHRSSNC': 'staffId',
    '/BIC/ZHRVORNA': 'firstName',
    '/BIC/ZHRNACHN': 'lastSurname',
    '/BIC/ZHRMIDNM': 'middleName',
    'DATEBIRTH': 'birthDate',
    '/BIC/ZHRPHDEGR': 'highestCompletedLevelOfEducationDescriptor',
    '/BIC/ZHRPETHIN': 'hispanicLatinoEthnicity',
    '/BIC/ZHRPRAC01': 'race1_Descriptor',
    '/BIC/ZHRPRAC02': 'race2_Descriptor',
    '/BIC/ZHRPRAC03': 'race3_Descriptor',
    '/BIC/ZHRPRAC04': 'race4_Descriptor',
    '/BIC/ZHRPRAC05': 'race5_Descriptor',
    '/BIC/ZHRPGNRTX': 'sexDescriptor',
    '/BIC/ZHRPGENCD': 'generationCodeDescriptor',
    '/BIC/ZHRTALEXP': 'totalYearsProfExperience',
    '/BIC/ZHRTHSDEX': 'yearsExperienceInDistrict',
    '/BIC/ZHRPSTFID': 'staffUniqueId',
    '/BIC/ZHRPSTFCD': 'staffTypeDescriptor',
    '/BIC/ZHRDATE10': 'staffTypeSet_beginDate',
    '/BIC/ZHRDATE21': 'staffTypeSet_endDate',
    '/BIC/ZHRPPKREQ':'pkTeacherRequirementDescriptor',
    '/BIC/ZHRPPCERT' : 'paraprofessionalCertification',
    '/BIC/ZBEGDA': 'paraprofessional_beginDate',
    '/BIC/ZENDDA': 'paraprofessional_endDate',
    '/BIC/ZHRPNOTRP': 'staffDoNotReportTSDS'

}
}
}
}

In [3]:
def return_file_path_and_url(file_path):
    """Return the given path together with its ``oea.to_url`` conversion.

    Args:
        file_path: Path handed unchanged to ``oea.to_url``.

    Returns:
        tuple: ``(file_path, url)`` -- the argument as received, followed by
        whatever ``oea.to_url(file_path)`` returns.
    """
    return file_path, oea.to_url(file_path)

In [ ]:
def dump_to_json(data, file_path):
    """Write ``data`` as JSON to ``file_path`` through Spark, replacing prior output.

    ``data`` is wrapped in a one-element list and turned into a Spark
    DataFrame, which is then written with the JSON writer in ``overwrite`` mode.

    NOTE(review): relies on a global ``spark`` session that is not created in
    any visible cell -- presumably supplied by the notebook runtime; confirm.

    Args:
        data: Object passed (inside a single-element list) to
            ``spark.createDataFrame``.
        file_path: Destination handed to the DataFrame JSON writer.
    """
    single_row_frame = spark.createDataFrame([data])
    single_row_frame.write.mode('overwrite').json(file_path)

In [ ]:
# File name appended to the metadata-assets path when the column mappings are written.
lake_destination = "ingestion-mappings.json"

In [6]:
# Destination path for the column-mapping metadata, ending in `lake_destination`.
file_path = f"stage1/Transactional/SAP/metadata-assets/{lake_destination}"

# Resolve the path to a URL via the helper, then write `column_mappings` there as JSON
# (dump_to_json overwrites any existing output at that URL).
column_mapping_file_path, column_mapping_file_url = return_file_path_and_url(file_path)
dump_to_json(column_mappings, column_mapping_file_url)

### Other Metadata

In [ ]:
# Supplementary metadata kept alongside the column mappings. Top-level keys:
#   "sap_to_edfi_complex": source object code -> resource name
#   "final_columns":       resource name -> ordered list of output column names
#   "_ext_TX_cols":        resource name -> list of column names
#   "descriptorsDFRef":    empty dict
#   "descriptors":         list of descriptor resource names
# NOTE(review): how each section is consumed is not visible in this notebook;
# the descriptions above only restate the structure -- confirm semantics
# against the consuming code.
other_metadata = {
  # Two codes (YHROHPM04 and YHROHPM05) both map to "staffs".
  "sap_to_edfi_complex": {
    "Budget": "budgetExts",
    "YHROHPM04": "staffs",
    "YHROHPM07": "staffEducationOrganizationEmploymentAssociations",
    "YHROHPM09": "payrollExts",
    "YHROHPM08": "contractedInstructionalStaffFTEExts",
    "YHROHPM10": "staffEducationOrganizationAssignmentAssociations",
    "YHROHPM05": "staffs",
    "YFIOHPEIM": "actualExts"
  },
  # Every list starts with "RECORD".
  "final_columns": {
    "budgetExts": [
      "RECORD",
      "fiscalYear",
      "budgetAmount",
      "budgetFunctionDescriptor",
      "budgetObjectDescriptor",
      "budgetFundDescriptor",
      "budgetOrganization",
      "budgetProgramIntentDescriptor",
      "educationOrganizationReference"
    ],
    "staffEducationOrganizationAssignmentAssociations": [
      "RECORD",
      "sharedServiceArrangementStaffDescriptor",
      "numberOfStudentInClass",
      "populationServedDescriptor",
      "staffClassificationDescriptor",
      "schoolReference",
      "beginDate",
      "endDate",
      "staffReference",
      "educationOrganizationReference",
      "_ext"
    ],
    "staffs": [
      "RECORD",
      "staffUniqueId",
      "firstName",
      "lastSurname",
      "middleName",
      "birthDate",
      "highestCompletedLevelOfEducationDescriptor",
      "hispanicLatinoEthnicity",
      "sexDescriptor",
      "races",
      "_ext"
    ],
    "contractedInstructionalStaffFTEExts": [
      "RECORD",
      "educationOrganizationReference",
      "schoolReference",
      "ciStaffProgramIntentDescriptor",
      "totalContractedInstrStaffFTE"
    ],
    "payrollExts": [
      "RECORD",
      "fiscalYear",
      "payrollAmount",
      "payrollFundDescriptor",
      "payrollFunctionDescriptor",
      "payrollObjectDescriptor",
      "payrollOrganization",
      "payrollProgramIntentDescriptor",
      "payrollActivityDescriptor",
      "beginDate",
      "endDate",
      "educationOrganizationReference",
      "staffReference"
    ],
    "actualExts": [
      "RECORD",
      "fiscalYear",
      "actualAmount",
      "actualFunctionDescriptor",
      "actualObjectDescriptor",
      "actualFundDescriptor",
      "actualOrganization",
      "actualProgramIntentDescriptor",
      "educationOrganizationReference"
    ],
    # NOTE(review): "percentDayEmployed" and "numberDaysEmployed" are listed
    # here and again under "_ext_TX_cols" for this resource -- confirm that
    # having them in both places is intended.
    "staffEducationOrganizationEmploymentAssociations": [
      "RECORD",
      "hireDate",
      "endDate",
      "percentDayEmployed",
      "numberDaysEmployed",
      "employmentStatusDescriptor",
      "educationOrganizationReference",
      "staffReference",
      "_ext"
    ]
  },
  # Only resources whose "final_columns" list contains "_ext" appear here.
  # NOTE(review): presumably these are the fields nested under that "_ext"
  # column -- confirm.
  "_ext_TX_cols": {
    "staffs": [
      "generationCodeDescriptor",
      "totalYearsProfExperience",
      "yearsExperienceInDistrict",
      "staffId",
      "typeSets"
    ],
    "staffEducationOrganizationEmploymentAssociations": [
      "percentDayEmployed",
      "numberDaysEmployed",
      "auxiliaryRoleIdSets"
    ],
    "staffEducationOrganizationAssignmentAssociations": [
      "staffServiceSets"
    ]
  },
  # Left empty in this literal.
  "descriptorsDFRef": {},
  "descriptors": [
    "functionDescriptors",
    "objectDescriptors",
    "fundDescriptors",
    "programIntentDescriptors",
    "auxiliaryRoleIdDescriptors",
    "staffServiceDescriptors",
    "sharedServiceArrangementStaffDescriptors",
    "populationServedDescriptors",
    "classTypeDescriptors",
    "staffClassificationDescriptors",
    "levelOfEducationDescriptors",
    "pkTeacherRequirementDescriptors",
    "generationCodeDescriptors",
    "staffTypeDescriptors",
    "sexDescriptors",
    "raceDescriptors",
    "employmentStatusDescriptors",
    "payrollActivityDescriptors"
  ]
}