In [None]:
class Clever(BaseOEAModule):
    """Data processing module for Clever data.

    Registers the column layout of the ``resource_usage_students`` entity and
    exposes the hooks for copying the bundled test file into stage1 and for
    processing that entity from stage1.
    """

    def __init__(self, oea, source_folder='clever'):
        """Initialise the base module, then register the Clever schema.

        Args:
            oea: Forwarded unchanged to ``BaseOEAModule.__init__``.
            source_folder: Forwarded unchanged to ``BaseOEAModule.__init__``;
                defaults to ``'clever'``.
        """
        super().__init__(oea, source_folder)

        # One [column name, type name, operation] triple per column.
        # NOTE(review): the third element ('hash' / 'no-op') is presumably the
        # pseudonymization operation applied by BaseOEAModule - confirm there.
        usage_columns = [
            ['date', 'timestamp', 'no-op'],
            ['sis_id', 'string', 'hash'],
            ['clever_user_id', 'string', 'hash'],
            ['clever_school_id', 'string', 'no-op'],
            ['resource_type', 'string', 'no-op'],
            ['resource_name', 'string', 'no-op'],
            ['resource_id', 'string', 'no-op'],
        ]
        self.schemas['resource_usage_students'] = usage_columns

    def process_data_from_stage1(self):
        """Process the ``resource_usage_students`` entity from stage1."""
        entity = 'resource_usage_students'
        self._process_entity_from_stage1(entity)

    def copy_test_data_to_stage1(self):
        """Copy the bundled Clever test csv into the stage1 entity folder."""
        test_file = '2020-10-27-resource-usage-students.csv'
        origin = f'{self.module_path}/test_data/{test_file}'
        target = f'{self.stage1np}/resource_usage_students/{test_file}'
        # Third positional argument is passed as True, exactly as before
        # (presumably the "recurse" flag of mssparkutils.fs.cp - confirm).
        mssparkutils.fs.cp(origin, target, True)

class ContosoSIS(BaseOEAModule):
    """Data processing module for Contoso SIS data.

    Registers the column layouts of the ``studentattendance`` and
    ``studentsectionmark`` entities and exposes the hooks for copying the
    bundled test files into stage1 and for processing both entities.
    """

    def __init__(self, oea, source_folder='contoso_sis'):
        """Initialise the base module, then register both entity schemas.

        Args:
            oea: Forwarded unchanged to ``BaseOEAModule.__init__``.
            source_folder: Forwarded unchanged to ``BaseOEAModule.__init__``;
                defaults to ``'contoso_sis'``.
        """
        super().__init__(oea, source_folder)

        # One [column name, type name, operation] triple per column.
        # NOTE(review): the third element ('hash-no-lookup' / 'no-op') is
        # presumably the pseudonymization operation - confirm in BaseOEAModule.
        attendance_columns = [
            ['id', 'string', 'no-op'],
            ['student_id', 'string', 'hash-no-lookup'],
            ['school_year', 'integer', 'no-op'],
            ['school_id', 'string', 'no-op'],
            ['attendance_date', 'timestamp', 'no-op'],
            ['all_day', 'string', 'no-op'],
            ['Period', 'short', 'no-op'],
            ['section_id', 'string', 'no-op'],
            ['AttendanceCode', 'string', 'no-op'],
            ['PresenceFlag', 'boolean', 'no-op'],
            ['attendance_status', 'string', 'no-op'],
            ['attendance_type', 'string', 'no-op'],
            ['attendance_sequence', 'short', 'no-op'],
        ]
        section_mark_columns = [
            ['id', 'string', 'no-op'],
            ['student_id', 'string', 'hash-no-lookup'],
            ['section_id', 'string', 'no-op'],
            ['school_year', 'string', 'no-op'],
            ['term_id', 'string', 'no-op'],
            ['numeric_grade_earned', 'short', 'no-op'],
            ['alpha_grade_earned', 'string', 'no-op'],
            ['is_final_grade', 'string', 'no-op'],
            ['credits_attempted', 'short', 'no-op'],
            ['credits_earned', 'short', 'no-op'],
            ['grad_credit_type', 'string', 'no-op'],
        ]

        self.schemas['studentattendance'] = attendance_columns
        self.schemas['studentsectionmark'] = section_mark_columns

    def process_data_from_stage1(self):
        """Process both Contoso SIS entities from stage1, attendance first."""
        for entity in ('studentattendance', 'studentsectionmark'):
            # Positional arguments are kept exactly as the module has always
            # passed them: 'csv', 'overwrite', 'true'.
            self._process_entity_from_stage1(entity, 'csv', 'overwrite', 'true')

    def copy_test_data_to_stage1(self):
        """Copy each bundled test csv into its stage1 entity folder."""
        for entity in ('studentattendance', 'studentsectionmark'):
            origin = f'{self.module_path}/test_data/{entity}.csv'
            target = f'{self.stage1np}/{entity}/{entity}.csv'
            # Third positional argument is passed as True, exactly as before
            # (presumably the "recurse" flag of mssparkutils.fs.cp - confirm).
            mssparkutils.fs.cp(origin, target, True)

class IReady(BaseOEAModule):
    """ Provides data processing methods for iReady data.  """
    def __init__(self, oea, source_folder = 'iready'):
        BaseOEAModule.__init__(self, oea, source_folder)

        self.schemas['comprehensive_student_lesson_activity_with_standards_ela'] = [['Last Name', 'string', 'mask'],
                                            ['First Name', 'string', 'mask'],
                                            ['Student ID', 'string', 'hash-no-lookup'],
                                            ['Student Grade', 'string', 'no-op'],
                                            ['Academic Year', 'string', 'no-op'],
                                            ['School', 'string', 'no-op'],
                                            ['Subject', 'string', 'no-op'],
                                            ['Domain', 'string', 'no-op'],
                                            ['Lesson Grade', 'string', 'no-op'],
                                            ['Lesson Level', 'string', 'no-op'],
                                            ['Lesson ID', 'string', 'no-op'],
                                            ['Lesson Name', 'string', 'no-op'],
                                            ['Lesson Objective', 'string', 'no-op'],
                                            ['Completion Date', 'string', 'no-op'],
                                            ['Total Time on Lesson (min)', 'string', 'no-op'],
                                            ['Score', 'string', 'no-op'],
                                            ['Passed or Not Passed', 'string', 'no-op'],
                                            ['Teacher-Assigned Lesson', 'string', 'no-op'],
                                            ['State Standards', 'string', 'no-op'],
                                            ['Type of Standard', 'string', 'no-op'],
                                            ['Standard Code', 'string', 'no-op'],
                                            ['Standard Text', 'string', 'no-op']]
        self.schemas['comprehensive_student_lesson_activity_with_standards_math'] = [['Last Name', 'string', 'mask'],
                                            ['First Name', 'string', 'mask'],
                                            ['Student ID', 'string', 'hash-no-lookup'],
                                            ['Student Grade', 'string', 'no-op'],
                                            ['Academic Year', 'string', 'no-op'],
                                            ['School', 'string', 'no-op'],
                                            ['Subject', 'string', 'no-op'],
                                            ['Domain', 'string', 'no-op'],
                                            ['Lesson Grade', 'string', 'no-op'],
                                            ['Lesson Level', 'string', 'no-op'],
                                            ['Lesson ID', 'string', 'no-op'],
                                            ['Lesson Name', 'string', 'no-op'],
                                            ['Lesson Objective', 'string', 'no-op'],
                                            ['Completion Date', 'string', 'no-op'],
                                            ['Total Time on Lesson (min)', 'string', 'no-op'],
                                            ['Score', 'string', 'no-op'],
                                            ['Passed or Not Passed', 'string', 'no-op'],
                                            ['Teacher-Assigned Lesson', 'string', 'no-op'],
                                            ['State Standards', 'string', 'no-op'],
                                            ['Type of Standard', 'string', 'no-op'],
                                            ['Standard Code', 'string', 'no-op'],
                                            ['Standard Text', 'string', 'no-op']]
        self.schemas['diagnostic_and_instruction_ela_ytd_window'] = [['Last Name', 'string', 'mask'],
                                            ['First Name', 'string', 'mask'],
                                            ['Student ID', 'string', 'hash-no-lookup'],
                                            ['Enrolled', 'string', 'no-op'],
                                            ['Student Grade', 'string', 'no-op'],
                                            ['Academic Year', 'string', 'no-op'],
                                            ['School', 'string', 'no-op'],
                                            ['Subject', 'string', 'no-op'],
                                            ['User Name', 'string', 'mask'],
                                            ['Gender', 'string', 'no-op'],
                                            ['Hispanic or Latino', 'string', 'no-op'],
                                            ['Race', 'string', 'no-op'],
                                            ['English Language Learner', 'string', 'no-op'],
                                            ['Special Education', 'string', 'no-op'],
                                            ['Economically Disadvantaged', 'string', 'no-op'],
                                            ['Migrant', 'string', 'no-op'],
                                            ['Class(es)', 'string', 'no-op'],
                                            ['Class Teacher(s)', 'string', 'no-op'],
                                            ['Report Group(s)', 'string', 'no-op'],
                                            ['Number of Completed Diagnostics during the time frame', 'string', 'no-op'],
                                            ['Annual Typical Growth Measure', 'string', 'no-op'],
                                            ['Annual Stretch Growth Measure', 'string', 'no-op'],
                                            ['Diagnostic Gain (Note: negative gains=zero)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Lexile Measure (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Lexile Range (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Grouping (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Language (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (1)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (1)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (1)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (1)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (1)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (1)', 'string', 'no-op'],
                                            ['Diagnostic: Language (1)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (2)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (2)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (2)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (2)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (2)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (2)', 'string', 'no-op'],
                                            ['Diagnostic: Language (2)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (3)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (3)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (3)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (3)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (3)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (3)', 'string', 'no-op'],
                                            ['Diagnostic: Language (3)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (4)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (4)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (4)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (4)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (4)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (4)', 'string', 'no-op'],
                                            ['Diagnostic: Language (4)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (5)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (5)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (5)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (5)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (5)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (5)', 'string', 'no-op'],
                                            ['Diagnostic: Language (5)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Phonological Awareness Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Phonics Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: High-Frequency Words Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Vocabulary Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Literature Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Reading Comprehension: Informational Text Relative Placement (5)', 'string', 'no-op'],
                                            ['Instruction: Overall Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Overall Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Overall Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Overall Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Overall Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Phonological Awareness Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Phonological Awareness Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Phonological Awareness Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Phonological Awareness Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Phonological Awareness Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Phonics Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Phonics Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Phonics Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Phonics Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Phonics Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: High-Frequency Words Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: High-Frequency Words Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: High-Frequency Words Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: High-Frequency Words Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: High-Frequency Words Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Vocabulary Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Vocabulary Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Vocabulary Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Vocabulary Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Vocabulary Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension: Close Reading Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension: Close Reading Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension: Close Reading Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension: Close Reading Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Reading Comprehension: Close Reading Time on Task (min)', 'string', 'no-op']]
        self.schemas['diagnostic_and_instruction_math_ytd_window'] = [['Last Name', 'string', 'mask'],
                                            ['First Name', 'string', 'mask'],
                                            ['Student ID', 'string', 'hash-no-lookup'],
                                            ['Enrolled', 'string', 'no-op'],
                                            ['Student Grade', 'string', 'no-op'],
                                            ['Academic Year', 'string', 'no-op'],
                                            ['School', 'string', 'no-op'],
                                            ['Subject', 'string', 'no-op'],
                                            ['User Name', 'string', 'mask'],
                                            ['Gender', 'string', 'no-op'],
                                            ['Hispanic or Latino', 'string', 'no-op'],
                                            ['Race', 'string', 'no-op'],
                                            ['English Language Learner', 'string', 'no-op'],
                                            ['Special Education', 'string', 'no-op'],
                                            ['Economically Disadvantaged', 'string', 'no-op'],
                                            ['Migrant', 'string', 'no-op'],
                                            ['Class(es)', 'string', 'no-op'],
                                            ['Class Teacher(s)', 'string', 'no-op'],
                                            ['Report Group(s)', 'string', 'no-op'],
                                            ['Number of Completed Diagnostics during the time frame', 'string', 'no-op'],
                                            ['Annual Typical Growth Measure', 'string', 'no-op'],
                                            ['Annual Stretch Growth Measure', 'string', 'no-op'],
                                            ['Diagnostic Gain (Note: negative gains=zero)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Quantile Measure (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Quantile Range (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Grouping (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Language (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Scale Score (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Relative Placement (Most Recent)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (1)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (1)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (1)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (1)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (1)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (1)', 'string', 'no-op'],
                                            ['Diagnostic: Language (1)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Scale Score (1)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Relative Placement (1)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (2)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (2)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (2)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (2)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (2)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (2)', 'string', 'no-op'],
                                            ['Diagnostic: Language (2)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Scale Score (2)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Relative Placement (2)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (3)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (3)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (3)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (3)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (3)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (3)', 'string', 'no-op'],
                                            ['Diagnostic: Language (3)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Scale Score (3)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Relative Placement (3)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (4)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (4)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (4)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (4)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (4)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (4)', 'string', 'no-op'],
                                            ['Diagnostic: Language (4)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Scale Score (4)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Relative Placement (4)', 'string', 'no-op'],
                                            ['Diagnostic: Start Date (5)', 'string', 'no-op'],
                                            ['Diagnostic: Completion Date (5)', 'string', 'no-op'],
                                            ['Diagnostic: Time on Task (min) (5)', 'string', 'no-op'],
                                            ['Diagnostic: Rush Flag (5)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Percentile (5)', 'string', 'no-op'],
                                            ['Diagnostic: Overall Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Tier (5)', 'string', 'no-op'],
                                            ['Diagnostic: Language (5)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Number and Operations Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Algebra and Algebraic Thinking Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Measurement and Data Relative Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Scale Score (5)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Placement (5)', 'string', 'no-op'],
                                            ['Diagnostic: Geometry Relative Placement (5)', 'string', 'no-op'],
                                            ['Instruction: Overall Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Overall Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Overall Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Overall Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Overall Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Number and Operations Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Number and Operations Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Number and Operations Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Number and Operations Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Number and Operations Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Algebra and Algebraic Thinking Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Algebra and Algebraic Thinking Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Algebra and Algebraic Thinking Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Algebra and Algebraic Thinking Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Algebra and Algebraic Thinking Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Measurement and Data Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Measurement and Data Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Measurement and Data Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Measurement and Data Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Measurement and Data Time on Task (min)', 'string', 'no-op'],
                                            ['Instruction: Geometry Lessons Passed', 'string', 'no-op'],
                                            ['Instruction: Geometry Lessons Not Passed', 'string', 'no-op'],
                                            ['Instruction: Geometry Lessons Completed', 'string', 'no-op'],
                                            ['Instruction: Geometry Pass Rate (%)', 'string', 'no-op'],
                                            ['Instruction: Geometry Time on Task (min)', 'string', 'no-op']]
        self.schemas['diagnostic_results_ela'] = [['Last Name', 'string', 'mask'],
                                            ['First Name', 'string', 'mask'],
                                            ['Student ID', 'string', 'hash-no-lookup'],
                                            ['Student Grade', 'string', 'no-op'],
                                            ['Academic Year', 'string', 'no-op'],
                                            ['School', 'string', 'no-op'],
                                            ['Start Date', 'string', 'no-op'],
                                            ['Completion Date', 'string', 'no-op'],
                                            ['Diagnostic used to establish Growth Measures (Y/N)', 'string', 'no-op'],
                                            ['Most Recent Diagnostic (Y/N)', 'string', 'no-op'],
                                            ['Duration (min)', 'string', 'no-op'],
                                            ['Rush Flag', 'string', 'no-op'],
                                            ['Overall Scale Score', 'string', 'no-op'],
                                            ['Overall Placement', 'string', 'no-op'],
                                            ['Overall Relative Placement', 'string', 'no-op'],
                                            ['Percentile', 'string', 'no-op'],
                                            ['Grouping', 'string', 'no-op'],
                                            ['Lexile Measure', 'string', 'no-op'],
                                            ['Lexile Range', 'string', 'no-op'],
                                            ['Phonological Awareness Scale Score', 'string', 'no-op'],
                                            ['Phonological Awareness Placement', 'string', 'no-op'],
                                            ['Phonics Scale Score', 'string', 'no-op'],
                                            ['Phonics Placement', 'string', 'no-op'],
                                            ['High-Frequency Words Scale Score', 'string', 'no-op'],
                                            ['High-Frequency Words Placement', 'string', 'no-op'],
                                            ['Vocabulary Scale Score', 'string', 'no-op'],
                                            ['Vocabulary Placement', 'string', 'no-op'],
                                            ['Reading Comprehension: Literature Scale Score', 'string', 'no-op'],
                                            ['Reading Comprehension: Literature Placement', 'string', 'no-op'],
                                            ['Reading Comprehension: Informational Text Scale Score', 'string', 'no-op'],
                                            ['Reading Comprehension: Informational Text Placement', 'string', 'no-op'],
                                            ['Diagnostic Language', 'string', 'no-op'],
                                            ['Annual Typical Growth Measure', 'string', 'no-op'],
                                            ['Annual Stretch Growth Measure', 'string', 'no-op'],
                                            ['Mid On Grade Level Scale Score', 'string', 'no-op']]
        self.schemas['diagnostic_results_math'] = [['Last Name', 'string', 'mask'],
                                            ['First Name', 'string', 'mask'],
                                            ['Student ID', 'string', 'hash-no-lookup'],
                                            ['Student Grade', 'string', 'no-op'],
                                            ['Academic Year', 'string', 'no-op'],
                                            ['School', 'string', 'no-op'],
                                            ['Start Date', 'string', 'no-op'],
                                            ['Completion Date', 'string', 'no-op'],
                                            ['Diagnostic used to establish Growth Measures (Y/N)', 'string', 'no-op'],
                                            ['Most Recent Diagnostic (Y/N)', 'string', 'no-op'],
                                            ['Duration (min)', 'string', 'no-op'],
                                            ['Rush Flag', 'string', 'no-op'],
                                            ['Overall Scale Score', 'string', 'no-op'],
                                            ['Overall Placement', 'string', 'no-op'],
                                            ['Overall Relative Placement', 'string', 'no-op'],
                                            ['Percentile', 'string', 'no-op'],
                                            ['Grouping', 'string', 'no-op'],
                                            ['Quantile Measure', 'string', 'no-op'],
                                            ['Quantile Range', 'string', 'no-op'],
                                            ['Number and Operations Scale Score', 'string', 'no-op'],
                                            ['Number and Operations Placement', 'string', 'no-op'],
                                            ['Algebra and Algebraic Thinking Scale Score', 'string', 'no-op'],
                                            ['Algebra and Algebraic Thinking Placement', 'string', 'no-op'],
                                            ['Measurement and Data Scale Score', 'string', 'no-op'],
                                            ['Measurement and Data Placement', 'string', 'no-op'],
                                            ['Geometry Scale Score', 'string', 'no-op'],
                                            ['Geometry Placement', 'string', 'no-op'],
                                            ['Diagnostic Language', 'string', 'no-op'],
                                            ['Annual Typical Growth Measure', 'string', 'no-op'],
                                            ['Annual Stretch Growth Measure', 'string', 'no-op'],
                                            ['Mid On Grade Level Scale Score', 'string', 'no-op']]
        self.schemas['personalized_instruction_by_lesson_ela'] = [['Last Name', 'string', 'mask'],
                                            ['First Name', 'string', 'mask'],
                                            ['Student ID', 'string', 'hash-no-lookup'],
                                            ['Student Grade', 'string', 'no-op'],
                                            ['Academic Year', 'string', 'no-op'],
                                            ['School', 'string', 'no-op'],
                                            ['Subject', 'string', 'no-op'],
                                            ['Domain', 'string', 'no-op'],
                                            ['Lesson Grade', 'string', 'no-op'],
                                            ['Lesson Level', 'string', 'no-op'],
                                            ['Lesson ID', 'string', 'no-op'],
                                            ['Lesson Name', 'string', 'no-op'],
                                            ['Lesson Objective', 'string', 'no-op'],
                                            ['Completion Date', 'string', 'no-op'],
                                            ['Total Time on Lesson (min)', 'string', 'no-op'],
                                            ['Score', 'string', 'no-op'],
                                            ['Passed or Not Passed', 'string', 'no-op'],
                                            ['Teacher-Assigned Lesson', 'string', 'no-op']]
        self.schemas['personalized_instruction_by_lesson_math'] = [['Last Name', 'string', 'mask'],
                                            ['First Name', 'string', 'mask'],
                                            ['Student ID', 'string', 'hash-no-lookup'],
                                            ['Student Grade', 'integer', 'no-op'],
                                            ['Academic Year', 'integer', 'no-op'],
                                            ['School', 'string', 'no-op'],
                                            ['Subject', 'string', 'no-op'],
                                            ['Domain', 'string', 'no-op'],
                                            ['Lesson Grade', 'integer', 'no-op'],
                                            ['Lesson Level', 'integer', 'no-op'],
                                            ['Lesson ID', 'string', 'no-op'],
                                            ['Lesson Name', 'string', 'no-op'],
                                            ['Lesson Objective', 'string', 'no-op'],
                                            ['Completion Date', 'string', 'no-op'],
                                            ['Total Time on Lesson (min)', 'integer', 'no-op'],
                                            ['Score', 'integer', 'no-op'],
                                            ['Passed or Not Passed', 'string', 'no-op'],
                                            ['Teacher-Assigned Lesson', 'boolean', 'no-op']]
   
    def process_data_from_stage1(self):
        """Process every entity folder found directly under ``self.stage1np``.

        The stage1np location is listed through ``self.oea.ls``, which yields a
        ``(folders, files)`` pair. Each folder name is passed on as an entity
        name to ``_process_entity_from_stage1``; files at that level are not used.
        """
        entity_folders, _ = self.oea.ls(self.stage1np)
        for folder in entity_folders:
            self._process_entity_from_stage1(folder)

    def copy_test_data_to_stage1(self):
        """Copy each file in this module's ``test_data`` folder into stage1np.

        Every file listed under ``<module_path>/test_data`` is copied to
        ``<stage1np>/<entity_name>/<filename>``, where the entity name is the
        file name with its last four characters removed (the files are expected
        to end in ".csv"). Sub-folders of ``test_data`` are not used.
        """
        test_data_dir = f"{self.module_path}/test_data"
        _, test_files = self.oea.ls(test_data_dir)
        for test_file in test_files:
            # Dropping the trailing 4 characters removes ".csv"; what is left names the entity folder.
            entity = test_file[:-4]
            source = f"{test_data_dir}/{test_file}"
            destination = f"{self.stage1np}/{entity}/{test_file}"
            mssparkutils.fs.cp(source, destination)

class M365(BaseOEAModule):
    """
    Provides data processing methods for MS Insights data v0.2 format.
    """

    def __init__(self, oea, source_folder='m365'):
        """ Sets up the stage1 paths for activity and roster data and registers the schema of every M365 entity.

        oea: the OEA instance, passed on to BaseOEAModule.__init__ (the other methods use it as self.oea).
        source_folder: folder name passed on to BaseOEAModule.__init__; defaults to 'm365'.

        Each schema is a list of [column name, data type, operation] entries. The schema is handed to
        self.oea.to_spark_schema when reading the csv files and to self.oea.pseudonymize before writing;
        the third element ('no-op', 'hash', 'hash-no-lookup', 'mask') is presumably the pseudonymization
        operation for that column - confirm against the pseudonymize implementation.

        Column order matters: process_activity and _process_roster_entity read the csv files with
        header='false', so columns are matched to these entries by position.
        """
        BaseOEAModule.__init__(self, oea, source_folder)

        # Locations of the raw activity and roster csv files within this module's stage1np folder.
        self.stage1np_activity = self.stage1np + '/DIPData/Activity/ApplicationUsage'
        self.stage1np_roster = self.stage1np + '/DIPData/Roster'

        # Activity data (used by process_activity).
        # NOTE: 'AssginmentDueDate' (sic) is also referenced with this spelling in the SQL in process_activity.
        self.schemas['Activity0p2'] = [['SignalType', 'string', 'no-op'],
                                            ['StartTime', 'timestamp', 'no-op'],
                                            ['UserAgent', 'string', 'no-op'],
                                            ['SignalId', 'string', 'no-op'],
                                            ['SISClassId', 'string', 'no-op'],
                                            ['OfficeClassId', 'string', 'no-op'],
                                            ['ChannelId', 'string', 'no-op'],
                                            ['AppName', 'string', 'no-op'],
                                            ['ActorId', 'string', 'hash-no-lookup'],
                                            ['ActorRole', 'string', 'no-op'],
                                            ['SchemaVersion', 'string', 'no-op'],
                                            ['AssignmentId', 'string', 'no-op'],
                                            ['SubmissionId', 'string', 'no-op'],
                                            ['Action', 'string', 'no-op'],
                                            ['AssginmentDueDate', 'string', 'no-op'],
                                            ['ClassCreationDate', 'string', 'no-op'],
                                            ['Grade', 'string', 'no-op'],
                                            ['SourceFileExtension', 'string', 'no-op'],
                                            ['MeetingDuration', 'string', 'no-op']]
        # Roster entities. _process_roster_entity looks the schema up by the roster file name minus its
        # 4-character extension, so each key below has to match a roster file name.
        self.schemas['Calendar'] = [['Id', 'string', 'no-op'],
                                            ['Name', 'string', 'no-op'],
                                            ['Description', 'string', 'no-op'],
                                            ['SchoolYear', 'integer', 'no-op'],
                                            ['IsCurrent', 'boolean', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['OrgId', 'string', 'no-op']]
        self.schemas['Course'] = [['Id', 'string', 'no-op'],
                                            ['Name', 'string', 'no-op'],
                                            ['Code', 'string', 'no-op'],
                                            ['Description', 'string', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['CalendarId', 'string', 'no-op']]
        self.schemas['Org'] = [['Id', 'string', 'no-op'],
                                            ['Name', 'string', 'no-op'],
                                            ['Identifier', 'string', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['ParentOrgId', 'string', 'no-op'],
                                            ['RefOrgTypeId', 'string', 'no-op'],
                                            ['SourceSystemId', 'string', 'no-op']]
        # Person: the ids are hashed and the name columns are masked.
        self.schemas['Person'] = [['Id', 'string', 'hash'],
                                            ['FirstName', 'string', 'mask'],
                                            ['MiddleName', 'string', 'mask'],
                                            ['LastName', 'string', 'mask'],
                                            ['GenerationCode', 'string', 'no-op'],
                                            ['Prefix', 'string', 'no-op'],
                                            ['EnabledUser', 'string', 'no-op'],
                                            ['ExternalId', 'string', 'hash'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['SourceSystemId', 'string', 'no-op']]
        # PersonIdentifier: process_activity reads this table and its lookup back from stage2.
        self.schemas['PersonIdentifier'] = [['Id', 'string', 'hash'],
                                            ['Identifier', 'string', 'hash'],
                                            ['Description', 'string', 'no-op'],
                                            ['RefIdentifierTypeId', 'string', 'no-op'],
                                            ['ExternalId', 'string', 'hash'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['PersonId', 'string', 'hash'],
                                            ['SourceSystemId', 'string', 'no-op']]
        # RefDefinition: process_activity filters on its RefType and Code columns.
        self.schemas['RefDefinition'] = [['Id', 'string', 'no-op'],
                                            ['RefType', 'string', 'no-op'],
                                            ['Namespace', 'string', 'no-op'],
                                            ['Code', 'string', 'no-op'],
                                            ['SortOrder', 'integer', 'no-op'],
                                            ['Description', 'string', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op']]
        self.schemas['Section'] = [['Id', 'string', 'no-op'],
                                            ['Name', 'string', 'no-op'],
                                            ['Code', 'string', 'no-op'],
                                            ['Location', 'string', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['CourseId', 'string', 'no-op'],
                                            ['RefSectionTypeId', 'string', 'no-op'],
                                            ['SessionId', 'string', 'no-op'],
                                            ['OrgId', 'string', 'no-op']]
        self.schemas['Session'] = [['Id', 'string', 'no-op'],
                                            ['Name', 'string', 'no-op'],
                                            ['BeginDate', 'timestamp', 'no-op'],
                                            ['EndDate', 'timestamp', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['CalendarId', 'string', 'no-op'],
                                            ['ParentSessionId', 'string', 'no-op'],
                                            ['RefSessionTypeId', 'string', 'no-op']]
        # Affiliation and membership entities: PersonId is hashed in each of them.
        self.schemas['StaffOrgAffiliation'] = [['Id', 'string', 'no-op'],
                                            ['IsPrimary', 'boolean', 'no-op'],
                                            ['EntryDate', 'timestamp', 'no-op'],
                                            ['ExitDate', 'timestamp', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['OrgId', 'string', 'no-op'],
                                            ['PersonId', 'string', 'hash'],
                                            ['RefStaffOrgRoleId', 'string', 'no-op']]
        self.schemas['StaffSectionMembership'] = [['Id', 'string', 'no-op'],
                                            ['IsPrimaryStaffForSection', 'boolean', 'no-op'],
                                            ['EntryDate', 'timestamp', 'no-op'],
                                            ['ExitDate', 'timestamp', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['PersonId', 'string', 'hash'],
                                            ['RefStaffSectionRoleId', 'string', 'no-op'],
                                            ['SectionId', 'string', 'no-op']]
        self.schemas['StudentOrgAffiliation'] = [['Id', 'string', 'no-op'],
                                            ['IsPrimary', 'boolean', 'no-op'],
                                            ['EntryDate', 'timestamp', 'no-op'],
                                            ['ExitDate', 'timestamp', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['OrgId', 'string', 'no-op'],
                                            ['PersonId', 'string', 'hash'],
                                            ['RefGradeLevelId', 'string', 'no-op'],
                                            ['RefStudentOrgRoleId', 'string', 'no-op'],
                                            ['RefEnrollmentStatusId', 'string', 'no-op']]
        self.schemas['StudentSectionMembership'] = [['Id', 'string', 'no-op'],
                                            ['EntryDate', 'timestamp', 'no-op'],
                                            ['ExitDate', 'timestamp', 'no-op'],
                                            ['ExternalId', 'string', 'no-op'],
                                            ['CreateDate', 'timestamp', 'no-op'],
                                            ['LastModifiedDate', 'timestamp', 'no-op'],
                                            ['IsActive', 'boolean', 'no-op'],
                                            ['PersonId', 'string', 'hash'],
                                            ['RefGradeLevelWhenCourseTakenId', 'string', 'no-op'],
                                            ['RefStudentSectionRoleId', 'string', 'no-op'],
                                            ['SectionId', 'string', 'no-op']]
    
    def process_activity(self):
        """ Processes activity data from stage1 into stage2 as a batch job.

        Reads every csv file under self.stage1np_activity, joins the rows to the PersonIdentifier,
        PersonIdentifier_lookup and RefDefinition tables already written to stage2 (so those roster
        tables must exist before this is called) to pick up PersonId, drops rows with a duplicate
        SignalId, adds year and month columns derived from StartTime, pseudonymizes the result and
        overwrites:
            self.stage2p + '/TechActivity'           (pseudonymized data)
            self.stage2np + '/TechActivity_lookup'   (lookup table)

        self.schemas['Activity0p2'] is left unmodified, so the method can safely be called repeatedly.
        """
        self.oea.logger.info("Processing ms_insights activity data from: " + self.stage1np_activity)

        # The csv files have no header row, so columns are matched to the schema by position.
        spark_schema = self.oea.to_spark_schema(self.schemas['Activity0p2'])
        df = spark.read.csv(self.stage1np_activity + '/*.csv', header='false', schema=spark_schema)
        sqlContext.registerDataFrameAsTable(df, 'Activity')
        # NOTE(review): these paths hard-code the 'm365' folder rather than using this module's own
        # stage2 paths - confirm that is intended when a non-default source_folder is used.
        sqlContext.registerDataFrameAsTable(spark.read.format('parquet').load(self.oea.stage2np + '/m365/PersonIdentifier_lookup'), 'PersonIdentifier_lookup')
        sqlContext.registerDataFrameAsTable(spark.read.format('parquet').load(self.oea.stage2p + '/m365/PersonIdentifier'), 'PersonIdentifier')
        sqlContext.registerDataFrameAsTable(spark.read.format('parquet').load(self.oea.stage2p + '/m365/RefDefinition'), 'RefDefinition')

        # Match each activity's ActorId to an 'ActiveDirectoryId' person identifier to get the PersonId.
        df = spark.sql(
            "select act.SignalType, act.StartTime, act.UserAgent, act.SignalId, act.SISClassId, act.OfficeClassId, act.ChannelId, \
            act.AppName, act.ActorId, act.ActorRole, act.SchemaVersion, act.AssignmentId, act.SubmissionId, act.Action, act.AssginmentDueDate, \
            act.ClassCreationDate, act.Grade, act.SourceFileExtension, act.MeetingDuration, pil.PersonId \
            from PersonIdentifier pi, PersonIdentifier_lookup pil, RefDefinition rd, Activity act \
            where \
                pil.Id_pseudonym = pi.Id_pseudonym \
                and pi.RefIdentifierTypeId = rd.Id \
                and rd.RefType = 'RefIdentifierType' \
                and rd.Code = 'ActiveDirectoryId' \
                and pil.Identifier = act.ActorId")

        df = df.dropDuplicates(['SignalId'])
        df = df.withColumn('year', F.year(F.col('StartTime'))).withColumn('month', F.month(F.col('StartTime')))

        # Build the extended schema as a new list instead of appending to self.schemas['Activity0p2'].
        # Appending in place would grow the shared schema on every call, so a second call would read
        # the csv files with three extra columns and then append the same three entries yet again.
        activity_schema = list(self.schemas['Activity0p2']) + [['PersonId', 'string', 'hash-no-lookup'],
                                                               ['year', 'string', 'no-op'],
                                                               ['month', 'string', 'no-op']]
        df_pseudo, df_lookup = self.oea.pseudonymize(df, activity_schema)

        # Only write a table when pseudonymize produced columns for it.
        if df_pseudo.columns:
            df_pseudo.write.format('parquet').mode('overwrite').option("mergeSchema", "true").save(self.stage2p + '/TechActivity')
        if df_lookup.columns:
            df_lookup.write.format('parquet').mode('overwrite').option("mergeSchema", "true").save(self.stage2np + '/TechActivity_lookup')

    def reset_activity_processing(self):
        """ Resets all TechActivity processing. This is intended for use during initial testing - use with caution.

        Deletes the pseudonymized table (stage2p + '/TechActivity') and the lookup table
        (stage2np + '/TechActivity_lookup') that process_activity writes.
        """
        self.oea.rm_if_exists(self.stage2p + '/TechActivity')
        # process_activity writes the lookup table under the '_lookup' name, so that is the path to remove.
        self.oea.rm_if_exists(self.stage2np + '/TechActivity_lookup')
        # Also remove the path this method used to target, in case anything was ever written there.
        self.oea.rm_if_exists(self.stage2np + '/TechActivity')
        self.oea.logger.info("Deleted TechActivity from stage2")

    def _process_roster_entity(self, path):
        try:
            base_path, filename = self.oea.pop_from_path(path)
            entity = filename[:-4]
            self.oea.logger.debug(f"Processing roster entity: path={path}, entity={entity}")
            spark_schema = self.oea.to_spark_schema(self.schemas[entity])
            df = spark.read.csv(path, header='false', schema=spark_schema)

            df_pseudo, df_lookup = self.oea.pseudonymize(df, self.schemas[entity])

            if len(df_pseudo.columns) > 0: 
                df_pseudo.write.format('parquet').mode('overwrite').option("mergeSchema", "true").save(self.stage2p + '/' + entity)
            if len(df_lookup.columns) > 0: 
                df_lookup.write.format('parquet').mode('overwrite').option("mergeSchema", "true").save(self.stage2np + '/' + entity + '_lookup')

        except (AnalysisException) as error:
            self.oea.logger.exception(str(error))

    def process_roster(self):
        """ Processes all roster data in stage1 and writes it out to stage2np and stage2p.

        Every file directly under self.stage1np_roster is handed to _process_roster_entity;
        sub-folders are ignored.
        """
        # Log the folder that is actually listed below (the roster folder, not the module root).
        self.oea.logger.info("Processing ms_insights roster data from: " + self.stage1np_roster)

        for item in mssparkutils.fs.ls(self.stage1np_roster):
            if item.isFile:
                self._process_roster_entity(item.path)

    def reset_roster_processing(self):
        """ Resets all stage1 to stage2 processing of roster data.

        Deletes everything under self.stage2np and self.stage2p except the tables written by
        process_activity ('TechActivity' and 'TechActivity_lookup'). A stage folder that does not
        exist is skipped.
        """
        # process_activity writes 'TechActivity' to stage2p and 'TechActivity_lookup' to stage2np;
        # both names are preserved so a roster reset never removes activity data.
        activity_tables = ('TechActivity', 'TechActivity_lookup')
        for stage_path in (self.stage2np, self.stage2p):
            if not self.oea.path_exists(stage_path):
                continue
            for item in mssparkutils.fs.ls(stage_path):
                if item.name not in activity_tables:
                    mssparkutils.fs.rm(item.path, True)
  
