# ContosoISD Example
This example demonstrates how to use the OEA framework and modules to process incoming data, perform data prep, and view the data in an example Power BI dashboard.

# Running the example
1) Click on "Publish" in the top nav bar (to ensure all notebooks have been published)

2) Select your spark pool in the "Attach to" dropdown list above.

3) Click on "Run all" at the top of this tab.

4) Open the dashboard in Power BI Desktop and point it to your newly set up data lake (you can download the pbix from here: [techInequityDashboardContoso v2.pbix](https://github.com/microsoft/OpenEduAnalytics/blob/main/packages/ContosoISD/powerbi/techInequityDashboardContoso%20v2.pbix))

# More info
See [OEA Solution Guide](https://github.com/microsoft/OpenEduAnalytics/blob/main/docs/OpenEduAnalyticsSolutionGuide.pdf) for more details on this example.

In [3]:
%run /OEA_py

In [4]:
%run /M365_py

In [None]:
%run /ContosoSIS_py

In [5]:
# 0) Initialize the OEA framework and modules needed.
# OEA, M365 and ContosoSIS are not defined in this notebook; they are expected to be brought in by
# the %run cells above (/OEA_py, /M365_py, /ContosoSIS_py), so those cells must have been run first.
oea = OEA()
# Both modules are handed the same OEA instance.
m365 = M365(oea)
# NOTE(review): the meaning of the positional 'contoso_sis' and False arguments is defined in /ContosoSIS_py
# and is not visible here - presumably a source folder name and a pseudonymization flag; confirm there.
contoso_sis = ContosoSIS(oea, 'contoso_sis', False)

In [None]:
# 1) Land data into stage1 of your data lake, from multiple source systems (this example copies in test data sets that came with the OEA installation).
# The copy itself is implemented by each module object (see /ContosoSIS_py and /M365_py); nothing is returned to this notebook.
contoso_sis.copy_test_data_to_stage1()
m365.copy_test_data_to_stage1()

In [None]:
# 2) Process the raw data (csv format) from stage1 into stage2 (adds schema details and writes out in parquet format).
#    [Note: we're not performing pseudonymization in this example, so everything is written to container stage2np.]
# NOTE(review): the call order (M365 roster, then Contoso SIS, then M365 activity) is kept as authored; whether
# activity processing depends on the roster output is not visible from this notebook - confirm before reordering.
m365.process_roster_data_from_stage1()
contoso_sis.process_data_from_stage1()
m365.process_activity_data_from_stage1()

In [None]:
# 3) Run additional prep on the data to create a unified dataset that can be used in a Power BI report

def _register_parquet_view(relative_path, view_name):
    """Load the parquet data at oea.stage2np + relative_path and expose it to spark.sql as temp view view_name."""
    # DataFrame.createOrReplaceTempView is used in place of the legacy SQLContext.registerDataFrameAsTable
    # (SQLContext is deprecated as of Spark 3.0); an existing view with the same name is replaced.
    spark.read.format('parquet').load(oea.stage2np + relative_path).createOrReplaceTempView(view_name)


def _overwrite_parquet(data_frame, relative_path):
    """Write data_frame as parquet to oea.stage2np + relative_path, overwriting whatever is already there."""
    data_frame.write.format('parquet').mode('overwrite').save(oea.stage2np + relative_path)


# Process sectionmark data. Convert id values to use the Person.Id and Section.Id values set in the m365 data.
_register_parquet_view('/contoso_sis/studentsectionmark', 'SectionMark')
_register_parquet_view('/m365/Person', 'Person')
_register_parquet_view('/m365/Section', 'Section')
# Comma joins restricted by the where clause: only marks whose student_id and section_id both match an
# m365 ExternalId are kept. The numeric grade and credit columns are cast to int.
df = spark.sql(
    "select sm.id Id, p.Id PersonId, s.Id SectionId, cast(sm.numeric_grade_earned as int) NumericGrade, "
    "sm.alpha_grade_earned AlphaGrade, sm.is_final_grade IsFinalGrade, cast(sm.credits_attempted as int) CreditsAttempted, cast(sm.credits_earned as int) CreditsEarned, "
    "sm.grad_credit_type GraduationCreditType, sm.id ExternalId, CURRENT_TIMESTAMP CreateDate, CURRENT_TIMESTAMP LastModifiedDate, true IsActive "
    "from SectionMark sm, Person p, Section s "
    "where sm.student_id = p.ExternalId "
    "and sm.section_id = s.ExternalId"
)
_overwrite_parquet(df, '/ContosoISD/SectionMark')

# Repeat the above process, this time for student attendance
# Convert id values to use the Person.Id, Org.Id and Section.Id values
# (the Person and Section views registered above are reused here).
_register_parquet_view('/contoso_sis/studentattendance', 'Attendance')
_register_parquet_view('/m365/Org', 'Org')
# attendance_date is parsed from 'MM/dd/yyyy' text into a date; rows without a matching person, org and
# section ExternalId are dropped by the where clause.
df = spark.sql(
    "select att.id Id, p.Id PersonId, att.school_year SchoolYear, o.Id OrgId, to_date(att.attendance_date,'MM/dd/yyyy') AttendanceDate, "
    "att.all_day AllDay, att.Period Period, s.Id SectionId, att.AttendanceCode AttendanceCode, att.PresenceFlag PresenceFlag, "
    "att.attendance_status AttendanceStatus, att.attendance_type AttendanceType, att.attendance_sequence AttendanceSequence "
    "from Attendance att, Org o, Person p, Section s "
    "where att.student_id = p.ExternalId "
    "and att.school_id = o.ExternalId "
    "and att.section_id = s.ExternalId"
)
_overwrite_parquet(df, '/ContosoISD/Attendance')

# Add 'Department' column to Course (hardcoded to "Math" for this Contoso example)
_register_parquet_view('/m365/Course', 'Course')
df = spark.sql("select Id, Name, Code, Description, ExternalId, CreateDate, LastModifiedDate, IsActive, CalendarId, 'Math' Department from Course")
_overwrite_parquet(df, '/ContosoISD/Course')

In [None]:
# 4) Create spark db's that point to the data in the data lake to allow for connecting via Power BI through use of the Serverless SQL endpoint.
contoso_sis.create_stage2_db('PARQUET')
m365.create_stage2_db('PARQUET')

spark.sql('CREATE DATABASE IF NOT EXISTS s2_ContosoISD')

# (table name in s2_ContosoISD, folder under oea.stage2np that holds its parquet data), in creation order.
# Most tables point straight at the m365 output; Course, Attendance and SectionMark point at the
# ContosoISD folders written in step 3. Note that the Activity table is backed by the TechActivity folder.
contosoisd_tables = (
    ('Activity', '/m365/TechActivity'),
    ('Calendar', '/m365/Calendar'),
    ('Org', '/m365/Org'),
    ('Person', '/m365/Person'),
    ('PersonIdentifier', '/m365/PersonIdentifier'),
    ('RefDefinition', '/m365/RefDefinition'),
    ('Section', '/m365/Section'),
    ('Session', '/m365/Session'),
    ('StaffOrgAffiliation', '/m365/StaffOrgAffiliation'),
    ('StaffSectionMembership', '/m365/StaffSectionMembership'),
    ('StudentOrgAffiliation', '/m365/StudentOrgAffiliation'),
    ('StudentSectionMembership', '/m365/StudentSectionMembership'),
    ('Course', '/ContosoISD/Course'),
    ('Attendance', '/ContosoISD/Attendance'),
    ('SectionMark', '/ContosoISD/SectionMark'),
)
for table_name, relative_path in contosoisd_tables:
    # "if not exists" makes re-running this cell harmless for tables that were already created.
    spark.sql(f"create table if not exists s2_ContosoISD.{table_name} using PARQUET location '{oea.stage2np}{relative_path}'")

print(f"Created spark db's.\nYou can now open the 'techInequityDashboardContoso v2.pbix' dashboard and change the datasource to point to: {oea.serverless_sql_endpoint}")

# Reset everything
You can uncomment the `reset_all_processing()` call on line 11 (the last line) of the cell below and run the cell to reset everything and walk through the process again from the top.

Note: remember to comment out line 11 again to prevent accidental resetting of the example

In [6]:
def reset_all_processing():
    """Call delete_all_stages() on both modules, remove stage2np/ContosoISD, and drop the three example spark dbs."""
    contoso_sis.delete_all_stages()
    m365.delete_all_stages()
    oea.rm_if_exists(oea.stage2np + '/ContosoISD')
    for db_name in ('s2_contoso_sis', 's2_contosoisd', 's2_m365'):
        oea.drop_db(db_name)

# Destructive: the call is kept commented out so that "Run all" does not wipe the data and dbs it has just created.
# Uncomment the following line and run this cell to reset everything if you want to walk through the process again.
# reset_all_processing()