In [None]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import DeltaTable
from datetime import datetime

## Teaching Tables

In [None]:
# 1. Transform Departments
# Projects raw_g4s_departments into base_departments: source columns are renamed
# and DepartmentId is derived as <_academy_code><_academic_year>-<id>.
raw_departments = spark.table("raw_g4s_departments")

# Shared "<academy><year>-" prefix used for the composite key in this cell.
key_prefix = concat(col("_academy_code"), col("_academic_year"), lit("-"))

base_departments = raw_departments.select(
    concat(key_prefix, col("id")).alias("DepartmentId"),
    col("_academic_year").alias("DataSet"),
    col("_academy_code").alias("Academy"),
    col("id").alias("G4SDeptId"),
    col("name").alias("Name"),
    col("code").alias("Code"),
    current_timestamp().alias("TransformedAt"),
)

# Full overwrite (schema included) of the Delta table, partitioned by Academy/DataSet.
(
    base_departments.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Academy", "DataSet")
    .saveAsTable("base_departments")
)

# Count the persisted table instead of the lazy DataFrame: calling .count() on
# base_departments would re-read the raw table and re-run the projection.
print(f"✓ Transformed {spark.table('base_departments').count()} departments")

In [None]:
# 2. Transform Subjects
# Projects raw_g4s_subjects into base_subjects. SubjectId and the DepartmentId
# foreign key are both built as <_academy_code><_academic_year>-<source id>.
raw_subjects = spark.table("raw_g4s_subjects")

# Shared "<academy><year>-" prefix used by every composite key in this cell.
key_prefix = concat(col("_academy_code"), col("_academic_year"), lit("-"))

base_subjects = raw_subjects.select(
    concat(key_prefix, col("id")).alias("SubjectId"),
    col("_academic_year").alias("DataSet"),
    col("_academy_code").alias("Academy"),
    col("id").alias("G4SSubjectId"),
    # NOTE(review): concat yields NULL when any input is NULL, so a NULL
    # department_id gives a NULL DepartmentId — confirm that is intended.
    concat(key_prefix, col("department_id")).alias("DepartmentId"),
    col("name").alias("Name"),
    col("code").alias("Code"),
    current_timestamp().alias("TransformedAt"),
)

# Full overwrite (schema included) of the Delta table, partitioned by Academy/DataSet.
(
    base_subjects.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Academy", "DataSet")
    .saveAsTable("base_subjects")
)

# Count the persisted table instead of the lazy DataFrame: calling .count() on
# base_subjects would re-read the raw table and re-run the projection.
print(f"✓ Transformed {spark.table('base_subjects').count()} subjects")

In [None]:
# 3. Transform Groups (Classes)
# Projects raw_g4s_groups into base_groups. GroupId and the SubjectId foreign
# key are both built as <_academy_code><_academic_year>-<source id>.
raw_groups = spark.table("raw_g4s_groups")

# Shared "<academy><year>-" prefix used by every composite key in this cell.
key_prefix = concat(col("_academy_code"), col("_academic_year"), lit("-"))

base_groups = raw_groups.select(
    concat(key_prefix, col("id")).alias("GroupId"),
    col("_academic_year").alias("DataSet"),
    col("_academy_code").alias("Academy"),
    col("id").alias("G4SGroupId"),
    # NOTE(review): concat yields NULL when any input is NULL, so a group
    # without a subject_id gets a NULL SubjectId — confirm that is intended.
    concat(key_prefix, col("subject_id")).alias("SubjectId"),
    col("name").alias("Name"),
    col("code").alias("Code"),
    col("year_group").alias("YearGroup"),
    current_timestamp().alias("TransformedAt"),
)

# Full overwrite (schema included) of the Delta table, partitioned by Academy/DataSet.
(
    base_groups.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Academy", "DataSet")
    .saveAsTable("base_groups")
)

# Count the persisted table instead of the lazy DataFrame: calling .count() on
# base_groups would re-read the raw table and re-run the projection.
print(f"✓ Transformed {spark.table('base_groups').count()} groups")

In [None]:
# 4. Transform Group Students (enrollment)
# Projects raw_g4s_group_students into base_group_students. The row key
# GroupStudentId is <_academy_code><_academic_year>-<group_id>-<student_id>;
# GroupId and StudentId use the same prefix with a single source id.
raw_group_students = spark.table("raw_g4s_group_students")

# Shared "<academy><year>-" prefix used by every composite key in this cell.
key_prefix = concat(col("_academy_code"), col("_academic_year"), lit("-"))

base_group_students = raw_group_students.select(
    concat(
        key_prefix, col("group_id"), lit("-"), col("student_id")
    ).alias("GroupStudentId"),
    col("_academic_year").alias("DataSet"),
    col("_academy_code").alias("Academy"),
    concat(key_prefix, col("group_id")).alias("GroupId"),
    concat(key_prefix, col("student_id")).alias("StudentId"),
    col("student_id").alias("G4SStudentId"),
    current_timestamp().alias("TransformedAt"),
)

# Full overwrite (schema included) of the Delta table, partitioned by Academy/DataSet.
(
    base_group_students.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Academy", "DataSet")
    .saveAsTable("base_group_students")
)

# Count the persisted table instead of the lazy DataFrame: calling .count() on
# base_group_students would re-read the raw table and re-run the projection.
print(f"✓ Transformed {spark.table('base_group_students').count()} group student enrollments")

In [None]:
# 5. Transform Teachers
# Projects raw_g4s_teachers into base_teachers. TeacherId and the GroupId
# foreign key are both built as <_academy_code><_academic_year>-<source id>.
raw_teachers = spark.table("raw_g4s_teachers")

# Shared "<academy><year>-" prefix used by every composite key in this cell.
key_prefix = concat(col("_academy_code"), col("_academic_year"), lit("-"))

base_teachers = raw_teachers.select(
    # NOTE(review): the source rows carry a group_id, which suggests one row per
    # teacher per group; if so TeacherId is not unique in this table — confirm.
    concat(key_prefix, col("id")).alias("TeacherId"),
    col("_academic_year").alias("DataSet"),
    col("_academy_code").alias("Academy"),
    col("id").alias("G4STeacherId"),
    concat(key_prefix, col("group_id")).alias("GroupId"),
    col("staff_code").alias("StaffCode"),
    col("title").alias("Title"),
    col("forename").alias("Forename"),
    col("surname").alias("Surname"),
    current_timestamp().alias("TransformedAt"),
)

# Full overwrite (schema included) of the Delta table, partitioned by Academy/DataSet.
(
    base_teachers.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Academy", "DataSet")
    .saveAsTable("base_teachers")
)

# Count the persisted table instead of the lazy DataFrame: calling .count() on
# base_teachers would re-read the raw table and re-run the projection.
print(f"✓ Transformed {spark.table('base_teachers').count()} teachers")

## Assessment Tables

In [None]:
# 6. Transform Markbooks
# Projects raw_g4s_markbooks into base_markbooks. MarkbookId and the GroupId
# foreign key are both built as <_academy_code><_academic_year>-<source id>.
raw_markbooks = spark.table("raw_g4s_markbooks")

# Shared "<academy><year>-" prefix used by every composite key in this cell.
key_prefix = concat(col("_academy_code"), col("_academic_year"), lit("-"))

base_markbooks = raw_markbooks.select(
    concat(key_prefix, col("id")).alias("MarkbookId"),
    col("_academic_year").alias("DataSet"),
    col("_academy_code").alias("Academy"),
    col("id").alias("G4SMarkbookId"),
    concat(key_prefix, col("group_id")).alias("GroupId"),
    col("name").alias("Name"),
    col("markbook_template_name").alias("MarkbookTemplateName"),
    col("subject_name").alias("SubjectName"),
    current_timestamp().alias("TransformedAt"),
)

# Full overwrite (schema included) of the Delta table, partitioned by Academy/DataSet.
(
    base_markbooks.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Academy", "DataSet")
    .saveAsTable("base_markbooks")
)

# Count the persisted table instead of the lazy DataFrame: calling .count() on
# base_markbooks would re-read the raw table and re-run the projection.
print(f"✓ Transformed {spark.table('base_markbooks').count()} markbooks")

In [None]:
# 7. Transform Marksheet Grades
# Projects raw_g4s_marksheet_grades into base_marksheet_grades. The row key
# MarksheetGradeId is <_academy_code><_academic_year>-<marksheet_id>-<student_id>;
# StudentId and MarksheetId use the same prefix with a single source id.
raw_marksheet_grades = spark.table("raw_g4s_marksheet_grades")

# Shared "<academy><year>-" prefix used by every composite key in this cell.
key_prefix = concat(col("_academy_code"), col("_academic_year"), lit("-"))

base_marksheet_grades = raw_marksheet_grades.select(
    concat(
        key_prefix, col("marksheet_id"), lit("-"), col("student_id")
    ).alias("MarksheetGradeId"),
    col("_academic_year").alias("DataSet"),
    col("_academy_code").alias("Academy"),
    concat(key_prefix, col("student_id")).alias("StudentId"),
    concat(key_prefix, col("marksheet_id")).alias("MarksheetId"),
    col("grade").alias("Grade"),
    col("grade_value").alias("GradeValue"),
    current_timestamp().alias("TransformedAt"),
)

# Full overwrite (schema included) of the Delta table, partitioned by Academy/DataSet.
(
    base_marksheet_grades.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Academy", "DataSet")
    .saveAsTable("base_marksheet_grades")
)

# Count the persisted table instead of the lazy DataFrame: calling .count() on
# base_marksheet_grades would re-read the raw table and re-run the projection.
print(f"✓ Transformed {spark.table('base_marksheet_grades').count()} marksheet grades")

In [None]:
# 8. Transform Markslot Marks
# Projects raw_g4s_markslot_marks into base_markslot_marks. The row key
# MarkslotMarkId is <_academy_code><_academic_year>-<markslot_id>-<student_id>;
# StudentId and MarkslotId use the same prefix with a single source id.
raw_markslot_marks = spark.table("raw_g4s_markslot_marks")

# Shared "<academy><year>-" prefix used by every composite key in this cell.
key_prefix = concat(col("_academy_code"), col("_academic_year"), lit("-"))

base_markslot_marks = raw_markslot_marks.select(
    concat(
        key_prefix, col("markslot_id"), lit("-"), col("student_id")
    ).alias("MarkslotMarkId"),
    col("_academic_year").alias("DataSet"),
    col("_academy_code").alias("Academy"),
    concat(key_prefix, col("student_id")).alias("StudentId"),
    concat(key_prefix, col("markslot_id")).alias("MarkslotId"),
    col("mark").alias("Mark"),
    col("comment").alias("Comment"),
    current_timestamp().alias("TransformedAt"),
)

# Full overwrite (schema included) of the Delta table, partitioned by Academy/DataSet.
(
    base_markslot_marks.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Academy", "DataSet")
    .saveAsTable("base_markslot_marks")
)

# Count the persisted table instead of the lazy DataFrame: calling .count() on
# base_markslot_marks would re-read the raw table and re-run the projection.
print(f"✓ Transformed {spark.table('base_markslot_marks').count()} markslot marks")

## Summary

In [None]:
# Final report: print the row count of each base table, in the order listed.
summary_tables = [
    ("Departments", "base_departments"),
    ("Subjects", "base_subjects"),
    ("Groups", "base_groups"),
    ("Group Students", "base_group_students"),
    ("Teachers", "base_teachers"),
    ("Markbooks", "base_markbooks"),
    ("Marksheet Grades", "base_marksheet_grades"),
    ("Markslot Marks", "base_markslot_marks"),
]

print("\n=== Teaching & Assessment Transformation Summary ===")
for label, table_name in summary_tables:
    # Each count is read from the catalog table, not from an in-memory DataFrame.
    print(f"{label}: {spark.table(table_name).count()}")
print("\n✓ Transformation Complete")