### Summary of Work Done in This Notebook

- **Read Silver Tables:** Loaded students, courses, enrollments, and results data from Silver layer tables.
- **Aggregated Student Performance:** Created a Gold table summarizing each student's course count and total, average, maximum, and minimum marks.
- **Aggregated Course Performance:** Built a Gold table with course-level statistics such as total students, average, max, and min marks.
- **Aggregated Department Performance:** Generated a Gold table with department-wise student counts and marks statistics.
- **Enrollment Statistics:** Produced a Gold table showing enrollment counts by course and status.
- **Displayed Gold Tables:** Visualized all Gold tables for review and validation.
- **Created Dashboards:** Built dashboards to present and share key Gold table metrics and visualizations.

In [0]:
from pyspark.sql import functions as F

# Fully qualified schema that holds both the Silver inputs and the Gold outputs
schema_name = "kusha_solutions.Jeevan"

# Read Silver tables: students, enrollments, and results
students_df    = spark.table(f"{schema_name}.silver_students")
enrollments_df = spark.table(f"{schema_name}.silver_enrollments")
results_df     = spark.table(f"{schema_name}.silver_results")

# Build one row per student. Both joins are inner joins, so only results that
# match an enrollment, and enrollments that match a student, contribute;
# students without any matching result do not appear in the output.
student_perf = (
    results_df
    .join(enrollments_df, "enrollment_id", "inner")
    .join(students_df, "student_id", "inner")
    .groupBy("student_id", "name", "email", "department")
    .agg(
        # NOTE(review): counts non-null result_id values per student, which
        # equals the number of courses only if each course yields exactly one
        # result row — confirm against the Silver data.
        F.count("result_id").alias("total_courses"),
        F.sum("marks").alias("total_marks"),           # Total marks obtained by student
        F.avg("marks").alias("avg_marks"),             # Average marks per student
        F.max("marks").alias("max_marks"),             # Maximum marks obtained by student
        F.min("marks").alias("min_marks")              # Minimum marks obtained by student
    )
)

# Write the aggregated student performance data to the Gold table.
# overwriteSchema lets the overwrite succeed when the column set changes
# between runs, matching the other Gold writes in this notebook.
student_perf.write.format("delta").mode("overwrite").option("overwriteSchema", "true") \
    .saveAsTable(f"{schema_name}.gold_student_performance")
print("✅ gold_student_performance created")

In [0]:
# Read the persisted student-performance Gold table back and render it in the notebook
gold_student_performance_df = spark.table(f"{schema_name}.gold_student_performance")
display(gold_student_performance_df)

Databricks visualization. Run in Databricks to view.

In [0]:
from pyspark.sql import functions as F

# Fully qualified schema that holds both the Silver inputs and the Gold outputs
schema_name = "kusha_solutions.Jeevan"

# Read Silver tables: courses, enrollments, and results
courses_df     = spark.table(f"{schema_name}.silver_courses")
enrollments_df = spark.table(f"{schema_name}.silver_enrollments")
results_df     = spark.table(f"{schema_name}.silver_results")

# Build one row per course. Both joins are inner joins, so only results that
# match an enrollment, and enrollments that match a course, contribute.
course_perf = (
    results_df
    .join(enrollments_df, "enrollment_id", "inner")  # Join results with enrollments on enrollment_id
    .join(courses_df, "course_id", "inner")          # Join with courses on course_id
    .groupBy("course_id", "course_name")             # Group by course_id and course_name
    .agg(
        # Count each student once per course, the same way the department
        # cell computes its total_students; counting enrollment_id would
        # count result rows instead of students.
        # NOTE(review): assumes student_id is available after the enrollments
        # join, as in the other cells of this notebook — confirm.
        F.countDistinct("student_id").alias("total_students"),
        F.avg("marks").alias("avg_marks"),                 # Average marks for the course
        F.max("marks").alias("max_marks"),                 # Maximum marks in the course
        F.min("marks").alias("min_marks")                  # Minimum marks in the course
    )
)

# Write the aggregated course performance data to the Gold table
course_perf.write.format("delta").mode("overwrite").option("overwriteSchema", "true") \
    .saveAsTable(f"{schema_name}.gold_course_performance")

print("✅ gold_course_performance created")

In [0]:
# Read the persisted course-performance Gold table back and render it in the notebook
gold_course_performance_df = spark.table(f"{schema_name}.gold_course_performance")
display(gold_course_performance_df)

Databricks visualization. Run in Databricks to view.

In [0]:
from pyspark.sql import functions as F
# Fully qualified schema that holds both the Silver inputs and the Gold outputs
schema_name = "kusha_solutions.Jeevan"

# Silver inputs needed for the department roll-up
students_df    = spark.table(f"{schema_name}.silver_students")
enrollments_df = spark.table(f"{schema_name}.silver_enrollments")
results_df     = spark.table(f"{schema_name}.silver_results")

# Attach each result to its enrollment, then to the student who owns that
# enrollment, so every marks row carries the student's department.
results_with_students = (
    results_df
    .join(enrollments_df, on="enrollment_id", how="inner")
    .join(students_df, on="student_id", how="inner")
)

# Metrics computed for each department
department_metrics = [
    F.countDistinct("student_id").alias("total_students"),  # unique students per department
    F.avg("marks").alias("avg_marks"),                      # mean marks per department
    F.max("marks").alias("max_marks"),                      # highest marks per department
    F.min("marks").alias("min_marks"),                      # lowest marks per department
]

dept_perf = results_with_students.groupBy("department").agg(*department_metrics)

# Persist the roll-up as a Delta table, replacing any previous contents and schema
(
    dept_perf.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{schema_name}.gold_department_performance")
)

print("✅ gold_department_performance created")

In [0]:
# Read the persisted department-performance Gold table back and render it in the notebook
gold_department_performance_df = spark.table(f"{schema_name}.gold_department_performance")
display(gold_department_performance_df)

Databricks visualization. Run in Databricks to view.

In [0]:
from pyspark.sql import functions as F
# Fully qualified schema that holds both the Silver inputs and the Gold outputs
schema_name = "kusha_solutions.Jeevan"

# Silver enrollments are the only input for this roll-up
enrollments_df = spark.table(f"{schema_name}.silver_enrollments")

# One row per (course_id, status) pair with the number of enrollments in it
enrollments_by_course_status = enrollments_df.groupBy("course_id", "status")
enroll_stats = enrollments_by_course_status.agg(
    F.count("enrollment_id").alias("total_enrollments")
)

# Persist the counts as a Delta table, replacing any previous contents and schema
(
    enroll_stats.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{schema_name}.gold_enrollment_stats")
)

print("✅ gold_enrollment_stats created")

In [0]:
# Read the persisted enrollment-statistics Gold table back and render it in the notebook
gold_enrollment_stats_df = spark.table(f"{schema_name}.gold_enrollment_stats")
display(gold_enrollment_stats_df)

Databricks visualization. Run in Databricks to view.