In [1]:
import os

# The relative paths used in this notebook (settings module, dump directory)
# are resolved from the parent of the starting directory, so step up one level.
# Guarded so that re-running this cell in the same kernel does not keep
# climbing the directory tree; the flag resets on kernel restart.
if "_PROJECT_ROOT" not in globals():
    os.chdir('..')
    _PROJECT_ROOT = os.getcwd()

# Settings module Django loads during django.setup().
os.environ["DJANGO_SETTINGS_MODULE"] = "course_management.settings"
# Jupyter runs cells inside an event loop; without this flag Django refuses
# synchronous ORM calls from that context (SynchronousOnlyOperation).
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
# NOTE(review): presumably read by the project settings to select the local
# configuration — confirm against course_management.settings.
os.environ["IS_LOCAL"] = "1"

import django
django.setup()

IS_LOCAL=True
['C:\\Users\\alexe\\git\\course-management-platform\\.venv\\Lib\\site-packages\\ipykernel_launcher.py', '-f', 'C:\\Users\\alexe\\AppData\\Roaming\\jupyter\\runtime\\kernel-0da6e238-2541-47e5-b3e5-54959c9f90d1.json']
Is test: False


In [73]:
from pathlib import Path

In [2]:
from courses.models import *

In [3]:
from django.core.serializers import serialize
from django.db.models import Prefetch

In [67]:
def get_course_data_dump(course_slug):
    """Collect the records related to one course, grouped by kind.

    Args:
        course_slug: Value matched against ``Course.slug``.

    Returns:
        ``None`` when no ``Course`` with that slug exists. Otherwise a dict
        with these keys, in this order:

        - ``'course'``: single-element list holding the ``Course`` instance
        - ``'projects'``: ``Project`` rows filtered by the course
        - ``'submissions'``: ``ProjectSubmission`` rows whose project belongs
          to the course
        - ``'evaluation_criteria'``: ``ReviewCriteria`` rows filtered by the
          course
        - ``'peer_reviews'``: ``PeerReview`` rows whose reviewed submission
          belongs to the course
        - ``'evaluation_scores'``: ``ProjectEvaluationScore`` rows whose
          submission belongs to the course

        Every value except ``'course'`` is the result of ``.filter(...)``
        (optionally followed by ``.prefetch_related(...)``) on the model's
        manager.
    """
    try:
        target_course = Course.objects.get(slug=course_slug)
    except Course.DoesNotExist:
        # Unknown slug: report "nothing to dump" instead of raising.
        return None

    # Submissions are reached through their project's course.
    course_submissions = ProjectSubmission.objects.filter(
        project__course=target_course
    ).prefetch_related('student', 'enrollment', 'project')

    # Peer reviews are reached through the submission being evaluated.
    course_reviews = PeerReview.objects.filter(
        submission_under_evaluation__project__course=target_course
    ).prefetch_related(
        'submission_under_evaluation',
        'reviewer',
        'criteria_responses',
        'criteria_responses__criteria',
    )

    # Scores are reached through the submission they were given to.
    course_scores = ProjectEvaluationScore.objects.filter(
        submission__project__course=target_course
    ).prefetch_related('submission', 'review_criteria')

    return {
        # Wrapped in a list so every value in the dict is an iterable of rows.
        'course': [target_course],
        'projects': Project.objects.filter(course=target_course),
        'submissions': course_submissions,
        'evaluation_criteria': ReviewCriteria.objects.filter(course=target_course),
        'peer_reviews': course_reviews,
        'evaluation_scores': course_scores,
    }

In [7]:
# Slug of the course to export; also used as the dump sub-directory name.
course_slug = "de-zoomcamp-2025"

In [68]:
data = get_course_data_dump(course_slug)

# get_course_data_dump returns None for an unknown slug; stop here with a clear
# message instead of failing later with "'NoneType' object is not subscriptable".
if data is None:
    raise ValueError(f"No course found with slug {course_slug!r}")

In [69]:
# Sanity-check summary: course title followed by one row count per dataset.
# Each entry pairs the printed prefix with the key in `data` to count.
count_rows = (
    ("Number of projects: ", 'projects'),
    ("Number of submissions: ", 'submissions'),
    ("Number of criteria: ", 'evaluation_criteria'),
    ("Number of peer reviews: ", 'peer_reviews'),
    ("Number of evaluation scores: ", 'evaluation_scores'),
)

print(f"Course: {data['course'][0].title}")
for prefix, dataset_key in count_rows:
    print(f"{prefix}{data[dataset_key].count()}")

Course: Data Engineering Zoomcamp 2025
Number of projects: 3
Number of submissions: 398
Number of criteria: 8
Number of peer reviews: 1240
Number of evaluation scores: 3184


In [81]:
# Per-course output directory, resolved relative to the current working
# directory; created (with parents) if missing, reused if it already exists.
dump_dir = Path('notebooks/data/dump').joinpath(course_slug)
dump_dir.mkdir(exist_ok=True, parents=True)

In [82]:
# Write one JSON Lines file per dataset, named after its key in `data`.
for name, records in data.items():
    target_file = dump_dir / f"{name}.jsonl"

    # newline='\n' stops text mode from translating line endings, so the dump
    # is byte-identical regardless of the platform it was produced on.
    with target_file.open('w', encoding='utf-8', newline='\n') as f_out:
        # Stream objects straight into the file instead of building the whole
        # serialized dump as one in-memory string first.
        serialize('jsonl', records, stream=f_out)