In [18]:
import numpy as np
import pandas as pd

In [19]:
## Generate courses
def create_courses(num_rows):
    codes = []
    for i in range(num_rows):
        while True:
            code = get_course_code()
            if code not in codes:
                codes.append(code)
                break
    courses = []
    for code in codes:
        row = {}
        row["code"] = code
        row["name"] = get_course_name(code)
        row["credits"] = np.random.randint(0,120)
        courses.append(row)
    return pd.DataFrame(courses)

In [40]:
## Generate Degrees
def create_degrees(num_rows):
    codes = []
    for i in range(num_rows):
        while True:
            code = get_degree_code()
            if code not in codes:
                codes.append(code)
                break
    degrees = []
    for code in codes:
        row = {}
        row["code"] = code
        row["name"] = get_degree_name()
        row["type"] = ["UG","PG"][np.random.rand() > 0.50]
        print(row["type"])
        degrees.append(row)
    return pd.DataFrame(degrees)

In [21]:
## Generate Students
def create_students(num_rows, degrees):
    degree_uuns = degrees["code"].values
    uuns = []
    for i in range(num_rows):
        while True:
            uun = get_student_uun()
            if uun not in uuns:
                uuns.append(uun)
                break
    students = []
    for uun in uuns:
        row = {}
        row["uun"] = uun
        row["name"] = get_student_name()
        row["degree"] = degree_uuns[np.random.randint(0,len(degree_uuns))]
        students.append(row)
    return pd.DataFrame(students)

In [22]:
## Generate Exams
def create_exams(num_rows, students, courses):
    student_uuns = students["uun"].values
    course_codes = courses["code"].values
    students_courses = []
    for i in range(num_rows):
        while True:
            student = student_uuns[np.random.randint(0,len(student_uuns))]
            course = course_codes[np.random.randint(0,len(course_codes))]
            stu_cou = (student, course)
            if stu_cou not in students_courses:
                students_courses.append(stu_cou)
                break
    exams = []
    for stu_cou in students_courses:
        row = {}
        row["student"] = stu_cou[0]
        row["course"] = stu_cou[1]
        row["date"] = get_exam_date()
        row["grade"] = get_exam_grade()
        exams.append(row)
    return pd.DataFrame(exams)

In [49]:
## Generate Programmes
def create_programmes(num_rows, degrees, courses):
    degree_codes = degrees["code"].values
    course_codes = courses["code"].values
    degrees_courses = []
    for i in range(num_rows):
        while True:
            degree = degree_codes[np.random.randint(1,len(degree_codes))] # start at 1, at least one degree has no courses
            course = course_codes[np.random.randint(0,len(course_codes))]
            deg_cou = (degree, course)
            if deg_cou not in degrees_courses:
                degrees_courses.append(deg_cou)
                break
    programmes = []
    for deg_cou in degrees_courses:
        row = {}
        row["degree"] = deg_cou[0]
        row["course"] = deg_cou[1]
        programmes.append(row)
    return pd.DataFrame(programmes)

In [24]:
### AESTHETICS
def get_course_name(code): # upto 60 characters
    course_prefixes = ["Intro to","Advanced","Applied", "Numerical", "Practical", "Theoretical"]
    prefix = course_prefixes[np.random.randint(len(course_prefixes))]
    course_names = ["Databases","Stats","Probability","Algebra","Computer Systems", "Psychology","Engineering"]
    code = code[:4]
    if code in ["INF0","DATA","AI00"]:
        course_names = ["Databases","Logic","Computer Systems", "Artifical Int","Machine Learning"]
    elif code in ["MATH","MECH","ENGI","STAT","CHEM"]:
        course_names = ["Stats","Probability","Algebra","Calculus", "Engineering"]
    else:
        course_names = ["Psychology", "Criminology","Art History", "Accounting","Business"]
    name = course_names[np.random.randint(len(course_names))]
    return f"{prefix} {name} {np.random.randint(1,3)}"

deg_prefixs = ["INF","AI","DATA","MATH","STAT","MECH","ENGI","CHEM","LAW","PSYC","BUSI"]

def get_course_code():
    prefix = deg_prefixs[np.random.randint(len(deg_prefixs))].ljust(4,"0")
    return f"{prefix}{np.random.randint(1000,10000)}"


def get_degree_code():
    prefix = deg_prefixs[np.random.randint(len(deg_prefixs))].ljust(4,"0")
    return f"D{prefix}{np.random.randint(100,1000)}"

def get_degree_name(): # upto 10 characters
    prefix = deg_prefixs[np.random.randint(len(deg_prefixs))]
    return f"DEG{prefix}{np.random.randint(1,9)}" 

def get_student_uun():
    return f"s18{np.random.randint(10000,100000)}"

def get_student_name():
    firsts = ['Amanda','Bob', 'Catherine', 'David', 'Edward', 'Frank', 'Gloria', 'Helen', 'Irene', 'Jessica', 'Kevin', 'Lee', 'Michael', 'Nicole',
              'Ollie', 'Patty', 'Quentin','Russell', 'Stacy', 'Timothy', 'Ursula', 'Velma', 'William','Xander', 'Yvonne', 'Zoe']
    lasts = ['Armstrong', 'Brown', 'Clark', 'Dorn', 'Evans', 'Foreman', 'Glass', 'Hall', 'Inda', 'Jackson', 'Kelly', 'Larsen', 'Mcdonald', 'Norton',
             'Obryan', 'Parker', 'Quinn', 'Richards', 'Smith', 'Talbott', 'Underwood', 'Vogler', 'White', 'Xiong', 'Young', 'Zeigler']
    first = firsts[np.random.randint(len(firsts))]
    last = lasts[np.random.randint(len(lasts))]
    return f"{first} {last}"

def get_exam_date():
    return f"2020-12-{np.random.randint(10,20)}"

def get_exam_grade():
    grade_band = np.random.randint(0,4)
    return int([0,40,60,80][grade_band] + np.random.rand() * [40,20,20,20][grade_band])

In [51]:
np.random.seed(1)
courses = create_courses(20)
degrees = create_degrees(15)
students = create_students(250,degrees)
exams = create_exams(500,students,courses)
programmes = create_programmes(30, degrees, courses)


courses.to_csv("courses.csv", index=False,header=False)
degrees.to_csv("degrees.csv", index=False,header=False)
students.to_csv("students.csv", index=False,header=False)
exams.to_csv("exams.csv", index=False,header=False)
programmes.to_csv("programmes.csv", index=False,header=False)


UG
PG
UG
PG
UG
UG
UG
UG
UG
UG
UG
PG
PG
UG
PG
