In [10]:
import pandas as pd
import numpy as np
import random
from faker import Faker

In [11]:
fake = Faker()

In [12]:
# Parameters
num_students = 500
num_courses = 50
num_faculty = 50
departments = ['Computer Science', 'Mathematics', 'Physics', 'Chemistry', 'Biology', 'English', 'History', 'Economics']
grades = ['A', 'B', 'C', 'D', 'F']

In [13]:
# Predefined course names
course_names = {
    'Computer Science': ["Introduction to Programming", "Data Structures", "Algorithms", "Operating Systems", "Databases"],
    'Mathematics': ["Calculus I", "Calculus II", "Linear Algebra", "Differential Equations", "Probability and Statistics"],
    'Physics': ["Classical Mechanics", "Electromagnetism", "Quantum Mechanics", "Thermodynamics", "Optics"],
    'Chemistry': ["General Chemistry", "Organic Chemistry", "Inorganic Chemistry", "Physical Chemistry", "Analytical Chemistry"],
    'Biology': ["General Biology", "Genetics", "Cell Biology", "Microbiology", "Ecology"],
    'English': ["Introduction to Literature", "Creative Writing", "Shakespeare", "American Literature", "British Literature"],
    'History': ["World History", "European History", "American History", "History of Asia", "History of Africa"],
    'Economics': ["Microeconomics", "Macroeconomics", "International Economics", "Econometrics", "Development Economics"]
}

In [14]:
# Generate Student Enrollment Data
students = []
for _ in range(num_students):
    students.append({
        'student_id': fake.unique.uuid4(),
        'first_name': fake.first_name(),
        'last_name': fake.last_name(),
        'gender': fake.random_element(elements=('M', 'F')),
        'date_of_birth': fake.date_of_birth(minimum_age=18, maximum_age=25),
        'enrollment_date': fake.date_between(start_date='-4y', end_date='today'),
        'major': fake.random_element(elements=departments)
    })

In [15]:
students_df = pd.DataFrame(students)

In [16]:
# Display first few rows of dataframe to ensure correctness
students_df.head()

Unnamed: 0,student_id,first_name,last_name,gender,date_of_birth,enrollment_date,major
0,794dc800-dd06-4f37-8376-61003231ad3d,Cindy,Howard,M,2004-04-21,2024-03-24,Mathematics
1,f0bdf425-dc3d-40e8-94be-f231e7ed9707,John,Owens,F,2005-03-06,2024-05-11,Economics
2,bd1553ce-16f5-46da-b5e8-7579d353606c,Theodore,Perkins,F,1998-08-02,2022-02-01,Biology
3,55473c21-fa77-45b4-aeb7-ca815ddb7a05,Rachel,Burton,F,2005-08-22,2023-07-16,Physics
4,70b08edd-dd31-4ee8-a3cc-2d74b59556c5,Christina,Crawford,F,2001-02-12,2021-05-25,Economics


In [17]:
# Generate Courses Data
courses = []
course_counter = {dept: 0 for dept in departments}
for department in departments:
    for course_name in course_names[department]:
        courses.append({
            'course_code': f"{department[:2].upper()}{course_counter[department]:03}",
            'course_name': course_name,
            'department': department
        })
        course_counter[department] += 1

In [18]:
courses_df = pd.DataFrame(courses)

In [19]:
# Display first few rows of dataframe to ensure correctness
courses_df.head()

Unnamed: 0,course_code,course_name,department
0,CO000,Introduction to Programming,Computer Science
1,CO001,Data Structures,Computer Science
2,CO002,Algorithms,Computer Science
3,CO003,Operating Systems,Computer Science
4,CO004,Databases,Computer Science


In [20]:
# Generate Grades Data
grades_data = []
for _ in range(num_students * 5):  # Assume each student takes 5 courses on average
    grades_data.append({
        'student_id': fake.random_element(elements=students_df['student_id']),
        'course_code': fake.random_element(elements=courses_df['course_code']),
        'grade': fake.random_element(elements=grades)
    })

In [21]:
grades_df = pd.DataFrame(grades_data)

In [22]:
# Display first few rows of dataframe to ensure correctness
grades_df.head()

Unnamed: 0,student_id,course_code,grade
0,6e172d90-2930-435a-9af0-45a796cb5f00,CO000,C
1,559446f6-c57d-4745-86a3-7988303e582d,HI000,A
2,b6dc87cb-a4a9-44b0-8463-6449922dc763,CO003,D
3,b74475c1-37d5-4919-85fe-9b94aa205989,EN004,B
4,3e20d4c5-9e97-43a3-88f8-374279911ff2,CO002,F


In [23]:
# Generate Faculty Data
faculty = []
for _ in range(num_faculty):
    faculty.append({
        'faculty_id': fake.unique.uuid4(),
        'first_name': fake.first_name(),
        'last_name': fake.last_name(),
        'department': fake.random_element(elements=departments),
        'courses_taught': fake.random_element(elements=courses_df['course_code'])
    })

In [24]:
faculty_df = pd.DataFrame(faculty)

In [25]:
# Display first few rows of dataframe to ensure correctness
faculty_df.head()

Unnamed: 0,faculty_id,first_name,last_name,department,courses_taught
0,5ef53663-77c0-47c0-b86f-4113a1421149,Rhonda,Hernandez,Physics,CH000
1,53f25fe4-b65b-48d5-afdc-f5f4806dfc8f,Garrett,Chen,Physics,BI000
2,3207a363-a29d-4e14-b1ac-6e29f5798867,Amber,Harris,Physics,EN001
3,c428326b-2751-4846-88be-3356087dd897,Karen,Crosby,English,CH001
4,a8cd14bb-4a31-4604-b88e-c4561d7793f7,Daniel,Harvey,Economics,EN000


In [26]:
# Generate Departmental Budgets
budgets = []
for department in departments:
    budgets.append({
        'department': department,
        'budget': fake.random_int(min=500000, max=2000000)
    })

In [27]:
budgets_df = pd.DataFrame(budgets)

In [28]:
# Display first few rows of dataframe to ensure correctness
budgets_df.head()

Unnamed: 0,department,budget
0,Computer Science,1248426
1,Mathematics,542462
2,Physics,1278026
3,Chemistry,1973943
4,Biology,1779033


In [29]:
# Save data to CSV files
students_df.to_csv('students.csv', index=False)
courses_df.to_csv('courses.csv', index=False)
grades_df.to_csv('grades.csv', index=False)
faculty_df.to_csv('faculty.csv', index=False)
budgets_df.to_csv('budgets.csv', index=False)

In [30]:
# Checking for the csv files
students = pd.read_csv('students.csv')
students

Unnamed: 0,student_id,first_name,last_name,gender,date_of_birth,enrollment_date,major
0,794dc800-dd06-4f37-8376-61003231ad3d,Cindy,Howard,M,2004-04-21,2024-03-24,Mathematics
1,f0bdf425-dc3d-40e8-94be-f231e7ed9707,John,Owens,F,2005-03-06,2024-05-11,Economics
2,bd1553ce-16f5-46da-b5e8-7579d353606c,Theodore,Perkins,F,1998-08-02,2022-02-01,Biology
3,55473c21-fa77-45b4-aeb7-ca815ddb7a05,Rachel,Burton,F,2005-08-22,2023-07-16,Physics
4,70b08edd-dd31-4ee8-a3cc-2d74b59556c5,Christina,Crawford,F,2001-02-12,2021-05-25,Economics
...,...,...,...,...,...,...,...
495,c93fa4a4-d0b4-4436-809f-8c72e44d1412,Ruben,Fernandez,F,2000-10-04,2023-05-16,Physics
496,24d7231b-3ad4-44cf-a24a-fb92d6cada06,Brad,Miller,M,1998-09-11,2020-07-28,History
497,1f8b6444-8e63-4e80-bc25-77779e89e936,Emily,Wallace,F,2006-01-25,2024-05-02,Chemistry
498,64f3f1cb-e2f8-4ad1-8d04-6699280c3e82,Ashley,Le,M,2004-05-16,2021-04-26,English


In [31]:
courses = pd.read_csv('courses.csv')
courses

Unnamed: 0,course_code,course_name,department
0,CO000,Introduction to Programming,Computer Science
1,CO001,Data Structures,Computer Science
2,CO002,Algorithms,Computer Science
3,CO003,Operating Systems,Computer Science
4,CO004,Databases,Computer Science
5,MA000,Calculus I,Mathematics
6,MA001,Calculus II,Mathematics
7,MA002,Linear Algebra,Mathematics
8,MA003,Differential Equations,Mathematics
9,MA004,Probability and Statistics,Mathematics


In [32]:
grades = pd.read_csv('grades.csv')
grades

Unnamed: 0,student_id,course_code,grade
0,6e172d90-2930-435a-9af0-45a796cb5f00,CO000,C
1,559446f6-c57d-4745-86a3-7988303e582d,HI000,A
2,b6dc87cb-a4a9-44b0-8463-6449922dc763,CO003,D
3,b74475c1-37d5-4919-85fe-9b94aa205989,EN004,B
4,3e20d4c5-9e97-43a3-88f8-374279911ff2,CO002,F
...,...,...,...
2495,7fea9989-2b7c-4597-8a91-3187e61ed16a,CO002,D
2496,fa9439f7-9733-4228-bd97-2c4e874de35f,BI001,C
2497,67bfb7de-15bf-475c-81d7-3fe5ff129643,BI000,C
2498,f3f91aca-e430-4b15-a50f-c379dc5f5e91,BI001,D


In [33]:
faculty = pd.read_csv('faculty.csv')
faculty

Unnamed: 0,faculty_id,first_name,last_name,department,courses_taught
0,5ef53663-77c0-47c0-b86f-4113a1421149,Rhonda,Hernandez,Physics,CH000
1,53f25fe4-b65b-48d5-afdc-f5f4806dfc8f,Garrett,Chen,Physics,BI000
2,3207a363-a29d-4e14-b1ac-6e29f5798867,Amber,Harris,Physics,EN001
3,c428326b-2751-4846-88be-3356087dd897,Karen,Crosby,English,CH001
4,a8cd14bb-4a31-4604-b88e-c4561d7793f7,Daniel,Harvey,Economics,EN000
5,11f79fa5-4183-46f0-bcde-d1ab889216df,Donna,Sullivan,Economics,EN001
6,085d813e-e657-4e6b-8ad1-b812a3c90033,Malik,Kaufman,Mathematics,EC000
7,7564271a-185b-4163-8d1a-f3674ddf88c9,Scott,Jones,Chemistry,EN003
8,c51ed14d-dffb-47e8-9ec8-2868b73b600c,Sabrina,Miller,Physics,HI002
9,8862465b-5462-476a-9d23-d6c77471ae34,Kimberly,Adkins,Mathematics,PH004


In [34]:
budgets = pd.read_csv('budgets.csv')
budgets

Unnamed: 0,department,budget
0,Computer Science,1248426
1,Mathematics,542462
2,Physics,1278026
3,Chemistry,1973943
4,Biology,1779033
5,English,1198269
6,History,1992343
7,Economics,1412946
