#### GENERATE THE FAKE DATA SET

In [4]:
!pip install faker

Collecting faker
  Downloading faker-37.4.2-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.4.2-py3-none-any.whl (1.9 MB)
   ---------------------------------------- 0.0/1.9 MB ? eta -:--:--
   ---------------------------------------- 1.9/1.9 MB 15.8 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.4.2


In [11]:
import pandas as pd
import numpy as np 
import random 
from faker import Faker

#### STUDENT TABLE

In [14]:
fake = Faker("en_IN")


def create_student_table(total_records):
    """Build a DataFrame of synthetic student records.

    Every row gets a unique ``student_id`` in 1..500, a first name that
    matches the randomly chosen gender, and contact / enrolment details
    produced by the module-level ``fake`` generator.

    Args:
        total_records: Number of rows to generate. Ids are drawn without
            repetition from 1..500, so Faker's unique proxy raises
            ``UniquenessException`` if more than 500 rows are requested.

    Returns:
        pandas.DataFrame sorted by ``student_id`` with a fresh 0-based
        index. When ``total_records`` is 0 or negative the frame is empty
        but still carries every expected column.
    """
    columns = [
        "student_id",
        "name",
        "age",
        "gender",
        "email",
        "phone_num",
        "city",
        "course_batch",
        "enrollment_year",
        "graduation_year",
    ]

    # Uniqueness only has to hold within one table. Forget ids handed out
    # by earlier calls so the function can be re-run on the same Faker
    # instance without exhausting the 1..500 pool.
    fake.unique.clear()

    students_table = []
    for _ in range(total_records):
        student = {}
        student["student_id"] = fake.unique.random_int(min=1, max=500)
        student["age"] = fake.random_int(min=18, max=25)

        # Pick the gender first so the first name can agree with it.
        gender = random.choice(["Male", "Female"])
        student["gender"] = gender
        if gender == "Male":
            first_name = fake.first_name_male()
        else:
            first_name = fake.first_name_female()
        student["name"] = f"{first_name} {fake.last_name()}"

        student["email"] = fake.email()
        student["phone_num"] = fake.phone_number()
        student["city"] = fake.city()
        student["course_batch"] = fake.random_element(elements=["AIML"])

        # Graduation always follows one year after enrolment.
        enrollment_year = fake.random_element(elements=[2024])
        student["enrollment_year"] = enrollment_year
        student["graduation_year"] = enrollment_year + 1

        students_table.append(student)

    # Passing the column list fixes the column order and keeps the
    # ``student_id`` column present even when no rows were generated,
    # so the sort below cannot fail with KeyError.
    df = pd.DataFrame(students_table, columns=columns)
    return df.sort_values(by="student_id").reset_index(drop=True)


In [15]:
# Build the 500-row student table and save it as CSV (row index omitted).
df_students = create_student_table(total_records=500)
df_students.to_csv("Student_Table.csv", index=False)

#### PROGRAMMING TABLE

In [23]:
fake = Faker("en_IN")
language = ['Python','SQL']


def create_programming_table(total_records):
    """Build a DataFrame of synthetic programming-performance records.

    Each row pairs a unique ``programming_id`` (1000..2000) with a unique
    ``student_id`` (1..500) and random activity counters and scores.

    Args:
        total_records: Number of rows to generate. Student ids are drawn
            without repetition from 1..500, so Faker's unique proxy raises
            ``UniquenessException`` if more than 500 rows are requested.

    Returns:
        pandas.DataFrame sorted by ``student_id`` with a fresh 0-based
        index. When ``total_records`` is 0 or negative the frame is empty
        but still carries every expected column.
    """
    columns = [
        "programming_id",
        "student_id",
        "language",
        "problems_solved",
        "assessments_completed",
        "mini_projects",
        "certifications_earned",
        "latest_project_score",
    ]

    # Uniqueness only has to hold within one table. Forget ids handed out
    # by earlier calls so the function can be re-run on the same Faker
    # instance without exhausting the id pools.
    fake.unique.clear()

    programming_table = []
    for _ in range(total_records):
        program = {}
        program["programming_id"] = fake.unique.random_int(min=1000, max=2000)
        program["student_id"] = fake.unique.random_int(min=1, max=500)
        # Sampling two of the module-level languages without replacement
        # yields both entries of the current two-item list in random order.
        program["language"] = ",".join(random.sample(language, 2))
        program["problems_solved"] = random.randint(40, 80)
        program["assessments_completed"] = random.randint(2, 10)
        program["mini_projects"] = random.randint(1, 10)
        program["certifications_earned"] = random.randint(1, 10)
        program["latest_project_score"] = random.randint(30, 100)

        programming_table.append(program)

    # Passing the column list keeps ``student_id`` present even when no
    # rows were generated, so the sort below cannot fail with KeyError.
    df = pd.DataFrame(programming_table, columns=columns)
    return df.sort_values(by="student_id").reset_index(drop=True)


In [24]:
# Build one programming record per student (500 rows) and save it as CSV
# with the row index omitted.
df_programming = create_programming_table(total_records=500)
df_programming.to_csv("Programming_Table.csv", index=False)

#### SOFT SKILLS TABLE

In [29]:
fake = Faker("en_IN")


def create_softskills_table(total_records):
    """Build a DataFrame of synthetic soft-skill scores.

    Each row pairs a unique ``soft_skill_id`` (2000..3000) with a unique
    ``student_id`` (1..500) and six independent scores in 30..100.

    Args:
        total_records: Number of rows to generate. Student ids are drawn
            without repetition from 1..500, so Faker's unique proxy raises
            ``UniquenessException`` if more than 500 rows are requested.

    Returns:
        pandas.DataFrame sorted by ``student_id`` with a fresh 0-based
        index. When ``total_records`` is 0 or negative the frame is empty
        but still carries every expected column.
    """
    score_columns = [
        "communication",
        "teamwork",
        "presentation",
        "leadership",
        "critical_thinking",
        "interpersonal_skills",
    ]
    columns = ["soft_skill_id", "student_id", *score_columns]

    # Uniqueness only has to hold within one table. Forget ids handed out
    # by earlier calls so the function can be re-run on the same Faker
    # instance without exhausting the id pools.
    fake.unique.clear()

    soft_skills_table = []
    for _ in range(total_records):
        skills = {}
        skills["soft_skill_id"] = fake.unique.random_int(min=2000, max=3000)
        skills["student_id"] = fake.unique.random_int(min=1, max=500)
        # Every skill is scored on the same 30..100 scale.
        for skill_name in score_columns:
            skills[skill_name] = random.randint(30, 100)

        soft_skills_table.append(skills)

    # Passing the column list keeps ``student_id`` present even when no
    # rows were generated, so the sort below cannot fail with KeyError.
    df = pd.DataFrame(soft_skills_table, columns=columns)
    return df.sort_values(by="student_id").reset_index(drop=True)
        

In [30]:
# Build one soft-skills record per student (500 rows) and save it as CSV
# with the row index omitted.
df_softskills = create_softskills_table(total_records=500)
df_softskills.to_csv("Softskills_Table.csv", index=False)

#### PLACEMENT TABLE

In [31]:
fake = Faker("en_IN")


def create_placement_table(total_records):
    """Build a DataFrame of synthetic placement records.

    Each row pairs a unique ``placement_id`` (3000..4000) with a unique
    ``student_id`` (1..500). A student is "Ready" when the mock interview
    score is at least 60; a ready student is then placed when
    ``random.random() > 0.3``. Placed rows carry a company, package,
    cleared-round count and date; all other rows carry ``None`` / 0
    placeholders in those columns.

    Args:
        total_records: Number of rows to generate. Student ids are drawn
            without repetition from 1..500, so Faker's unique proxy raises
            ``UniquenessException`` if more than 500 rows are requested.

    Returns:
        pandas.DataFrame sorted by ``student_id`` with a fresh 0-based
        index. When ``total_records`` is 0 or negative the frame is empty
        but still carries every expected column.
    """
    columns = [
        "placement_id",
        "student_id",
        "mock_interview_score",
        "internships_completed",
        "placement_status",
        "company_name",
        "placement_package",
        "interview_rounds_cleared",
        "placement_date",
    ]

    # Uniqueness only has to hold within one table. Forget ids handed out
    # by earlier calls so the function can be re-run on the same Faker
    # instance without exhausting the id pools.
    fake.unique.clear()

    placement_table = []
    for _ in range(total_records):
        placement = {}
        placement["placement_id"] = fake.unique.random_int(min=3000, max=4000)
        placement["student_id"] = fake.unique.random_int(min=1, max=500)

        mock_score = random.randint(40, 100)
        placement["mock_interview_score"] = mock_score
        placement["internships_completed"] = random.randint(1, 3)

        # Readiness is decided by the mock interview score alone.
        placement_status = "Ready" if mock_score >= 60 else "Not Ready"
        placement["placement_status"] = placement_status

        # Start from the "not placed" placeholders and overwrite them only
        # for ready students who pass the random placement draw.
        placement["company_name"] = None
        placement["placement_package"] = 0
        placement["interview_rounds_cleared"] = 0
        placement["placement_date"] = None
        if placement_status == "Ready" and random.random() > 0.3:
            placement["company_name"] = fake.company()
            placement["placement_package"] = random.randint(50000, 300000)
            placement["interview_rounds_cleared"] = random.randint(3, 7)
            placement["placement_date"] = fake.date_this_month()

        placement_table.append(placement)

    # Passing the column list keeps ``student_id`` present even when no
    # rows were generated, so the sort below cannot fail with KeyError.
    df = pd.DataFrame(placement_table, columns=columns)
    return df.sort_values(by="student_id").reset_index(drop=True)


In [32]:
# Build one placement record per student (500 rows) and save it as CSV
# with the row index omitted.
df_placement = create_placement_table(total_records=500)
df_placement.to_csv("Placement_Table.csv", index=False)