In [8]:
import hashlib
import numpy as np
import pandas as pd


class StudentDataset:
    """Reproducible synthetic student dataset keyed by a username.

    The random generator is seeded from the SHA-256 digest of ``username``
    (reduced modulo ``2**32``), so the same ``username`` and ``num_students``
    always produce the same data. The DataFrame is generated once in
    ``__init__`` and stored on ``full_df``.

    Attributes:
        username: The username used for seeding and for the plot watermark.
        num_students: Number of rows to generate.
        rng: The seeded ``numpy.random.Generator`` used for every draw.
        full_df: DataFrame with columns ``Gender``, ``Major``, ``Program``
            and ``GPA``.
    """

    # Category labels and their sampling weights.
    _GENDERS = ["Male", "Female", "Other"]
    _GENDER_WEIGHTS = [0.65, 0.33, 0.02]

    _MAJORS = ["B.tech", "MS", "Phd"]
    _MAJOR_WEIGHTS = [0.70, 0.20, 0.10]

    _PROGRAMS = ["CSE", "ECE", "CHD", "CND"]
    # Program weights per major; any major not listed here uses the default.
    _PROGRAM_WEIGHTS = {
        "B.tech": [0.40, 0.40, 0.10, 0.10],
        "MS": [0.30, 0.30, 0.20, 0.20],
    }
    _DEFAULT_PROGRAM_WEIGHTS = [0.25, 0.25, 0.25, 0.25]

    # (mean, standard deviation) of the GPA distribution per major; any major
    # not listed here uses the default.
    _GPA_PARAMS = {
        "B.tech": (7.0, 1.0),
        "MS": (8.0, 0.7),
    }
    _DEFAULT_GPA_PARAMS = (8.3, 0.5)
    _GPA_MIN = 4.0
    _GPA_MAX = 10.0

    def __init__(self, num_students: int, username: str):
        """Seed the generator from ``username`` and build the dataset.

        Args:
            num_students: Number of student rows to generate.
            username: String hashed to obtain the random seed; also stored
                so that ``_add_watermark`` can draw it on plots.
        """
        # Keep the username on the instance: _add_watermark reads it.
        self.username = username

        # Generate the seed for the random number generator.
        seed = int(hashlib.sha256(username.encode()).hexdigest(), 16) % (2**32)
        self.rng = np.random.default_rng(seed)
        self.num_students = num_students

        # Generate the dataset once at initialization.
        self.full_df = self.assemble_dataframe()

    def _add_watermark(self):
        """Draw the username in the top-right corner of the current axes.

        The text is placed at (0.95, 0.95) in axes coordinates, right/top
        aligned, in semi-transparent gray.
        """
        # NOTE(review): `plt` is not imported in this cell; presumably
        # `matplotlib.pyplot` is imported as `plt` elsewhere in the
        # notebook -- confirm.
        plt.text(
            0.95, 0.95, self.username,
            ha='right', va='top',
            transform=plt.gca().transAxes,
            fontsize=10, color='gray', alpha=0.7
        )

    def gender_generater(self) -> np.ndarray:
        """Draw one gender label per student.

        Returns:
            Array of ``num_students`` labels drawn from "Male", "Female" and
            "Other" with probabilities 0.65, 0.33 and 0.02.
        """
        return self.rng.choice(
            self._GENDERS,
            size=self.num_students,
            p=self._GENDER_WEIGHTS,
        )

    def generate_major(self) -> np.ndarray:
        """Draw one major label per student.

        Returns:
            Array of ``num_students`` labels drawn from "B.tech", "MS" and
            "Phd" with probabilities 0.70, 0.20 and 0.10.
        """
        return self.rng.choice(
            self._MAJORS,
            size=self.num_students,
            p=self._MAJOR_WEIGHTS,
        )

    def generate_program_with_major(self, majors: list[str]) -> list[str]:
        """Draw a program for each student, weighted by that student's major.

        Args:
            majors: Sequence of major labels (a list or NumPy array), one
                per student.

        Returns:
            List of program labels, one per entry of ``majors``, in order.
        """
        programs = []
        # One draw per student, in order: the sequence of generator calls
        # determines the data produced for a given seed.
        for major in majors:
            weights = self._PROGRAM_WEIGHTS.get(
                str(major), self._DEFAULT_PROGRAM_WEIGHTS
            )
            programs.append(self.rng.choice(self._PROGRAMS, p=weights))
        return programs

    def generate_gpa(self, majors: list[str]) -> list[float]:
        """Draw a GPA for each student from a major-specific normal distribution.

        Each value is clipped to the range [4.0, 10.0].

        Args:
            majors: Sequence of major labels (a list or NumPy array), one
                per student.

        Returns:
            List of GPA values, one per entry of ``majors``, in order.
        """
        gpas = []
        # One draw per student, in order (see generate_program_with_major).
        for major in majors:
            mean, std = self._GPA_PARAMS.get(str(major), self._DEFAULT_GPA_PARAMS)
            gpas.append(
                np.clip(self.rng.normal(mean, std), self._GPA_MIN, self._GPA_MAX)
            )
        return gpas

    def assemble_dataframe(self) -> pd.DataFrame:
        """Generate all columns and assemble them into a DataFrame.

        The generator is consumed in a fixed order: majors, genders,
        programs, then GPAs.

        Returns:
            DataFrame with columns ``Gender``, ``Major``, ``Program``, ``GPA``.
        """
        majors = self.generate_major()
        return pd.DataFrame({
            "Gender": self.gender_generater(),
            "Major": majors,
            "Program": self.generate_program_with_major(majors),
            "GPA": self.generate_gpa(majors),
        })

    def get_full_dataframe(self) -> pd.DataFrame:
        """Return the DataFrame generated at initialization."""
        return self.full_df


In [9]:
# Build a 1000-student dataset seeded from the username, then print the
# generated DataFrame.
dataset = StudentDataset(num_students=1000, username="shriansh.sahu")
print(dataset.get_full_dataframe())

     Gender   Major Program       GPA
0      Male  B.tech     ECE  6.128834
1     Other  B.tech     CSE  6.988293
2    Female  B.tech     CSE  7.675095
3      Male     Phd     CSE  7.709525
4    Female  B.tech     CHD  8.663156
..      ...     ...     ...       ...
995  Female      MS     CSE  8.482928
996  Female  B.tech     CSE  6.530712
997    Male  B.tech     CSE  6.571792
998    Male  B.tech     ECE  6.682845
999    Male  B.tech     CHD  8.975075

[1000 rows x 4 columns]
