In [1]:
import random
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

class FacultyData:
    def __init__(self, num_records=1000):
        self.num_records = num_records
        self.faculty_data = []
        self.df = None
        self.generate_data()

    def generate_data(self):
        designation_mapper = lambda exp: ("Professor", 1, 80000, 150000) if exp >= 20 else ("Associate Professor", 2, 50000, 80000) if exp >= 10 else ("Assistant Professor", 3, 30000, 50000)
        salary_mapper = lambda min_salary, max_salary: random.randint(min_salary, max_salary)
        patents_mapper = lambda publications: publications // 5

        for i in range(1, self.num_records + 1):
            experience = random.randint(1, 40)
            age = experience + random.randint(25, 35)
            designation, factor, min_salary, max_salary = designation_mapper(experience)
            salary = salary_mapper(min_salary, max_salary)
            publications = random.randint(1, 50)
            patents = patents_mapper(publications)
            gender = random.choice(["Male", "Female"])

            self.faculty_data.append({
                "Faculty ID": i,
                "Name": f"Faculty_{i}",
                "Experience": experience,
                "Age": age,
                "Designation": designation,
                "Designation Factor": factor,
                "Salary": salary,
                "Gender": gender,
                "Publications": publications,
                "Patents": patents,
            })

        self.df = pd.DataFrame(self.faculty_data)

    def convert_designation_factors(self):
        return {"Professor": 1, "Associate Professor": 2, "Assistant Professor": 3}

    def logistic_regression_analysis(self):
        label_encoder = LabelEncoder()
        self.df['Designation Factor Encoded'] = label_encoder.fit_transform(self.df['Designation'])


        X_exp = self.df[['Experience']]
        y_exp = self.df['Designation Factor Encoded']
        model_exp = LogisticRegression().fit(X_exp, y_exp)


        X_age = self.df[['Age']]
        model_age = LogisticRegression().fit(X_age, y_exp)


        sample_experience = [[5], [15], [25], [35]]
        sample_age = [[30], [40], [50], [60]]

        exp_predictions = model_exp.predict(sample_experience)
        age_predictions = model_age.predict(sample_age)

        print("\nPredicted Designation based on Experience:")
        for exp, pred in zip(sample_experience, exp_predictions):
            print(f"Experience {exp[0]} -> Predicted Designation Factor: {pred}")

        print("\nPredicted Designation based on Age:")
        for age, pred in zip(sample_age, age_predictions):
            print(f"Age {age[0]} -> Predicted Designation Factor: {pred}")


faculty = FacultyData()
faculty.logistic_regression_analysis()



Predicted Designation based on Experience:
Experience 5 -> Predicted Designation Factor: 0
Experience 15 -> Predicted Designation Factor: 1
Experience 25 -> Predicted Designation Factor: 2
Experience 35 -> Predicted Designation Factor: 2

Predicted Designation based on Age:
Age 30 -> Predicted Designation Factor: 0
Age 40 -> Predicted Designation Factor: 1
Age 50 -> Predicted Designation Factor: 2
Age 60 -> Predicted Designation Factor: 2


