In [1]:

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
import random


In [2]:
file_path = 'student_data_generated.csv'
data = pd.read_csv(file_path)

# Remove rows with all NaN values
data.dropna(how='all', inplace=True)

# Fill missing Department values with 'Software Engineering'
data['Department'].fillna('Software Engineering', inplace=True)

# Fill missing Batch values with '23'
data['Batch'].fillna('23', inplace=True)

# Strip leading and trailing whitespaces from all string columns
data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)

print(data)



              Name   Roll No  Batch               Department
0        Student_1     Roll1     23     Software Engineering
1        Student_2     Roll2     22         Computer Science
2        Student_3     Roll3     22         Computer Science
3        Student_4     Roll4     24       Business Analytics
4        Student_5     Roll5     24         Computer Science
...            ...       ...    ...                      ...
2495  Student_2496  Roll2496     23     Software Engineering
2496  Student_2497  Roll2497     21  Artificial Intelligence
2497  Student_2498  Roll2498     22     Software Engineering
2498  Student_2499  Roll2499     21       Business Analytics
2499  Student_2500  Roll2500     22  Artificial Intelligence

[2500 rows x 4 columns]


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Department'].fillna('Software Engineering', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Batch'].fillna('23', inplace=True)
  data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)


In [3]:
# Convert 'Department' and 'Batch' to one-hot encoded representation
encoder = OneHotEncoder()
department_batch_encoded = encoder.fit_transform(data[['Department', 'Batch']])
department_batch_encoded_df = pd.DataFrame(department_batch_encoded.toarray(), columns=encoder.get_feature_names_out(['Department', 'Batch']))

k = 5  # Number of clusters
kmeans = KMeans(n_clusters=k, random_state=42)
clusters = kmeans.fit_predict(department_batch_encoded_df)

data['Cluster'] = clusters

for cluster_label in sorted(data['Cluster'].unique()):
    cluster_data = data[data['Cluster'] == cluster_label].head(10)
    print(f"Cluster {cluster_label + 1}:\n{cluster_data}\n")

Cluster 1:
          Name Roll No  Batch              Department  Cluster
6    Student_7   Roll7     21  Electrical Engineering        0
13  Student_14  Roll14     21    Software Engineering        0
14  Student_15  Roll15     21  Electrical Engineering        0
15  Student_16  Roll16     21      Business Analytics        0
17  Student_18  Roll18     21  Electrical Engineering        0
31  Student_32  Roll32     21  Electrical Engineering        0
34  Student_35  Roll35     21      Business Analytics        0
41  Student_42  Roll42     21    Software Engineering        0
46  Student_47  Roll47     21    Software Engineering        0
52  Student_53  Roll53     21      Business Analytics        0

Cluster 2:
          Name Roll No  Batch               Department  Cluster
0    Student_1   Roll1     23     Software Engineering        1
9   Student_10  Roll10     23   Electrical Engineering        1
11  Student_12  Roll12     23  Artificial Intelligence        1
23  Student_24  Roll24     2

In [4]:
# Generate 30 rooms with random capacities
room_capacities = {f'Room{i}': random.randint(30, 35) for i in range(1, 31)}

# Print the generated room names and capacities
for room, capacity in room_capacities.items():
    print(f"{room}: Capacity - {capacity}")

# Define the assigned days for each department
assigned_days = {
    'Computer Science': 'Monday',
    'Software Engineering': 'Tuesday',
    'Electrical Engineering': 'Wednesday',
    'Business Analytics': 'Thursday',
    'Artificial Intelligence': 'Friday'
}

# Define the department for which you want to print the schedule
selected_department = 'Computer Science'

# Generate the schedule based on assigned days
schedule = {department: random.sample(list(room_capacities.keys()), min(len(room_capacities), len(room_capacities))) for department in assigned_days.keys()}

selected_schedule = schedule[selected_department]

# Retrieve the student data for the selected department
selected_students = data[data['Department'] == selected_department]

# Assign rooms to students
room_assignments = {}
room_index = 0
for index, student in selected_students.iterrows():
    if room_index >= len(selected_schedule):
        break
    room = selected_schedule[room_index]
    if room not in room_assignments:
        room_assignments[room] = []
    room_assignments[room].append(student)

    if len(room_assignments[room]) >= room_capacities[room]:
        room_index += 1

# Prepare data for tabular form
table_data = []
for room, students in room_assignments.items():
    for student in students:
        table_data.append([
            student['Name'],
            student['Roll No'],
            selected_department,
            room,
            assigned_days[selected_department]
        ])

# Print the student information in tabular form using pandas DataFrame
df = pd.DataFrame(table_data, columns=['Name', 'Roll No', 'Department', 'Room', 'Day'])
print(f"Schedule for {selected_department} ({assigned_days[selected_department]}):")
print(df)

Room1: Capacity - 32
Room2: Capacity - 30
Room3: Capacity - 31
Room4: Capacity - 32
Room5: Capacity - 32
Room6: Capacity - 31
Room7: Capacity - 34
Room8: Capacity - 30
Room9: Capacity - 31
Room10: Capacity - 32
Room11: Capacity - 35
Room12: Capacity - 32
Room13: Capacity - 31
Room14: Capacity - 34
Room15: Capacity - 33
Room16: Capacity - 35
Room17: Capacity - 32
Room18: Capacity - 32
Room19: Capacity - 31
Room20: Capacity - 33
Room21: Capacity - 34
Room22: Capacity - 30
Room23: Capacity - 34
Room24: Capacity - 32
Room25: Capacity - 35
Room26: Capacity - 33
Room27: Capacity - 33
Room28: Capacity - 33
Room29: Capacity - 34
Room30: Capacity - 30
Schedule for Computer Science (Monday):
             Name   Roll No        Department    Room     Day
0       Student_2     Roll2  Computer Science   Room7  Monday
1       Student_3     Roll3  Computer Science   Room7  Monday
2       Student_5     Roll5  Computer Science   Room7  Monday
3       Student_8     Roll8  Computer Science   Room7  Monday

In [5]:
# Load faculty data
faculty_data = pd.read_csv('teacher_data.csv')

# Assign teachers to each room based on their department
room_teacher_assignments = {}
for room, students in room_assignments.items():
    department = students[0]['Department']  # Accessing the department of the first student in the list
    relevant_faculty = faculty_data[faculty_data['Department'] == department]
    assigned_teacher = random.choice(relevant_faculty['Name'].tolist())
    room_teacher_assignments[room] = {'Teacher': assigned_teacher, 'Department': department}

# Convert room_teacher_assignments dictionary to DataFrame
df_teacher_assignments = pd.DataFrame(room_teacher_assignments.items(), columns=['Room', 'Data'])
df_teacher_assignments[['Teacher', 'Department']] = pd.DataFrame(df_teacher_assignments['Data'].tolist(), index=df_teacher_assignments.index)
df_teacher_assignments.drop(columns=['Data'], inplace=True)

print("**********************************        Fast NUCES       *****************************************")
print("\n Exam Schedule For Final Term\n")
table_data = []
for room, students in room_assignments.items():
    # Accessing the cluster of the first student in the list
    cluster = data.loc[data['Roll No'] == students[0]['Roll No'], 'Cluster'].values[0]
    teacher = df_teacher_assignments.loc[df_teacher_assignments['Room'] == room, 'Teacher'].values[0]
    department = df_teacher_assignments.loc[df_teacher_assignments['Room'] == room, 'Department'].values[0]
    day = assigned_days[department]
    for student in students:
        table_data.append([
            student['Name'],
            student['Roll No'],
            room,
            teacher,
            department,
            day
        ])

# Print the student information in tabular form using pandas DataFrame
df = pd.DataFrame(table_data, columns=['Name', 'Roll No', 'Room', 'Teacher', 'Department', 'Day'])
print(df.head(35))


**********************************        Fast NUCES       *****************************************

 Exam Schedule For Final Term

           Name  Roll No    Room   Teacher        Department     Day
0     Student_2    Roll2   Room7  Khadijah  Computer Science  Monday
1     Student_3    Roll3   Room7  Khadijah  Computer Science  Monday
2     Student_5    Roll5   Room7  Khadijah  Computer Science  Monday
3     Student_8    Roll8   Room7  Khadijah  Computer Science  Monday
4    Student_17   Roll17   Room7  Khadijah  Computer Science  Monday
5    Student_27   Roll27   Room7  Khadijah  Computer Science  Monday
6    Student_31   Roll31   Room7  Khadijah  Computer Science  Monday
7    Student_38   Roll38   Room7  Khadijah  Computer Science  Monday
8    Student_58   Roll58   Room7  Khadijah  Computer Science  Monday
9    Student_64   Roll64   Room7  Khadijah  Computer Science  Monday
10   Student_67   Roll67   Room7  Khadijah  Computer Science  Monday
11   Student_72   Roll72   Room7  Khadi