#### Introduction

This is a mini project for SC1003 Introduction to Computational Thinking and Programming at NTU. We are FCSD-Group 4, consisting of Yan Jie, Samuel, Timothy, Keith and Run Ze.

#### Algorithmic Thinking
The program reads large CSV files containing the details of students from different tutorial groups and sorts the students of each tutorial group into evenly distributed teams based on the following criteria, in order of significance:
1. Average CGPA
2. Faculty
3. Gender

#### Flowchart

#### Libraries and Dependencies

In [360]:
# Import visualization libraries (Graphing and Analysis only)
import plotly.graph_objects as go

# Random library to generate random integers (Mainly used for sorting)
import random

# Copy library to allow deepcopy (Stops internal pointer references in dictionaries)
import copy

#### Basic File Management and Sorting Functions

In [361]:
class Student:
    """A single student record built from one row of the records file."""

    def __init__(self, tgrp, id, faculty, name, gender, cgpa):
        """Store the fields of one record.

        tgrp is converted with int() and cgpa with float(); every other
        field is stored exactly as given.
        """
        self.tutorialGrp = int(tgrp)
        self.studID = id
        self.faculty = faculty
        self.name = name
        self.gender = gender
        self.cgpa = float(cgpa)
        # No team is known when the record is created. The attribute must
        # still exist, otherwise printStudent() raises AttributeError.
        self.team = None

    def printStudent(self):
        """Print every field of the student, including the team, on one line."""
        print(f'{self.tutorialGrp}, {self.studID}, {self.faculty}, {self.name}, {self.gender}, {self.cgpa}, {self.team}')

In [362]:
# Initialize data
studentList = []
tgrpsTeamsList = {}

# Read every record of 'records.csv' into studentList as Student objects
with open('records.csv') as records:
    # Skip the header row; the default stops an empty file from raising StopIteration
    next(records, None)

    for rawLine in records:
        rawLine = rawLine.strip()
        if not rawLine:
            # A blank line (or a file with only a header) holds no record
            continue

        fields = rawLine.split(',')
        # Drop the two-character prefix of the tutorial group column
        # (presumably "G-") so that Student can convert the rest with int()
        fields[0] = fields[0][2:]

        student = Student(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5])
        studentList.append(student)

In [363]:
# Order the student records by tutorial group
def sortData(data):
    """Return a new list holding the items of data in ascending tutorial group order.

    The input is left untouched; items of the same tutorial group keep their
    original relative order.
    """
    def groupOf(entry):
        return entry.tutorialGrp

    ordered = list(data)
    ordered.sort(key=groupOf)
    return ordered

# Order the student records by tutorial group first, then by CGPA
def sortCGPA(data):
    """Return a new list of the items of data sorted by (tutorial group, CGPA).

    Both keys are ascending; the input is left untouched.
    """
    def groupThenCgpa(entry):
        return (entry.tutorialGrp, entry.cgpa)

    ordered = list(data)
    ordered.sort(key=groupThenCgpa)
    return ordered

# A function that counts and returns the total number of unique tutorial groups
def getNumOfTgrps(studentList):
    """Return how many distinct tutorialGrp values occur in studentList.

    A set is used so that each membership check is constant time instead of
    scanning a list of the groups seen so far for every student.
    """
    return len({student.tutorialGrp for student in studentList})

# Debugging helper only
def printTeam(team):
    """Print the name and CGPA of every member of team, each followed by a blank line."""
    for member in team:
        summary = f'{member.name} and {member.cgpa}\n'
        print(summary)

#### Data Analysis with Graphs from Plotly

In [364]:
# Graph Visualization functions using Plotly
#TODO: Exception Handling
def showCGPAPerGroup():
    """Show a box plot of the CGPA values of each tutorial group.

    Reads the module-level studentList, ordered by tutorial group and CGPA.
    """
    orderedStudents = sortCGPA(studentList)

    # Parallel axes: entry i of both lists belongs to the same student
    groupAxis = [entry.tutorialGrp for entry in orderedStudents]
    cgpaAxis = [entry.cgpa for entry in orderedStudents]

    # One box per distinct x value (tutorial group)
    figure = go.Figure(
        go.Box(
            x = groupAxis,
            y = cgpaAxis,
            name = 'CGPA / Tutorial Group'
        )
    )

    # Title and axis labels of the graph
    figure.update_layout(
        title="CGPA / Tutorial Group",
        xaxis_title="Tutorial Group",
        yaxis_title="CGPA")
    figure.show()
    
# showCGPAPerGroup()

In [365]:
#TODO: Exception Handling
def showStudentsPerGroup():
    """Show a bar chart of the number of students in each tutorial group.

    Reads the module-level studentList.
    """
    # Tally the students per tutorial group; the keys end up in ascending
    # group order because the records are visited in sorted order
    headcount = {}
    for entry in sortData(studentList):
        headcount[entry.tutorialGrp] = headcount.get(entry.tutorialGrp, 0) + 1

    bars = go.Bar(
        x = list(headcount),
        y = list(headcount.values()),
        name = 'Number of Students / Tutorial Group'
    )

    # Build the figure, then set the title and axis labels
    figure = go.Figure(bars)
    figure.update_layout(
        title="No. of Students / Tutorial Group",
        xaxis_title="Tutorial Group",
        yaxis_title="No. Of Students")
    figure.show()

# showStudentsPerGroup()

In [366]:
#TODO: Exception Handling
def showFacultyPerGroup(tutorialGrpNum):
    """Show a bar chart of the faculty distribution inside one tutorial group.

    Reads the module-level studentList. Every faculty that occurs anywhere in
    the data appears on the x axis, with a count of zero when the selected
    tutorial group has no student from it.

    Args:
        tutorialGrpNum: number of the tutorial group to display.

    Raises:
        KeyError: if no student belongs to tutorialGrpNum.
    """
    studList = sortData(studentList)

    # Start every known faculty at zero so that absent faculties are still shown
    facPerGrp = {}
    for student in studList:
        facPerGrp.setdefault(student.faculty, 0)

    # Select the members by their tutorial group rather than by a computed
    # index window, which was only right when every group had exactly 50
    # students and the groups were numbered contiguously from 1
    groupMembers = [student for student in studList if student.tutorialGrp == tutorialGrpNum]
    if not groupMembers:
        raise KeyError(tutorialGrpNum)

    # Count the students per faculty in the selected tutorial group
    for student in groupMembers:
        facPerGrp[student.faculty] += 1

    barTrace1 = go.Bar(
            x = list(facPerGrp.keys()),
            y = list(facPerGrp.values()),
            name = 'No. of Students / Faculty'
        )

    # Initialize the graph object with go.Figure()
    fig = go.Figure(barTrace1)
    fig.update_layout(
        title="No. of Students / Faculty",
        xaxis_title="Faculty",
        yaxis_title="No. Of Students") # Name the title of the graph
    fig.show() # Show the graph
    
# showFacultyPerGroup(1)

In [367]:
#TODO: Exception Handling
def showGenderPerGroup():
    """Show bar traces of the male and female counts of each tutorial group.

    Reads the module-level studentList. Only the gender values 'Male' and
    'Female' are counted.
    """
    maleTally = {}
    femaleTally = {}

    for entry in sortData(studentList):
        grp = entry.tutorialGrp

        # First student of a group: create both counters for it
        if grp not in maleTally:
            maleTally[grp] = 0
            femaleTally[grp] = 0

        if entry.gender == 'Male':
            maleTally[grp] += 1
        elif entry.gender == 'Female':
            femaleTally[grp] += 1

    # Both tallies share the same keys in the same (ascending group) order
    groupAxis = list(maleTally)

    maleBars = go.Bar(
        x = groupAxis,
        y = list(maleTally.values()),
        hovertext='(Tutorial Group, No. Of Males)',
        name = 'Male')

    femaleBars = go.Bar(
        x = groupAxis,
        y = list(femaleTally.values()),
        hovertext = '(Tutorial Group, No. Of Females)',
        name = 'Female')

    # Empty figure first, then one trace per gender
    figure = go.Figure()
    for bars in (maleBars, femaleBars):
        figure.add_trace(bars)

    figure.update_layout(
        title = "Gender / Tutorial Group",
        xaxis_title = "Tutorial Group",
        yaxis_title = "No. Of Students")
    figure.show()

# showGenderPerGroup()

#### First Algorithm

First we fix the teams based on an ideal gender ratio in each team

In [368]:
# Pick out one tutorial group, ordered by CGPA
def retrieveTutorialGroup(studentList, groupNum):
    """Return the students whose tutorialGrp equals groupNum.

    The students come back in the order produced by sortCGPA, i.e. ascending
    CGPA within the group. An unknown groupNum yields an empty list.
    """
    return [entry for entry in sortCGPA(studentList) if entry.tutorialGrp == groupNum]

# Tally the boys and girls of a tutorial group
def genderCount(tutorialGroup):
    """Return (boyCount, girlCount) for the students in tutorialGroup.

    A student is counted as a boy when gender is exactly 'Male'; every other
    gender value is counted as a girl.
    """
    isBoy = [member.gender == 'Male' for member in tutorialGroup]
    boys = sum(isBoy)
    return boys, len(isBoy) - boys

# Build gender-balanced teams for one tutorial group.
# Each team first receives male/female pairs, the spare seats are then filled
# with whoever is left (females first, then males), and the students that do
# not fit into a full team form one last, smaller team.
def assignTeamsByGender(teamSize, studentList, tutorialGroupNum):
    """Split one tutorial group into teams with an even gender mix.

    The group is taken in ascending CGPA order (see retrieveTutorialGroup).
    len(group) // teamSize full teams are created. Each one first receives
    up to teamSize // 2 male/female pairs, taken in order from the male and
    female lists. The seats still free are then filled team by team from the
    unused females followed by the unused males. If len(group) is not a
    multiple of teamSize, the remaining students are appended as one extra,
    smaller team.

    Students whose gender is not 'Male' are placed in the female list, the
    same rule genderCount uses, so that nobody is dropped from the teams.

    Args:
        teamSize: number of students in a full team (at least 1).
        studentList: all students, of every tutorial group.
        tutorialGroupNum: number of the tutorial group to split.

    Returns:
        A list of teams, each team being a list of students. The list is
        empty when the tutorial group has no students.

    Raises:
        ValueError: if teamSize is smaller than 1.
    """
    if teamSize < 1:
        raise ValueError('teamSize must be at least 1')

    # The head count must come from the requested group itself; counting a
    # fixed group 1 produced the wrong number of teams for any other group
    tgrp = retrieveTutorialGroup(studentList, tutorialGroupNum)
    if not tgrp:
        print("No student found for both male/female list")
        return []

    # Separate the males and females into two lists (CGPA order is kept)
    maleList = [student for student in tgrp if student.gender == 'Male']
    femaleList = [student for student in tgrp if student.gender != 'Male']

    numOfTeams, leftovers = divmod(len(tgrp), teamSize)
    split = teamSize // 2 # max number of male/female pairs per team, e.g. 2 pairs for a team of 5
    teamList = [[] for _ in range(numOfTeams)]

    # Phase 1: hand out male/female pairs until a team has its share of
    # pairs or one of the two genders runs out
    pairsAvailable = min(len(maleList), len(femaleList))
    pairsUsed = 0
    for team in teamList:
        pairsForTeam = min(split, pairsAvailable - pairsUsed)
        for _ in range(pairsForTeam):
            team.append(maleList[pairsUsed])
            team.append(femaleList[pairsUsed])
            pairsUsed += 1

    # Phase 2: fill the free seats with the unpaired students, females first.
    # There are always enough of them: the full teams hold numOfTeams * teamSize
    # students, which is never more than the size of the group.
    spare = femaleList[pairsUsed:] + maleList[pairsUsed:]
    nextSpare = 0
    for team in teamList:
        seatsFree = teamSize - len(team)
        team.extend(spare[nextSpare:nextSpare + seatsFree])
        nextSpare += seatsFree

    # Phase 3: whoever is still unassigned forms the last, smaller team
    if leftovers:
        teamList.append(spare[nextSpare:])

    return teamList

#### Second Algorithm - Sort By CGPA

In [369]:
# Average CGPA of a team: the sum of the members' CGPA over the team size
def calcAverageGPA(team):
    """Return the mean cgpa of the students in team.

    Raises ZeroDivisionError when team is empty.
    """
    runningTotal = 0
    for member in team:
        runningTotal += member.cgpa
    return runningTotal / len(team)

# Gap between the average CGPA of two teams, always reported as a positive number
def calcAvgGPADifference(teamOne, teamTwo):
    """Return the absolute difference between the average CGPA of the two teams."""
    gap = calcAverageGPA(teamOne) - calcAverageGPA(teamTwo)
    return abs(gap)

# TODO: Create teams purely by CGPA
def assignTeamsByCGPA():
    """Placeholder for the CGPA-only team assignment; not implemented yet."""
    return None

#### Third Algorithm - Sort By Faculty

In [370]:
# TODO: create teams purely by Faculty (Use Keith's code)
def assignTeamsByFaculty():
    """Placeholder for the faculty-only team assignment; not implemented yet."""
    return None

#### Distribution Reduction Algorithm - Conditional Swapping

In [371]:
# ------ CGPA Distribution - 0 ------

# ------ Gender Distribution - 1 ------

# ------ Faculty Distribution - 2 ------

# The function reduces the faculty distribution between each team ensuring that they are well balanced
# There should not be a team that contains a dominant faculty e.g., A group of 5 having 4 CCDS students and 1 NBS student

# However, in cases where number of students in teams equals 2 or 3 students:
# 2 unique faculties in such teams are still considered balanced e.g., 2 CCDS and 1 NBS / 1 CCDS and 1 NBS

# For team sizes with 4 students or more, minimally we want 3 faculties minimum for a balanced distribution
# E.g., 2 CCDS, 1 NBS and 1 SoH / 1 CCDS, 1 NBS, 1 SoH, 1 EEE

# Assume that we are creating teams of 4 students and more
# We make use of python sets to define "faculty uniqueness" within each team, if the len(set) < 3 we will attempt to reorganize the teams by swapping students around

# ------ Distribution Code ------

# A function that attempts to reduce the spread of the average CGPA between the teams
# It repeatedly picks two random teams and keeps the 1-for-1 student swaps that bring their averages closer
def reduceTeamDist(teams, sortCondition, randAttempts = 1000):
    """Return a copy of teams whose average CGPAs have been brought closer together.

    For each attempt two different teams are picked at random. Every pairing
    of one student from each team is tried as a 1-for-1 swap; a swap is kept
    only when it lowers the difference between the two teams' average CGPA,
    otherwise it is undone.

    Args:
        teams: list of teams, each team being a list of students. It is not
            modified; the work is done on a deep copy.
        sortCondition: 0 swaps on CGPA alone; 1 additionally only swaps
            students of the same gender, so each team keeps its gender mix.
            2 currently behaves exactly like 1 (faculty is not considered
            yet). Any other value prints a message and returns the copy
            unchanged.
        randAttempts: number of random team pairs to try.

    Returns:
        The deep-copied, rebalanced list of teams.
    """
    tempTeams = copy.deepcopy(teams)

    # Validate once up front instead of reporting the problem for every
    # candidate swap
    if sortCondition not in (0, 1, 2):
        print("Invalid sort condition, please check again")
        return tempTeams

    # Two different teams are needed for a swap; with fewer there is nothing
    # to balance (and no second index could ever be drawn)
    if len(tempTeams) < 2:
        return tempTeams

    # NOTE: condition 2 is meant to take the faculty into account as well,
    # but for now it applies the same gender rule as condition 1
    requireSameGender = sortCondition in (1, 2)

    for _ in range(randAttempts):
        # Draw two distinct team indexes in one go
        firstIndex, secIndex = random.sample(range(len(tempTeams)), 2)
        teamOne = tempTeams[firstIndex]
        teamTwo = tempTeams[secIndex]

        # An empty team has no average CGPA and nobody to swap
        if not teamOne or not teamTwo:
            continue

        # Save the current best average CGPA difference
        bestDifference = calcAvgGPADifference(teamOne, teamTwo)

        # Work with positions and read the students currently sitting there:
        # after an accepted swap the seat holds a different student, and the
        # gender rule must apply to the two students actually being swapped
        for st1Index in range(len(teamOne)):
            for st2Index in range(len(teamTwo)):
                if requireSameGender and teamOne[st1Index].gender != teamTwo[st2Index].gender:
                    continue

                # Perform the 1-for-1 swap between the two teams
                teamOne[st1Index], teamTwo[st2Index] = teamTwo[st2Index], teamOne[st1Index]

                newDifference = calcAvgGPADifference(teamOne, teamTwo)

                if newDifference < bestDifference:
                    # The swap brought the averages closer: keep it
                    bestDifference = newDifference
                else:
                    # Revert back
                    teamOne[st1Index], teamTwo[st2Index] = teamTwo[st2Index], teamOne[st1Index]

    # Once the attempts are finished, return the updated team list
    return tempTeams

#### Main Body Code

In [None]:
# Main Body
numOfTgrps = getNumOfTgrps(studentList)
studentsPerTeam = 5

# Sort Conditions:
# 0: Reduce team distribution by considering only cgpa
# 1: Reduce team distribution by considering both cgpa and gender
# 2: Reduce team distribution by considering cgpa, gender and faculty

# Build the teams of every tutorial group found in the data. The group
# numbers are read from the records themselves, so they do not need to be
# contiguous or start at 1.
for tutorialGrpNum in sorted({student.tutorialGrp for student in studentList}):
    teams = reduceTeamDist(assignTeamsByGender(studentsPerTeam, studentList, tutorialGrpNum), 1)
    tgrpsTeamsList[tutorialGrpNum] = teams

# To display output for validation
for tgrps, groupTeams in tgrpsTeamsList.items():
    print(f'\n-------------- Tutorial Group {tgrps} --------------\n')
    # enumerate gives each team its own number, even if two teams compare equal
    for teamNumber, team in enumerate(groupTeams, start=1):
        print(f'--------- Team {teamNumber} ---------')
        totalCGPA = 0.00
        male = 0
        female = 0

        for student in team:
            totalCGPA += student.cgpa

            if student.gender == "Male":
                male += 1
            else:
                female += 1

            print(f'{student.name} and {student.gender[:1]} and {student.faculty}')

        # An empty team has no average; show 0.000 rather than dividing by zero
        averageCGPA = totalCGPA / len(team) if team else 0.0
        print(f'{averageCGPA:.3f}')
        print(f'Male:{male} Female: {female}')
        print()

#### Final CSV Output Code

In [373]:
# Output a new csv file with the new given list of students
def outputCSV(data):
    """Write the team assignments in data to 'new_teams.csv'.

    Args:
        data: dictionary mapping a tutorial group number to its list of
            teams, each team being a list of students.

    One row is written per student. The tutorial group is written with a
    'G-' prefix and the last column holds the 1-based position of the
    student's team inside its tutorial group.
    """
    import csv  # local import: the csv module is only needed by this function

    headers = ['Tutorial Group','Student ID','School','Name','Gender','CGPA','Team Assigned']

    # The with block closes the file even if writing fails. newline='' lets
    # the csv writer control the line endings itself.
    with open('new_teams.csv', 'w', newline='') as file:
        writer = csv.writer(file, lineterminator='\n')
        writer.writerow(headers)

        for tgrp in data:
            # Each team is a list of students, so the team number comes from
            # its position in the group, not from the team object itself
            for teamNumber, team in enumerate(data[tgrp], start=1):
                for student in team:
                    writer.writerow([
                        f'G-{student.tutorialGrp}',
                        student.studID,
                        student.faculty,
                        student.name,
                        student.gender,
                        student.cgpa,
                        teamNumber,
                    ])
    
# outputCSV(tgrpsTeamsList)