# Design and Analysis of Algorithms - Assignment 1

---

### Dependencies

In [1]:
import re
import os
import math
import numpy as np
import pandas as pd

### Data Loading

In [2]:
# Load the student roster workbook; every later cell derives from this frame.
df_original = pd.read_excel("input_Make Groups.xlsx")

In [3]:
# Preview the first rows of the freshly loaded sheet.
df_original.head()

Unnamed: 0,Roll,Name,Email,Unnamed: 3,Unique
0,1401AI01,Abhishek,Abhishek@mycollege.in,,AI.csv
1,1401AI02,Aman,Aman@mycollege.in,,CB.csv
2,1401AI03,Harsh,Harsh@mycollege.in,,CE.csv
3,1401AI04,Ayush,Ayush@mycollege.in,,CH.csv
4,1401AI05,Aditi,Aditi@mycollege.in,,CS.csv


**Dropping Two Columns: 'Unnamed: 3' and 'Unique'**

In [4]:
# Remove the two helper columns that are not part of the student records.
# `errors="ignore"` keeps this cell idempotent: because it rebinds `df_original`,
# running it a second time would otherwise raise KeyError for the already-dropped
# columns.
df_original = df_original.drop(columns = ['Unnamed: 3', 'Unique'],
                               errors  = "ignore")

In [5]:
# Re-display the head to confirm 'Unnamed: 3' and 'Unique' are gone.
df_original.head()

Unnamed: 0,Roll,Name,Email
0,1401AI01,Abhishek,Abhishek@mycollege.in
1,1401AI02,Aman,Aman@mycollege.in
2,1401AI03,Harsh,Harsh@mycollege.in
3,1401AI04,Ayush,Ayush@mycollege.in
4,1401AI05,Aditi,Aditi@mycollege.in


### Objective 1: full_branchwise

In [6]:
# Output folder for the per-branch CSV files (trailing slash is relied upon by the
# f-strings that build file names). The folder name now matches the objective
# title ("full_branchwise"); it was previously misspelled.
FILE_PATH = "full_branchwise/"

# `DataFrame.to_csv` does not create missing directories, so make sure it exists.
os.makedirs(FILE_PATH, exist_ok=True)

**Extracting unique branch codes from the Roll column**

In [7]:
# Branch code = characters 4-5 of each roll number. `dict.fromkeys` de-duplicates
# while preserving first-seen order, giving the same list as an explicit
# "append if not already present" loop.
unique_branches = list(dict.fromkeys(roll[4:6] for roll in df_original['Roll']))

In [8]:
# Display the branch codes collected from the roll numbers.
unique_branches

['AI', 'CB', 'CE', 'CH', 'CS', 'CT', 'EC', 'MC', 'MM', 'MT']

In [9]:
# Write one CSV per branch containing exactly the students of that branch.
#
# The branch is identified by the same [4:6] slice that produced `unique_branches`,
# compared for exact equality. A case-insensitive substring/regex search over the
# whole roll number could match a code appearing elsewhere in the roll and would
# put a student into more than one file. Rows with a missing roll compare unequal
# and are left out, as before.
branch_codes = df_original['Roll'].str[4:6]

for i in unique_branches:
    df_new = df_original[branch_codes == i]
    df_new.to_csv(path_or_buf = f"{FILE_PATH}{i}.csv",
                  index       = False)

**Branchwise Distribution**

In [10]:
# Head-count per branch, measured from the CSV files written to FILE_PATH.
dist = {
    branch: len(pd.read_csv(f"{FILE_PATH}{branch}.csv"))
    for branch in unique_branches
}

In [11]:
# Display the branch -> student-count mapping.
dist

{'AI': 45,
 'CB': 50,
 'CE': 56,
 'CH': 84,
 'CS': 95,
 'CT': 64,
 'EC': 54,
 'MC': 75,
 'MM': 82,
 'MT': 78}

### Objective 2: group_branch_wise_mix

In [12]:
# Output folder for the branch-wise mixed groups (trailing slash is relied upon by
# the f-strings that build file names).
MIX_PATH = "group_branch_wise_mix/"

# `DataFrame.to_csv` does not create missing directories, so make sure it exists.
os.makedirs(MIX_PATH, exist_ok=True)

# Load every branch file into memory, keyed by branch code. `os.path.join` avoids
# the doubled separator that "FILE_PATH/" + "/name" would produce.
df_dict = {}
for i in unique_branches:
    df_dict[i] = pd.read_csv(os.path.join(FILE_PATH, f"{i}.csv"))

In [13]:
# Build the branch-wise mixed groups.
#
# Students are dealt out round-robin: the 1st student of every branch (in
# `unique_branches` order), then the 2nd student of every branch, and so on,
# skipping branches that have run out. The resulting sequence is cut into
# consecutive chunks of `group_size` rows; the last chunk may be shorter.
#
# Result: `grps` is a list of groups, each group a list of row Series.
n = int(input("Enter number of groups: "))
if n <= 0:
    # Guard against ZeroDivisionError / meaningless negative sizes below.
    raise ValueError("Number of groups must be a positive integer.")

total_brch = len(unique_branches)
longest_branch = max(len(df_dict[b]) for b in unique_branches) if total_brch else 0

# Iterating over the branch frames themselves (rather than counting up to
# len(df_original)) guarantees termination even if the branch files and
# `df_original` disagree on the number of students. `.iloc` picks by position,
# so the result does not depend on the frames' index labels.
round_robin = [
    df_dict[branch].iloc[std_round]
    for std_round in range(longest_branch)
    for branch in unique_branches
    if std_round < len(df_dict[branch])
]

group_size = math.ceil(len(round_robin) / n)

# max(..., 1) only matters when there are no students at all (range step must be > 0).
chunk = max(group_size, 1)
grps = [round_robin[start:start + chunk]
        for start in range(0, len(round_robin), chunk)]

Enter number of groups:  15


In [14]:
# Persist each mixed group as g1.csv, g2.csv, ... inside MIX_PATH.
for group_no, members in enumerate(grps, start=1):
    df_new = pd.DataFrame(data = members)
    df_new.to_csv(f"{MIX_PATH}g{group_no}.csv", index = False)

### Objective 3: Uniform mix

In [15]:
# (branch, count) pairs ordered from the largest branch to the smallest.
# The sort is stable, so equally sized branches keep their `dist` order.
desc_dist = list(dist.items())
desc_dist.sort(key = lambda pair: pair[1], reverse = True)

In [16]:
# Display the branches ordered by descending student count.
desc_dist

[('CS', 95),
 ('CH', 84),
 ('MM', 82),
 ('MT', 78),
 ('MC', 75),
 ('CT', 64),
 ('CE', 56),
 ('EC', 54),
 ('CB', 50),
 ('AI', 45)]

In [17]:
# Output folder for the uniform-mix groups (trailing slash is relied upon by the
# f-strings that build file names).
UNIFORM_PATH = "group_uniform_mix/"

# `DataFrame.to_csv` does not create missing directories, so make sure it exists.
os.makedirs(UNIFORM_PATH, exist_ok=True)

In [27]:
# Build the "uniform mix" groups and write them to UNIFORM_PATH as g1.csv ... gN.csv.
#
# Phase 1 (plan): for each of the n groups, repeatedly take students from the branch
# with the most students still unassigned until the group holds `group_size`
# students or nobody is left. `final_groups[k]` becomes a list of
# (branch, number_of_students) pairs.
# Phase 2 (materialise): consume that many rows from the front of each branch
# frame and write the group's rows to disk.
n = int(input("Enter number of groups: "))
if n <= 0:
    # Guard against ZeroDivisionError / meaningless negative sizes below.
    raise ValueError("Number of groups must be a positive integer.")

total_students = len(df_original)
group_size = math.ceil(total_students/n)

final_groups = [[] for _ in range(n)]
desc_dist = sorted(dist.items(), key = lambda x: x[1], reverse=True)

for i in range(n):
    remaining = group_size
    while remaining > 0 and desc_dist:
        # Re-sort every step so index 0 is always the currently largest branch.
        desc_dist.sort(key = lambda x: x[1], reverse=True)
        idx, count = desc_dist[0]

        if count == 0:
            # Largest branch is empty => every branch is empty.
            break

        take = min(count, remaining)
        final_groups[i].append((idx, take))

        desc_dist[0] = (idx, count-take)
        remaining -= take

# Not used below; retained in case other cells rely on the name.
new_dist = dist.copy()

# Fresh copies of the branch frames: rows are consumed from the front as groups
# are filled.
df_dict = {}
for branch in unique_branches:
    df_dict[branch] = pd.read_csv(os.path.join(FILE_PATH, f"{branch}.csv"))

for group_no, plan in enumerate(final_groups, start=1):
    parts = []
    for branch, students in plan:
        parts.append(df_dict[branch].iloc[:students])
        df_dict[branch] = df_dict[branch].iloc[students:].reset_index(drop=True)

    # Concatenate once per group. A group with an empty plan still gets a
    # header-only file, so every g1..gN file exists for later cells to read.
    if parts:
        df_new = pd.concat(parts, axis = 0)
    else:
        df_new = pd.DataFrame(columns=df_original.columns)

    # Written once per group, after all of its branches have been collected.
    df_new.to_csv(f"{UNIFORM_PATH}g{group_no}.csv", index = False)

Enter number of groups:  15


In [21]:
# Read the uniform-mix group files back into a single frame.
#
# The files on disk are numbered g1.csv .. gN.csv (1-based), one per entry of
# `final_groups`, so iterate over exactly that range instead of a hard-coded
# 0..14, which requests a non-existent g0.csv and skips the last group.
uniform_frames = [
    pd.read_csv(f"{UNIFORM_PATH}g{group_no}.csv")
    for group_no in range(1, len(final_groups) + 1)
]

# Concatenate once; fall back to an empty frame when there are no groups because
# pd.concat rejects an empty list.
if uniform_frames:
    df_uniform = pd.concat(uniform_frames)
else:
    df_uniform = pd.DataFrame(columns=df_original.columns)

### Objective 4: Stats

In [37]:
# Scratch check of the stats-table layout: an empty frame with one column per
# branch code, then a single hand-entered row labelled 'g1'.
# NOTE(review): these literals equal the whole-cohort branch totals displayed for
# `dist`, not the counts of one group — presumably a placeholder; confirm or remove.
# `df_stat` is rebound by the stats loop further down.
df_stat = pd.DataFrame(columns=unique_branches)
df_stat.loc['g1'] = {'AI': 45,
 'CB': 50,
 'CE': 56,
 'CH': 84,
 'CS': 95,
 'CT': 64,
 'EC': 54,
 'MC': 75,
 'MM': 82,
 'MT': 78}

In [38]:
# Display the one-row scratch table.
df_stat

Unnamed: 0,AI,CB,CE,CH,CS,CT,EC,MC,MM,MT
g1,45,50,56,84,95,64,54,75,82,78


In [49]:
# For each grouping scheme, count how many students of every branch ended up in
# each group and save the table as <folder>/stat.csv (rows g1..gN, one column per
# branch code).
PATHS = [UNIFORM_PATH, MIX_PATH]

# Each scheme has its own number of groups. Taking it from the in-memory results
# avoids depending on whichever value of `n` happened to be typed last.
group_counts = {
    UNIFORM_PATH: len(final_groups),
    MIX_PATH: len(grps),
}

for path in PATHS:
    rows = []
    labels = []
    for group_no in range(1, group_counts[path] + 1):
        # Start every branch at zero so branches absent from a group still get a column value.
        count_by_branch = dict.fromkeys(unique_branches, 0)
        df_temp = pd.read_csv(f"{path}g{group_no}.csv")
        for roll in df_temp['Roll']:
            # Characters 4-5 of the roll number are the branch code; an unknown
            # code raises KeyError rather than being silently dropped.
            count_by_branch[roll[4:6]] += 1
        rows.append(count_by_branch)
        labels.append(f'g{group_no}')

    # Build the table in one go instead of enlarging a frame row by row.
    df_stat = pd.DataFrame(rows, index=labels, columns=unique_branches)
    df_stat.to_csv(f"{path}stat.csv")