# SVT 2025 Matching

In [156]:
import json
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [157]:
# Read the participant records from transformed_data.json. The encoding is
# pinned to UTF-8 (the JSON standard) so names decode the same way on every
# platform instead of depending on the locale's default encoding.
with open('transformed_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Split the records by role using each record's 'Type' field.
tutors = [d for d in data if d['Type'] == 'tutor']
students = [d for d in data if d['Type'] == 'student']

## BFI and VARK feature extraction

In [158]:
def feature_vector(entity):
    """Build the 9-element trait vector for one tutor or student record.

    The vector holds the four VARK scores (V, A, R, K) followed by the five
    BFI scores (Openness, Conscientiousness, Extroversion, Agreeableness,
    Neuroticism), in that order, as floats.
    """
    vark_keys = ('V', 'A', 'R', 'K')
    bfi_keys = ('Openness', 'Conscientiousness', 'Extroversion',
                'Agreeableness', 'Neuroticism')

    vark_scores = entity['VARK Results']
    bfi_scores = entity['BFI Results']

    values = [vark_scores[key] for key in vark_keys]
    values += [bfi_scores[key] for key in bfi_keys]
    return np.array(values, dtype=float)

In [159]:
# Feature vector for every participant, keyed by the record's 'Name'.
tutor_features = {tutor['Name']: feature_vector(tutor) for tutor in tutors}
student_features = {student['Name']: feature_vector(student) for student in students}

## Getting Tutor Pairing Limits

In [160]:
# Each tutor's 'Pair Limit', keyed by the tutor's 'Name'.
capacity = {tutor['Name']: tutor['Pair Limit'] for tutor in tutors}

## Getting Student Quartile

In [161]:
def quartile_num(q):
    """Return the first character of a quartile label as an int (e.g. '2b' -> 2)."""
    label = str(q)
    first_char = label[0]
    return int(first_char)

In [162]:
# Order students by their 'Quartile' value, ascending, so the lowest quartile
# comes first. The comparison is on the stored value itself, not on
# quartile_num(); the original `students` list is left untouched.
sorted_students = sorted(students, key=lambda student: student['Quartile'])

In [163]:
# Inspect the quartile-ordered student records.
sorted_students

[{'Name': 'Alab, Precious Pearl',
  'Type': 'student',
  'VARK Results': {'V': 4, 'A': 2, 'R': 5, 'K': 7},
  'BFI Results': {'Openness': 3.75,
   'Conscientiousness': 3.25,
   'Extroversion': 3.75,
   'Agreeableness': 5.0,
   'Neuroticism': 4.0},
  'Quartile': '2b',
  'Schedule': {'Monday': {'intervals': [{'start': '08:00', 'end': '21:00'}]},
   'Tuesday': {'intervals': [{'start': '08:00', 'end': '21:00'}]},
   'Wednesday': {'intervals': [{'start': '08:00', 'end': '21:00'}]},
   'Thursday': {'intervals': [{'start': '08:00', 'end': '21:00'}]},
   'Friday': {'intervals': [{'start': '08:00', 'end': '21:00'}]}}},
 {'Name': 'Alviar, Loissah Mharie',
  'Type': 'student',
  'VARK Results': {'V': 6, 'A': 4, 'R': 5, 'K': 1},
  'BFI Results': {'Openness': 3.5,
   'Conscientiousness': 3.25,
   'Extroversion': 4.0,
   'Agreeableness': 3.0,
   'Neuroticism': 4.5},
  'Quartile': '2b',
  'Schedule': {'Monday': {'intervals': [{'start': '08:00', 'end': '21:00'}]},
   'Tuesday': {'intervals': [{'start':

## Assign students to tutors

In [164]:
def matching_function(enforce_capacity=False, verbose=True):
    """Assign each student to the most similar tutor by cosine similarity.

    Students are visited in ``sorted_students`` order. For each student, every
    tutor with a free slot is scored with the cosine similarity between the
    student's and the tutor's feature vectors, and the top-scoring tutor is
    chosen.

    Reads the notebook-level ``sorted_students``, ``tutors``, ``capacity``,
    ``student_features`` and ``tutor_features``. None of them is modified.

    Parameters
    ----------
    enforce_capacity : bool, default False
        When False (the original behaviour) an assignment does not use up a
        tutor slot, so the capacity filter never excludes anyone and a tutor
        can be matched to more students than their pair limit. When True,
        every assignment consumes one slot from a private copy of
        ``capacity``, so a tutor stops being a candidate once full.
    verbose : bool, default True
        Print the ranked similarity list for every student.

    Returns
    -------
    pandas.DataFrame
        One row per student with columns ``Student``, ``Tutor`` and
        ``Similarity`` (rounded to 3 decimals). ``Tutor`` and ``Similarity``
        are None when no tutor has a free slot.
    """
    # Private copy: slots consumed below never leak into the notebook-level
    # `capacity`, so calling this function again starts from the same limits.
    remaining = dict(capacity)
    assignments = []

    for s in sorted_students:
        name = s['Name']
        q = quartile_num(s['Quartile'])
        vec = student_features[name].reshape(1, -1)

        # Only tutors that still have a free slot are eligible.
        # NOTE: an earlier draft restricted quartile-2 students to tutors whose
        # capacity is exactly 1; that rule was disabled and is not applied.
        candidates = [t for t in tutors if remaining[t['Name']] > 0]

        # No tutor left: record the student as unmatched.
        if not candidates:
            assignments.append({'Student': name, 'Tutor': None, 'Similarity': None})
            continue

        # Score every candidate against this student.
        sims = [
            (t['Name'], cosine_similarity(vec, tutor_features[t['Name']].reshape(1, -1))[0][0])
            for t in candidates
        ]

        # Rank best-first. The sort is stable, so tutors with equal scores keep
        # their order from `tutors`.
        sorted_sims = sorted(sims, key=lambda x: x[1], reverse=True)
        if verbose:
            print(f"Name: {name}, Q: {q}, Similarity: {sorted_sims}")

        # Take the winner from the ranking that was just reported, so the
        # printed list and the chosen tutor always agree.
        best_tutor, sim_score = sorted_sims[0]

        assignments.append({
            'Student': name,
            'Tutor': best_tutor,
            'Similarity': round(sim_score, 3)
        })

        if enforce_capacity:
            remaining[best_tutor] -= 1

    return pd.DataFrame(assignments)

In [165]:
# Run the matching with the feature vectors currently held in
# tutor_features / student_features and display the assignment table.
matching_function()

Name: Alab, Precious Pearl, Q: 2, Similarity: [('Leon Antonio C. Besar', np.float64(0.9128881946063088)), ('Luis Miguel Antonio Razon', np.float64(0.9116963311125649)), ('Axl Roel Andaya', np.float64(0.9109163921242867)), ('Bianca Clarizze Sollesta', np.float64(0.8740914165301013)), ('Kevin Matthew Panuelos', np.float64(0.8568895764866192)), ('Raymund Sison', np.float64(0.8345727849868285)), ('Bea Claire Sollesta', np.float64(0.8250932080946093)), ('BORJA, Jacob M', np.float64(0.8070533372957108))]
Name: Alviar, Loissah Mharie, Q: 2, Similarity: [('Raymund Sison', np.float64(0.8848741291144082)), ('Bea Claire Sollesta', np.float64(0.8700959938343993)), ('Bianca Clarizze Sollesta', np.float64(0.8335075418559169)), ('Axl Roel Andaya', np.float64(0.8046525757142029)), ('Kevin Matthew Panuelos', np.float64(0.7171806099508198)), ('Leon Antonio C. Besar', np.float64(0.6982224124339607)), ('BORJA, Jacob M', np.float64(0.685250781044602)), ('Luis Miguel Antonio Razon', np.float64(0.64488894074

Unnamed: 0,Student,Tutor,Similarity
0,"Alab, Precious Pearl",Leon Antonio C. Besar,0.913
1,"Alviar, Loissah Mharie",Raymund Sison,0.885
2,"Capillano, Mhigz Genrei",Bianca Clarizze Sollesta,0.955
3,"Orain, Mark Angelo",Axl Roel Andaya,0.919
4,"Baloaloa, Akhira Charlotte",Axl Roel Andaya,0.936
5,"Llanto, Didrei Keira",Raymund Sison,0.877
6,"Panaguiton, Lei Janrey",Bianca Clarizze Sollesta,0.984
7,"Panopio, Kyrie Caleb",Axl Roel Andaya,0.957
8,"Poblete, Erica",Axl Roel Andaya,0.923
9,"Alcoran, Mark Jhei",Luis Miguel Antonio Razon,0.905


## Normalizing and adding weights to BFI and VARK Features

In [166]:
def normalize(vec):
    """Scale ``vec`` to unit Euclidean length.

    A vector whose norm is not positive (e.g. all zeros) is returned as-is,
    which avoids dividing by zero.
    """
    length = np.linalg.norm(vec)
    if length > 0:
        return vec / length
    return vec


In [167]:
def feature_vector(entity, weight_vark=0.5, weight_bfi=0.5):
    """Build a weighted trait vector from separately normalized VARK and BFI scores.

    The four VARK scores (V, A, R, K) and the five BFI scores (Openness,
    Conscientiousness, Extroversion, Agreeableness, Neuroticism) are each
    passed through ``normalize`` on their own, multiplied by their group
    weight, and concatenated with the VARK part first.
    """
    vark_scores = entity['VARK Results']
    bfi_scores = entity['BFI Results']

    vark_raw = np.array(
        [vark_scores[key] for key in ('V', 'A', 'R', 'K')],
        dtype=float,
    )
    bfi_raw = np.array(
        [bfi_scores[key] for key in ('Openness', 'Conscientiousness',
                                     'Extroversion', 'Agreeableness',
                                     'Neuroticism')],
        dtype=float,
    )

    # Normalize each group independently before weighting.
    weighted_vark = weight_vark * normalize(vark_raw)
    weighted_bfi = weight_bfi * normalize(bfi_raw)

    return np.concatenate([weighted_vark, weighted_bfi])




In [168]:
# Rebuild the pair limits and recompute every feature vector with the
# feature_vector definition currently in scope.
capacity = {tutor['Name']: tutor['Pair Limit'] for tutor in tutors}
tutor_features = {tutor['Name']: feature_vector(tutor) for tutor in tutors}
student_features = {student['Name']: feature_vector(student) for student in students}

In [169]:
# Re-run the matching; it picks up whatever tutor_features / student_features
# currently hold, and displays the new assignment table.
matching_function()

Name: Alab, Precious Pearl, Q: 2, Similarity: [('Bianca Clarizze Sollesta', np.float64(0.9691422680279167)), ('Luis Miguel Antonio Razon', np.float64(0.964887698957011)), ('Leon Antonio C. Besar', np.float64(0.9628337906491484)), ('Axl Roel Andaya', np.float64(0.9506074179946851)), ('Kevin Matthew Panuelos', np.float64(0.9399465345473967)), ('Raymund Sison', np.float64(0.9160360838419489)), ('Bea Claire Sollesta', np.float64(0.9157277812431209)), ('BORJA, Jacob M', np.float64(0.90529249008087))]
Name: Alviar, Loissah Mharie, Q: 2, Similarity: [('Raymund Sison', np.float64(0.9483656772704956)), ('Bea Claire Sollesta', np.float64(0.9470843644324689)), ('Bianca Clarizze Sollesta', np.float64(0.9366581098101437)), ('Axl Roel Andaya', np.float64(0.8656595240690159)), ('Kevin Matthew Panuelos', np.float64(0.8376989958575134)), ('BORJA, Jacob M', np.float64(0.8116609368195709)), ('Leon Antonio C. Besar', np.float64(0.7944920634170677)), ('Luis Miguel Antonio Razon', np.float64(0.7557226553405

Unnamed: 0,Student,Tutor,Similarity
0,"Alab, Precious Pearl",Bianca Clarizze Sollesta,0.969
1,"Alviar, Loissah Mharie",Raymund Sison,0.948
2,"Capillano, Mhigz Genrei",Bianca Clarizze Sollesta,0.976
3,"Orain, Mark Angelo",Bianca Clarizze Sollesta,0.973
4,"Baloaloa, Akhira Charlotte","BORJA, Jacob M",0.984
5,"Llanto, Didrei Keira",Raymund Sison,0.947
6,"Panaguiton, Lei Janrey",Bianca Clarizze Sollesta,0.993
7,"Panopio, Kyrie Caleb",Axl Roel Andaya,0.965
8,"Poblete, Erica",Bianca Clarizze Sollesta,0.989
9,"Alcoran, Mark Jhei",Luis Miguel Antonio Razon,0.967


## Decoupling

In [170]:
def decouple_and_rerun(student_name, tutor_name, student_tutor_pairs, tutor_pair_count):
    """Remove a (student, tutor) pairing and update the tutor's pair count.

    Despite the name, this function does not re-run the matching itself; the
    caller is expected to do that with the returned pairs.

    Parameters
    ----------
    student_name, tutor_name
        Identify the pairing to remove. A pair matches when its element 0
        equals ``student_name`` and its element 1 equals ``tutor_name``.
    student_tutor_pairs : iterable of indexable pairs
        Current pairings. This object is not modified.
    tutor_pair_count : mutable mapping
        Per-tutor count, updated in place: ``tutor_pair_count[tutor_name]`` is
        reduced by the number of pairs actually removed.

    Returns
    -------
    list
        The pairings that remain, in their original order. Previously the
        filtered list was bound only to a local name and lost, so callers
        must use this return value to see the decoupling.
    """
    remaining_pairs = []
    removed = 0
    for pair in student_tutor_pairs:
        if pair[0] == student_name and pair[1] == tutor_name:
            removed += 1
        else:
            remaining_pairs.append(pair)

    # Only touch the count when a pairing was really removed; decrementing for
    # a pair that never existed would make the count drift.
    if removed:
        tutor_pair_count[tutor_name] -= removed

    return remaining_pairs
