In [1]:
from student_model import Student
from tutor_model import Tutor, TutorTypes

In [2]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Dict, Tuple
from datetime import time

## Fake Data
The test tutors were divided into two (focus on style, personality, and competency), with one tutor being more similar to the student's learning style and personality but lower in competency (mathematics) while the other one being less similar to the student's learning style and personality, but higher in competency.

In [3]:
sample_student = Student(
    id="student1",
    learning_style={"visual": 0.8, "auditory": 0.4, "read_write": 0.3, "kinesthetic": 0.2},
    personality={"openness": 0.7, "conscientiousness": 0.6, "extraversion": 0.5, "agreeableness": 0.8,
                 "neuroticism": 0.3},
    competencies={"mathematics": 0.3, "science": 0.6, "english": 0.7},
    availability_schedule={
        "Monday": [(time(10, 0), time(12, 0))],   # 10:00 AM - 12:00 PM
        "Wednesday": [(time(14, 0), time(16, 0))] # 2:00 PM - 4:00 PM
    },
    tutor_ratings={}
)

sample_tutors = [
    Tutor(
        id="tutor1",
        teaching_style={"visual": 0.3, "auditory": 0.8, "read_write": 0.7, "kinesthetic": 0.5},
        personality={"openness": 0.6, "conscientiousness": 0.8, "extraversion": 0.4, "agreeableness": 0.9,
                     "neuroticism": 0.3},
        competencies={"mathematics": 0.9, "science": 0.1, "english": 0.5},
        tutor_type=TutorTypes.professional,
        availability_schedule={
            "Monday": [(time(10, 0), time(12, 0))],   # 11:00 AM - 1:00 PM (overlaps with student)
            "Wednesday": [(time(14, 0), time(16, 0))]     # 9:00 AM - 11:00 AM (no overlap)
        },
        student_ratings={}
    ),
    Tutor(
        id="tutor2",
        teaching_style={"visual": 0.9, "auditory": 0.5, "read_write": 0.4, "kinesthetic": 0.1},
        personality={"openness": 0.8, "conscientiousness": 0.7, "extraversion": 0.6, "agreeableness": 0.7,
             "neuroticism": 0.2},
        competencies={"mathematics": 0.1, "science": 0.8, "english": 0.6},
        tutor_type=TutorTypes.paraprofessional,
        availability_schedule={
            "Wednesday": [(time(15, 0), time(17, 0))], # 3:00 PM - 5:00 PM (overlaps with student)
            "Thursday": [(time(10, 0), time(12, 0))]  # 10:00 AM - 12:00 PM (no overlap)
        },
        student_ratings={}
    ),
]

In [4]:
sample_student.extract_features()

array([0.4 , 0.2 , 0.15, 0.1 , 0.35, 0.3 , 0.25, 0.4 , 0.15, 3.  , 6.  ,
       7.  ])

## Content-based recommender system using Cosine Similarity
For the first approach, cosine similarity is calculated between the tutor and student for all three characteristics: competency, personality, and style. Weights were introduced wherein personality and style were valued less (0.5x multiplier) while competency is highly valued (x10 multiplier)

For this, tutor 1 would be more preferred since although they are dissimilar with the students, they are highly proficient in the required competency (mathematics)

In [5]:
def calculate_similarity(student: Student, tutor: Tutor) -> float:

    student_features = student.extract_features()

    # set compentency filter the same as the student
    tutor.set_competency_filter(student.competency_filter)
    tutor_features = tutor.extract_features()

    # Reshape vectors to 2D arrays as required by sklearn's cosine_similarity
    student_features_2d = student_features.reshape(1, -1)
    tutor_features_2d = tutor_features.reshape(1, -1)

    # Calculate cosine similarity
    similarity = cosine_similarity(student_features_2d, tutor_features_2d)[0][0]

    return similarity


In [6]:
calculate_similarity(sample_student, sample_tutors[0])

np.float64(0.6796691668278363)

In [7]:
def recommend(student: Student, tutors: List[Tutor], top_n = 5) -> List[Tuple[Tutor, float]]:

    tutor_similarities = []
    for tutor in tutors:
        similarity = calculate_similarity(student, tutor)
        tutor_similarities.append((tutor, similarity))

    tutor_similarities.sort(key=lambda x: x[1], reverse=True)

    return tutor_similarities[:top_n]

In [8]:
sample_student.set_competency_filter(["mathematics"])
sample_student.get_filtered_competencies()

array([0.3])

In [9]:
for tutor in sample_tutors:
    tutor.set_competency_filter(["mathematics"])
    print(tutor.extract_features())

[0.15 0.4  0.35 0.25 0.3  0.4  0.2  0.45 0.15 9.  ]
[0.45 0.25 0.2  0.05 0.4  0.35 0.3  0.35 0.1  1.  ]


In [10]:
recommend(sample_student, sample_tutors)

[(Tutor(id='tutor1', availability_schedule={'Monday': [(datetime.time(10, 0), datetime.time(12, 0))], 'Wednesday': [(datetime.time(14, 0), datetime.time(16, 0))]}, competencies={'mathematics': 0.9, 'science': 0.1, 'english': 0.5}, competency_filter=['mathematics'], tutor_type=<TutorTypes.professional: 'professional'>, teaching_style={'visual': 0.3, 'auditory': 0.8, 'read_write': 0.7, 'kinesthetic': 0.5}, personality={'openness': 0.6, 'conscientiousness': 0.8, 'extraversion': 0.4, 'agreeableness': 0.9, 'neuroticism': 0.3}, student_ratings={}),
  np.float64(0.983204673020371)),
 (Tutor(id='tutor2', availability_schedule={'Wednesday': [(datetime.time(15, 0), datetime.time(17, 0))], 'Thursday': [(datetime.time(10, 0), datetime.time(12, 0))]}, competencies={'mathematics': 0.1, 'science': 0.8, 'english': 0.6}, competency_filter=['mathematics'], tutor_type=<TutorTypes.paraprofessional: 'paraprofessional'>, teaching_style={'visual': 0.9, 'auditory': 0.5, 'read_write': 0.4, 'kinesthetic': 0.1},

### 2nd Approach

For this approach, we explore what happens if competency is not considered. This could be useful when only using the recommender system to specifically look for tutors similar to students and then just sorting the results later based on competency.

As expected, tutor 2 was preferred over tutor 1 due to being more similar to the student in terms of style and personality

In [11]:
# Different recommendation approach
def calculate_similarity_without_competency(student: Student, tutor: Tutor) -> float:
    student_features = student.extract_features()[:9]

    # set compentency filter the same as the student
    tutor.set_competency_filter(student.competency_filter)
    tutor_features = tutor.extract_features()[:9]

    # Reshape vectors to 2D arrays as required by sklearn's cosine_similarity
    student_features_2d = student_features.reshape(1, -1)
    tutor_features_2d = tutor_features.reshape(1, -1)

    # Calculate cosine similarity
    similarity = cosine_similarity(student_features_2d, tutor_features_2d)[0][0]

    return similarity

In [12]:
def recommend_without_competency(student: Student, tutors: List[Tutor], top_n = 5) -> List[Tuple[Tutor, float]]:
    tutor_similarities = []
    for tutor in tutors:
        similarity = calculate_similarity_without_competency(student, tutor)
        tutor_similarities.append((tutor, similarity))

    tutor_similarities = sorted(tutor_similarities, key=lambda x: x[1], reverse=True)
    return tutor_similarities[:top_n]


In [13]:
recommend_without_competency(sample_student, sample_tutors)

[(Tutor(id='tutor2', availability_schedule={'Wednesday': [(datetime.time(15, 0), datetime.time(17, 0))], 'Thursday': [(datetime.time(10, 0), datetime.time(12, 0))]}, competencies={'mathematics': 0.1, 'science': 0.8, 'english': 0.6}, competency_filter=['mathematics'], tutor_type=<TutorTypes.paraprofessional: 'paraprofessional'>, teaching_style={'visual': 0.9, 'auditory': 0.5, 'read_write': 0.4, 'kinesthetic': 0.1}, personality={'openness': 0.8, 'conscientiousness': 0.7, 'extraversion': 0.6, 'agreeableness': 0.7, 'neuroticism': 0.2}, student_ratings={}),
  np.float64(0.9883152336753832)),
 (Tutor(id='tutor1', availability_schedule={'Monday': [(datetime.time(10, 0), datetime.time(12, 0))], 'Wednesday': [(datetime.time(14, 0), datetime.time(16, 0))]}, competencies={'mathematics': 0.9, 'science': 0.1, 'english': 0.5}, competency_filter=['mathematics'], tutor_type=<TutorTypes.professional: 'professional'>, teaching_style={'visual': 0.3, 'auditory': 0.8, 'read_write': 0.7, 'kinesthetic': 0.5}

In [14]:
sample_student

Student(id='student1', availability_schedule={'Monday': [(datetime.time(10, 0), datetime.time(12, 0))], 'Wednesday': [(datetime.time(14, 0), datetime.time(16, 0))]}, competencies={'mathematics': 0.3, 'science': 0.6, 'english': 0.7}, competency_filter=['mathematics'], learning_style={'visual': 0.8, 'auditory': 0.4, 'read_write': 0.3, 'kinesthetic': 0.2}, personality={'openness': 0.7, 'conscientiousness': 0.6, 'extraversion': 0.5, 'agreeableness': 0.8, 'neuroticism': 0.3}, tutor_ratings={})

## Scheduling
Help :)

In [15]:
def get_weekly_availability_overlap(student: Student, tutor: Tutor, min_overlap=90) -> int:
    overlap_count = 0  # Counter for valid overlaps

    for day in student.availability_schedule:
        if day in tutor.availability_schedule:
            for student_start, student_end in student.availability_schedule[day]:
                for tutor_start, tutor_end in tutor.availability_schedule[day]:
                    # Convert time objects to minutes since midnight
                    student_start_min = student_start.hour * 60 + student_start.minute
                    student_end_min = student_end.hour * 60 + student_end.minute
                    tutor_start_min = tutor_start.hour * 60 + tutor_start.minute
                    tutor_end_min = tutor_end.hour * 60 + tutor_end.minute

                    # Find the overlap
                    overlap_start = max(student_start_min, tutor_start_min)
                    overlap_end = min(student_end_min, tutor_end_min)

                    if overlap_start < overlap_end:  # Ensure there is an actual overlap
                        overlap_duration = overlap_end - overlap_start
                        if overlap_duration >= min_overlap:
                            overlap_count += 1

    return overlap_count

In [16]:
get_weekly_availability_overlap(sample_student, sample_tutors[0], 90)

2

In [17]:
get_weekly_availability_overlap(sample_student, sample_tutors[1], 60)

1

In [18]:
def calculate_similarity_with_scheduling(student: Student, tutor: Tutor) -> float:
    #* Using the weekly overlap as weights, makes sense since if 0, everything will be 0 since they would not be able to meet anyways
    weekly_overlap = get_weekly_availability_overlap(student, tutor)
    student_features = student.extract_features() * weekly_overlap

    tutor.set_competency_filter(student.competency_filter)
    tutor_features = tutor.extract_features() * weekly_overlap

    student_features_2d = student_features.reshape(1, -1)
    tutor_features_2d = tutor_features.reshape(1, -1)

    similarity = cosine_similarity(student_features_2d, tutor_features_2d)[0][0]

    return similarity



def recommend_with_scheduling(student: Student, tutors: List[Tutor], top_n = 5) -> List[Tuple[Tutor, float]]:
    tutor_similarities = []
    for tutor in tutors:
        similarity = calculate_similarity_with_scheduling(student, tutor)
        tutor_similarities.append((tutor, similarity))

    tutor_similarities.sort(key=lambda x: x[1], reverse=True)
    return tutor_similarities[:top_n]


In [19]:
recommend_with_scheduling(sample_student, sample_tutors)

[(Tutor(id='tutor1', availability_schedule={'Monday': [(datetime.time(10, 0), datetime.time(12, 0))], 'Wednesday': [(datetime.time(14, 0), datetime.time(16, 0))]}, competencies={'mathematics': 0.9, 'science': 0.1, 'english': 0.5}, competency_filter=['mathematics'], tutor_type=<TutorTypes.professional: 'professional'>, teaching_style={'visual': 0.3, 'auditory': 0.8, 'read_write': 0.7, 'kinesthetic': 0.5}, personality={'openness': 0.6, 'conscientiousness': 0.8, 'extraversion': 0.4, 'agreeableness': 0.9, 'neuroticism': 0.3}, student_ratings={}),
  np.float64(0.983204673020371)),
 (Tutor(id='tutor2', availability_schedule={'Wednesday': [(datetime.time(15, 0), datetime.time(17, 0))], 'Thursday': [(datetime.time(10, 0), datetime.time(12, 0))]}, competencies={'mathematics': 0.1, 'science': 0.8, 'english': 0.6}, competency_filter=['mathematics'], tutor_type=<TutorTypes.paraprofessional: 'paraprofessional'>, teaching_style={'visual': 0.9, 'auditory': 0.5, 'read_write': 0.4, 'kinesthetic': 0.1},