In [None]:
# %pip installs into the running kernel's environment (a bare `!pip` may hit a
# different interpreter); pinned to the version the recorded output installed.
%pip install fuzzywuzzy==0.18.0

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [None]:
# %pip targets the running kernel's environment; a bare `!pip` may not.
%pip install gensim spacy
# Fetch the medium English spaCy pipeline loaded later via spacy.load("en_core_web_md").
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# %pip targets the running kernel's environment; a bare `!pip` may not.
%pip install sentence-transformers



In [39]:
import sqlite3
import numpy as np
import pandas as pd
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

from nltk.corpus import stopwords
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize

import gensim
import spacy

from sentence_transformers import SentenceTransformer

# One-time NLTK resource downloads (no-ops when the data is already present).
# 'punkt_tab' / 'punkt': tokenizer data — presumably what word_tokenize needs; confirm per NLTK version.
nltk.download('punkt_tab')
# 'stopwords': backs stopwords.words('english') in preprocess_text.
nltk.download('stopwords')
nltk.download('punkt')
# 'wordnet': backs wordnet.synsets(...) in synonym_match.
nltk.download('wordnet')

# spaCy pipeline; NOTE(review): `nlp` is not referenced anywhere in the visible cells.
nlp = spacy.load("en_core_web_md")

# Sentence-embedding model shared by process_text_with_sentence_bert (module-level so it loads once).
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def connect_to_database(db_path='/content/drive/MyDrive/PreProcessAssignment Alcovia/students_mentors_workshops.db'):
    """Open a SQLite connection to the students/mentors/workshops database.

    Args:
        db_path: Path of the SQLite file to open. Defaults to the location the
            notebook originally hard-coded, so calling with no arguments
            behaves exactly as before. Pass another path (or ":memory:") to
            run against a different database.

    Returns:
        sqlite3.Connection: An open connection. The caller must close it.
    """
    return sqlite3.connect(db_path)

def get_new_student_input(conn):
    """Prompt for a new student's details, store them, and return the stored row.

    Args:
        conn: Open sqlite3 connection to a database with a ``Students`` table
            containing the columns named in the INSERT below.

    Returns:
        pandas.DataFrame: A single-row frame holding the row that was just
        inserted (all columns of ``Students``).
    """
    name = input("Enter student's name: ")
    strengths = input("Enter student's strengths (comma-separated): ")
    weaknesses = input("Enter student's weaknesses (comma-separated): ")
    interests = input("Enter student's interests (comma-separated): ")
    learning_style = input("Enter student's learning style: ")
    preferred_topics = input("Enter student's preferred topics (comma-separated): ")
    availability = input("Enter student's availability (e.g., Monday 9-11 AM): ")

    cursor = conn.cursor()
    cursor.execute('''INSERT INTO Students (Name, Strengths, Weaknesses, Interests, LearningStyle, PreferredTopics, Availability)
                      VALUES (?, ?, ?, ?, ?, ?, ?)''', (name, strengths, weaknesses, interests, learning_style, preferred_topics, availability))
    # Remember exactly which row this INSERT created. Re-reading with
    # "ORDER BY ID DESC LIMIT 1" returns whichever row has the largest ID,
    # which is not guaranteed to be the one written here.
    inserted_rowid = cursor.lastrowid
    conn.commit()

    students_df = pd.read_sql_query(
        "SELECT * FROM Students WHERE rowid = ?", conn, params=(inserted_rowid,)
    )
    return students_df

def preprocess_text(text):
    """Lowercase ``text``, tokenize it, and drop English stop words.

    Args:
        text: The string to normalise.

    Returns:
        str: The remaining tokens joined by single spaces.
    """
    lowered = text.lower()
    words = word_tokenize(lowered)
    english_stops = set(stopwords.words('english'))
    kept = []
    for token in words:
        if token in english_stops:
            continue
        kept.append(token)
    return ' '.join(kept)

def process_text_with_sentence_bert(text):
    """Encode ``text`` with the module-level sentence-embedding ``model``.

    Returns whatever ``model.encode`` produces for ``text``.
    """
    embedding = model.encode(text)
    return embedding

def cosine_sim(vec1, vec2):
    """Return the cosine similarity of two 1-D vectors.

    Args:
        vec1: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        float: ``dot(vec1, vec2) / (|vec1| * |vec2|)``, or ``0.0`` when either
        vector has zero magnitude.
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        # A zero vector has no direction; dividing would give nan (plus a
        # RuntimeWarning) and poison any later sort on the score.
        return 0.0
    return np.dot(vec1, vec2) / denominator

def synonym_match(word1, word2):
    """Report whether two terms are identical or close WordNet relatives.

    Surrounding whitespace is ignored so that items taken from a
    comma-separated list ("robotics, ai") compare correctly.

    Args:
        word1: First term.
        word2: Second term.

    Returns:
        bool: True when the terms are equal ignoring case and surrounding
        whitespace, or when any pair of their WordNet synsets has a Wu-Palmer
        similarity above 0.8; False otherwise.
    """
    term1 = word1.strip()
    term2 = word2.strip()
    if term1.lower() == term2.lower():
        return True
    synsets1 = wordnet.synsets(term1)
    synsets2 = wordnet.synsets(term2)
    for synset1 in synsets1:
        for synset2 in synsets2:
            similarity = synset1.wup_similarity(synset2)
            # wup_similarity can yield None when the two synsets share no
            # common ancestor; comparing None with a float raises TypeError.
            if similarity is not None and similarity > 0.8:
                return True
    return False

def _split_terms(raw_value):
    """Split a comma-separated field into stripped, non-empty terms."""
    if not isinstance(raw_value, str):
        # NULL columns arrive as None/NaN; treat them as "no terms".
        return []
    return [term.strip() for term in raw_value.split(',') if term.strip()]


def calculate_recommendations(conn, student_id):
    """Rank mentors, workshops and daily tasks for one student.

    The student's strengths, weaknesses and interests are embedded as a single
    profile and compared (cosine similarity) with each candidate's text.

    Args:
        conn: Open sqlite3 connection exposing ``Students``, ``Mentors`` and
            ``Workshops`` tables.
        student_id: Value of ``Students.ID`` to look up. numpy scalars (such
            as the int64 pandas returns for an ID column) are accepted.

    Returns:
        tuple: ``(mentor_recommendations, workshop_recommendations,
        task_recommendations)``, each a DataFrame sorted by
        ``'Similarity Score'`` descending. Columns are:

        * mentors: Mentor, Similarity Score, Expertise, Interests, Availability
          (only mentors whose availability contains the student's);
        * workshops: Workshop, Similarity Score;
        * tasks: Task, Similarity Score.

        Three empty DataFrames are returned (and a message printed) when no
        student has the given ID.
    """
    # Bind the ID as a parameter instead of formatting it into the SQL text.
    # sqlite3 cannot bind numpy scalars, so unwrap them to native Python first.
    student_key = student_id.item() if hasattr(student_id, 'item') else student_id
    student_data = pd.read_sql_query(
        "SELECT * FROM Students WHERE ID = ?", conn, params=(student_key,)
    )

    if student_data.empty:
        print(f"No data found for student ID: {student_id}")
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    student = student_data.iloc[0]
    strengths = student['Strengths']
    weaknesses = student['Weaknesses']
    interests = student['Interests']
    preferred_topics = student['PreferredTopics']
    student_profile = f"{strengths}, {weaknesses}, {interests}"
    student_vec = process_text_with_sentence_bert(student_profile)

    # ---- Mentors -----------------------------------------------------------
    mentors_df = pd.read_sql_query("SELECT * FROM Mentors", conn)

    # Literal substring match: the availability text contains characters such
    # as ':' and '-' and must not be interpreted as a regular expression.
    # na=False keeps mentors with a NULL availability out of the result
    # instead of breaking the boolean mask.
    student_availability = student['Availability'].lower()
    is_available = mentors_df['Availability'].str.lower().str.contains(
        student_availability, regex=False, na=False
    )
    available_mentors = mentors_df[is_available]

    # Score only the mentors that survive the filter; this also avoids using
    # index labels as list positions to pick their scores afterwards.
    mentor_profiles = (
        available_mentors['Expertise'].str.lower()
        + ", "
        + available_mentors['Interests'].str.lower()
    ).apply(preprocess_text)
    mentor_scores = [
        cosine_sim(student_vec, process_text_with_sentence_bert(profile))
        for profile in mentor_profiles
    ]

    mentor_recommendations = pd.DataFrame({
        'Mentor': available_mentors['Name'],
        'Similarity Score': mentor_scores,
        'Expertise': available_mentors['Expertise'],
        'Interests': available_mentors['Interests'],
        'Availability': available_mentors['Availability']
    }).sort_values(by='Similarity Score', ascending=False)

    # ---- Workshops ---------------------------------------------------------
    workshops_df = pd.read_sql_query("SELECT * FROM Workshops", conn)
    focus_areas = workshops_df['FocusArea'].str.lower()
    workshop_profiles = focus_areas.apply(preprocess_text)

    # (terms, multiplier) pairs applied in this order when any term matches a
    # workshop's focus area: weaknesses, preferred topics, interests, strengths.
    score_boosts = (
        (_split_terms(weaknesses), 1.7),
        (_split_terms(preferred_topics), 1.6),
        (_split_terms(interests), 1.2),
        (_split_terms(strengths), 1.1),
    )

    workshop_similarity = []
    for focus_area, profile in zip(focus_areas, workshop_profiles):
        # Each workshop is encoded exactly once.
        score = cosine_sim(student_vec, process_text_with_sentence_bert(profile))
        for terms, multiplier in score_boosts:
            if any(synonym_match(term, focus_area) for term in terms):
                score *= multiplier
        workshop_similarity.append(score)

    workshop_recommendations = pd.DataFrame({
        'Workshop': workshops_df['Topic'],
        'Similarity Score': workshop_similarity
    }).sort_values(by='Similarity Score', ascending=False)

    # ---- Daily tasks -------------------------------------------------------
    tasks = ['Complete a robotics quiz', 'Write a productivity journal', 'Perform a short play', 'Build a mini app']
    task_similarity = [
        cosine_sim(student_vec, process_text_with_sentence_bert(task))
        for task in tasks
    ]

    task_recommendations = pd.DataFrame({
        'Task': tasks,
        'Similarity Score': task_similarity
    }).sort_values(by='Similarity Score', ascending=False)

    return mentor_recommendations, workshop_recommendations, task_recommendations

if __name__ == '__main__':
    # Interactive driver: register a student, then print their recommendations.
    conn = connect_to_database()
    try:
        students_df = get_new_student_input(conn)
        print(students_df)

        # The returned frame holds the single newly inserted row.
        student_id = students_df['ID'][0]
        mentor_recommendations, workshop_recommendations, task_recommendations = calculate_recommendations(conn, student_id)
        print("\nMentor Recommendations:")
        print(mentor_recommendations)
        print("\nWorkshop Recommendations:")
        print(workshop_recommendations)
        print("\nDaily Task Recommendations:")
        print(task_recommendations)
    finally:
        # Release the database even if input or scoring raises.
        conn.close()


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Enter student's name: Micheal
Enter student's strengths (comma-separated): Robotics
Enter student's weaknesses (comma-separated): Engineering
Enter student's interests (comma-separated): AI
Enter student's learning style: Visual
Enter student's preferred topics (comma-separated): Photography
Enter student's availability (e.g., Monday 9-11 AM): Monday 9:00-11:00 AM
   ID     Name Strengths   Weaknesses Interests LearningStyle PreferredTopics  \
0  24  Micheal  Robotics  Engineering        AI        Visual     Photography   

           Availability  
0  Monday 9:00-11:00 AM  

Mentor Recommendations:
        Mentor  Similarity Score              Expertise  \
0    Dr. Smith          0.985111  Robotics, Engineering   
12  Dr. Orange          0.292977     Economics, Finance   

                               Interests          Availability  
0                           Robotics, AI  Monday 9:00-11:00 AM  
12  Investment Strategies, Market Trends  Monday 9:00-11:00 AM  

Workshop Recommenda