## Import Required Packages

In [1]:
import re
import numpy as np
import pandas as pd
import joblib
import warnings

warnings.filterwarnings("ignore")

## Text Preprocessing

In [2]:
def preprocess_text(text):
    """
    Normalizes text so it can be fed to the vectorizer.

    String inputs are lowercased, stripped of every character that is not an
    ASCII letter, a digit, or whitespace, and have runs of whitespace collapsed
    to a single space with both ends trimmed. Anything that is not a string is
    handed back untouched.

    Args:
    text (str): The input text to preprocess.

    Returns:
    str: The preprocessed text (or the original object when it is not a string).
    """
    # Guard clause: non-string values pass straight through.
    if not isinstance(text, str):
        return text

    lowered = text.lower()
    # Drop special characters and punctuation
    alnum_only = re.sub(r"[^a-zA-Z0-9\s]", "", lowered)
    # Squeeze whitespace runs into one space and trim the ends
    return re.sub(r"\s+", " ", alnum_only).strip()

## Recommendation Call

In [3]:
def recommend_course(input_string, df, cv_file, similarity_file, top_n=3):
    """
    Recommends courses based on the input string.

    The query is preprocessed, vectorized with the saved CountVectorizer, and
    multiplied against the transpose of the matrix stored in ``similarity_file``.
    The score at position ``i`` of the result is paired with row ``i`` of ``df``.

    Args:
    input_string (str): The input string representing the user's query.
    df (DataFrame): The DataFrame containing course information. Rows are looked
        up by position, so their order must match the saved matrix.
    cv_file (str): The file path to the saved CountVectorizer object.
    similarity_file (str): The file path to the saved similarity matrix.
    top_n (int, optional): Maximum number of recommendations to return.
        Defaults to 3.

    Returns:
    list: A list of ``(course_row, score)`` tuples ordered from highest to lowest
        score; courses with equal scores keep their DataFrame order.

    Raises:
    IndexError: If the matrix produces more scores than ``df`` has rows.
    """
    # Preprocess input string
    input_string = preprocess_text(input_string)

    # Load CountVectorizer object.
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code -- only point cv_file at artifacts you trust.
    cv = joblib.load(cv_file)

    # Transform input string using the loaded CountVectorizer
    input_vector = cv.transform([input_string]).toarray()

    # Load similarity matrix
    similarity_matrix = np.load(similarity_file)

    # One score per course; the product has a single row because there is one query
    scores = np.dot(input_vector, similarity_matrix.T)[0]

    # Scores are mapped to courses by position, so every score needs a row.
    if len(scores) > len(df):
        raise IndexError(
            f"similarity scores ({len(scores)}) exceed the number of courses in df ({len(df)})"
        )

    # Stable descending order: negating the scores and sorting ascending keeps
    # tied courses in their original order, and only the top rows are ever
    # materialized from the DataFrame.
    limit = max(int(top_n), 0)
    ranked_positions = np.argsort(-scores, kind="stable")[:limit]
    return [(df.iloc[position], scores[position]) for position in ranked_positions]

## Recommendation Test

In [4]:
# Course catalogue that the recommender ranks over
df = pd.read_csv("../data/courses.csv")

# Saved artifacts consumed by recommend_course
cv_file = "../models/count_vectorizer.pkl"
similarity_file = "../models/similarity_matrix.npy"

# Smoke test: ask for recommendations on a sample query and dump every column
recommendations = recommend_course("Java", df, cv_file, similarity_file)
print("Top recommended courses:")
for course, _score in recommendations:
    # One "column: value" line per field, followed by a blank separator line
    course_report = "".join(f"{field}: {value}\n" for field, value in course.items())
    print(course_report + "\n", end="")

Top recommended courses:
course_title: Android App Development
course_organization: Vanderbilt University
course_certificate_type: Specialization
course_time: 3 - 6 Months
course_rating: 4.8
course_reviews_num: 3.5k
course_difficulty: Beginner
course_url: https://www.coursera.org/specializations/android-app-development
course_students_enrolled: 67,777
course_skills: ['Software Testing', 'Android Software Development', 'Java Programming', 'Junit']
course_summary: ['Understand the major organ systems, their functions and relationships within the body.']
course_description: In this Specialization you will explore human anatomy using a systems approach, and a vast library of multimedia materials, so you may understand the features of different organ systems in relation to the human body’s form and function.
This Specialization covers the following organ systems:
   Integumentary System
   Musculoskeletal System
   Cardiovascular System
   Respiratory System
   Urinary System
   Nervous Sys

## Chatbot Building

In [5]:
import gradio as gr
import textwrap


def chatbot(message, history):
    """
    Responds to user messages by recommending courses based on the message content.

    Recommendations whose similarity score is not positive are discarded, so a
    query that matches nothing yields the "no courses" reply instead of an
    arbitrary list of courses.

    Args:
        message (str): The user's message.
        history (list): History of previous messages (not used in this function).

    Returns:
        str: Response message recommending courses, a notice that nothing matched,
            or a request for a valid message when the input is blank or not a string.
    """
    # Blank or non-text input: nothing to search for
    if not isinstance(message, str) or not message.strip():
        return "Please enter a valid message."

    # Call the recommend_course function to get course recommendations
    recommendations = recommend_course(message, df, cv_file, similarity_file)

    # recommend_course returns its top rows even when every score is zero;
    # keep only the courses that actually matched the query.
    matched_courses = [
        course for course, similarity_score in recommendations if similarity_score > 0
    ]
    if not matched_courses:
        return "No courses found matching your query."

    # Build the reply from parts and join once instead of repeated concatenation
    parts = ["Top recommended courses:\n"]
    for course in matched_courses:
        parts.extend(
            f"   {column_name.capitalize()}: {value}\n"
            for column_name, value in course.items()
        )
        parts.append("\n")
    return "".join(parts)


# Chat UI that routes every user message through `chatbot`
iface = gr.ChatInterface(
    chatbot,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(
        # The bot recommends courses for a free-text topic, so prompt for one
        # (the previous placeholder asked for a yes/no question).
        placeholder="Tell me what you would like to learn",
        container=False,
        scale=7,
    ),
    description="Ask questions about Jobs & Courses",
    examples=[
        "Do you have any suggestions for a Python course?",
        "What are the best courses for learning data science?",
        "Can you recommend a course for web development?",
        "I'm interested in machine learning. Any recommendations?",
    ],
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    title="Chat with Bot",
)
# share=True also requests a temporary public URL in addition to the local one
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://c68a2daebfda5dcb0d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


