In [41]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import re
from sklearn.neighbors import NearestNeighbors

# Load your dataset
data = pd.read_csv('Coursera.csv')
data = data[data['Course Rating'] != 'Not Calibrated']
data['Course Rating'] = data['Course Rating'].apply(lambda x: float(re.findall(r'\d+\.\d+', str(x))[0]) if re.findall(r'\d+\.\d+', str(x)) else 0.0)

# Preprocess data (handle missing values, encoding, and text features)
data['Difficulty Level'].fillna('Unknown', inplace=True)
data['Course Description'].fillna('', inplace=True)
data['Skills'].fillna('', inplace=True)

# Encoding categorical features
label_encoder = LabelEncoder()
data['Difficulty Level'] = label_encoder.fit_transform(data['Difficulty Level'])

# Create TF-IDF vectors for text data with lowercase and whitespace-separated words
tfidf_vectorizer = TfidfVectorizer(stop_words='english', lowercase=True, token_pattern=r'\w+|\s+')
text_features = tfidf_vectorizer.fit_transform(data['Course Description'] + ' ' + data['Skills'])

# Nearest Neighbors (Item-Item Collaborative Filtering) Model
knn_model = NearestNeighbors(n_neighbors=15, metric='cosine')
knn_model.fit(text_features)

# Function to get item-item collaborative filtering recommendations
# Function to get item-item collaborative filtering recommendations
def recommend_item_item_collaborative_filtering(course_index, top_n=10):
    distances, indices = knn_model.kneighbors(text_features[course_index])
    
    # Exclude the item itself and filter by rating and difficulty level
    filtered_recommendations = data.iloc[indices[0][1:top_n + 1]]
    filtered_recommendations = filtered_recommendations[
        (filtered_recommendations['Course Rating'] > 0) &
        (filtered_recommendations['Difficulty Level'] >= 1)  # Assuming 1 represents beginner
    ]
    
    # Sort the filtered recommendations by course rating and difficulty level
    sorted_recommendations = filtered_recommendations.sort_values(by=['Course Rating', 'Difficulty Level'], ascending=[False, True])
    
    return sorted_recommendations

# Example usage with user input
# Assuming the user input is stored in a variable called 'user_input'
user_input = input("Enter your search query: ")

result_index = data[data['Course Name'].str.contains(user_input, case=False)].index
if not result_index.empty:
    first_index = result_index[0]
    # print(data.loc[first_index])
else:
    print("No matching results found.")

# print(first_index)

recommendations = recommend_item_item_collaborative_filtering(first_index)

# Displaying sorted recommendations as a dataframe
df_recommendations = pd.DataFrame(recommendations)
# df_recommendations.sort_values(by='Difficulty Level', ascending=True, inplace=True)
df_recommendations

Unnamed: 0,Course Name,University,Difficulty Level,Course Rating,Course URL,Course Description,Skills
1131,Digital Product Management: Modern Fundamentals,University of Virginia,1,4.7,https://www.coursera.org/learn/uva-darden-digi...,"Not so long ago, the job of product manager wa...",lean startup Leadership and Management Produ...
1638,Digital Product Management: Modern Fundamentals,University of Virginia,1,4.7,https://www.coursera.org/learn/uva-darden-digi...,"Not so long ago, the job of product manager wa...",lean startup Leadership and Management Produ...
1060,Introduction to Software Product Management,University of Alberta,1,4.6,https://www.coursera.org/learn/introduction-to...,This course highlights the importance and role...,project Software Engineering Product Managem...
2391,Agile Software Development,University of Minnesota,1,4.6,https://www.coursera.org/learn/agile-software-...,Software industry is going crazy on agile meth...,scrum (software development) agile manifesto ...
2790,Introduction to Project Management,Coursera Project Network,2,4.5,https://www.coursera.org/learn/introduction-pr...,This course is designed to give you the fundam...,Planning Leadership and Management project p...
1029,Engineering Practices for Building Quality Sof...,University of Minnesota,2,4.1,https://www.coursera.org/learn/engineering-pra...,Agile embraces change which means that team sh...,code coverage version control systems softwa...


In [39]:
df_recommendations.head()

Unnamed: 0,Course Name,University,Difficulty Level,Course Rating,Course URL,Course Description,Skills
1658,"Career planning: resume/CV, cover letter, inte...",National Research Tomsk State University,1,4.8,https://www.coursera.org/learn/career-planning...,Do you want a job? A first job or a better job...,job attitude Writing career r&d management ...
812,Internet Giants: The Law and Economics of Medi...,The University of Chicago,1,4.8,https://www.coursera.org/learn/internetgiants,This seven-week course will explore the relati...,intellectual property Patent Law intellectua...
806,"Photography Techniques: Light, Content, and Sh...",Michigan State University,3,4.8,https://www.coursera.org/learn/photography-tec...,Welcome to Course FOUR! In Modules 1-4 you w...,film adobe photoshop digital image digital ...
2225,Career Options: Exploring a New Career,University System of Georgia,3,3.8,https://www.coursera.org/learn/career-exploration,This course is for you if you are interested i...,career college salary Planning relative ch...
1279,How to Get Skilled: Introduction to Individual...,The State University of New York,1,3.5,https://www.coursera.org/learn/skills-management,Do you want to gain a competitive edge on the ...,Human Resources training and development Pro...


In [45]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load your dataset
data = pd.read_csv('Coursera.csv')

# Preprocess data (handle missing values, encoding, etc.)
# You may need to convert the 'Course Rating' column to float as previously shown.

# Combine text features (Course Description and Skills)
data['TextFeatures'] = data['Course Description'] + ' ' + data['Skills']

# Create a TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english', lowercase=True)
tfidf_matrix = tfidf_vectorizer.fit_transform(data['TextFeatures'])

# Calculate the cosine similarity between courses
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Example: Recommend courses based on a search query
search_query = 'R'  # Replace with your search query

# Find the courses related to the search query
related_courses_indices = data[data['TextFeatures'].str.contains(search_query, case=False)].index

# Calculate the average cosine similarity for each course
average_cosine_similarity = cosine_sim[related_courses_indices].mean(axis=0)

# Sort the courses by average cosine similarity (descending order)
recommended_course_indices = average_cosine_similarity.argsort()[::-1]

# Get the top N recommendations (e.g., top 10)
top_n = 10
top_recommendations = data.iloc[recommended_course_indices[:top_n]]

# Print the top recommendations
print(top_recommendations[['Course Name', 'Course Rating']])

                                            Course Name Course Rating
1660                          Exploratory Data Analysis           4.3
1204                     Introduction to Data Analytics           4.7
576                              Getting Started with R           4.3
2159  Data Analytics:  Scraping Data using Hadley Wi...           4.2
918   Stock Analysis:  Create a Buy Signal Filter us...           4.7
2886                     Python for Data Science and AI           4.5
2519                     Python for Data Science and AI           4.5
2518                     Python for Data Science and AI           4.5
2039                     Python for Data Science and AI           4.5
2811    Analyze Box Office Data with Seaborn and Python           4.5
