In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle

# Read the raw records and discard, in the same step, the columns that are
# removed before any encoding happens.
df = pd.read_csv('Student_course_data.csv').drop(
    ['User_ID', 'Name', 'Courses Explored', 'Completion Status', 'Feedback Rating'],
    axis=1,
)

# Expand each categorical feature into one indicator column per category.
df_encoded = pd.get_dummies(
    df,
    columns=[
        'Experience Level',
        'Preferred Learning Mode',
        'Area of Interest',
        'Technologies of Interest',
    ],
)

# Replace the course names in the target column with integer class ids.
# The fitted encoder stays available so ids can be mapped back to names later.
label_encoder = LabelEncoder()
label_encoder.fit(df_encoded['Courses Enrolled'])
df_encoded['Courses Enrolled'] = label_encoder.transform(df_encoded['Courses Enrolled'])



In [2]:
# Separate the encoded feature columns (X) from the integer-encoded target (y).
X = df_encoded.drop(columns=['Courses Enrolled'])
y = df_encoded['Courses Enrolled']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest on the training portion (seeded for reproducibility).
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Persist the fitted model. The context manager guarantees the file handle is
# flushed and closed before anything reopens model.pkl for reading.
with open("model.pkl", "wb") as model_file:
    pickle.dump(clf, model_file)


In [3]:
# Reload the classifier from model.pkl; the context manager closes the file
# handle as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [4]:
# Run the model that was loaded from disk over the held-out feature rows.
predicted_from_pickle = model.predict(X_test)

In [5]:
# Display the (rows, columns) dimensions of the held-out feature matrix.
X_test.shape

(100, 15)

In [6]:
# Display the shape of the prediction array returned by the reloaded model.
predicted_from_pickle.shape

(100,)

In [7]:


# Score the in-memory classifier on the held-out rows.
y_pred = clf.predict(X_test)

# Share of test rows whose predicted class equals the true class, shown as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model accuracy: {accuracy * 100:.2f}%")

Model accuracy: 29.00%


In [8]:

# Function to get the top-N recommended courses for a user (N defaults to 2)
def recommend_courses(user_data, model, encoder, num_recommendations=2):
    """Return the courses the model rates as most probable for one user.

    Parameters
    ----------
    user_data : array-like
        Feature values for a single user (one row).
    model : object
        Fitted classifier exposing ``predict_proba``. When it also exposes
        ``classes_`` (as scikit-learn classifiers do), that attribute is used
        to translate probability columns into encoded class labels.
    encoder : object
        Fitted label encoder exposing ``inverse_transform``; it converts
        encoded class labels back to course names.
    num_recommendations : int, default 2
        How many courses to return. Values of zero or less yield an empty
        result; values above the number of classes return every class.

    Returns
    -------
    The output of ``encoder.inverse_transform`` for the selected labels,
    ordered from most to least probable.
    """
    # Probability row for this single user; one entry per class the model knows.
    course_probs = model.predict_proba([user_data])[0]

    # Rank columns from most to least probable, then keep the first N.
    # Slicing after the reversal avoids the `[-0:]` pitfall, where asking for
    # zero recommendations would return every class.
    wanted = max(num_recommendations, 0)
    ranked_columns = course_probs.argsort()[::-1][:wanted]

    # predict_proba columns follow model.classes_, which may be only a subset of
    # the encoder's labels (e.g. a class missing from the training split), so a
    # column position is not necessarily the encoded label itself.
    class_labels = getattr(model, "classes_", None)
    if class_labels is None:
        top_labels = ranked_columns
    else:
        top_labels = class_labels[ranked_columns]

    # Convert encoded labels back to course names.
    return encoder.inverse_transform(top_labels)

# Example usage: Predict for a single user (from the test set)


In [9]:
# Positional row of the held-out set to use as the example user;
# edit this value to try a different user.
user_index = 1
user_data = X_test.iloc[user_index]
recommended_courses = recommend_courses(
    user_data,
    model=clf,
    encoder=label_encoder,
)

print(f"Top 2 recommended courses for user {user_index}: {recommended_courses}")

Top 2 recommended courses for user 1: ['JavaScript Essentials' 'Intro to HTML, CSS']


