In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle

# Read the raw student/course records and discard the columns that are not
# used as model inputs
df = pd.read_csv('Student_course_data.csv').drop(
    columns=['User_ID', 'Name', 'Courses Explored', 'Completion Status', 'Feedback Rating']
)

# Expand every categorical feature into one indicator column per category
df_encoded = pd.get_dummies(
    df,
    columns=[
        'Experience Level',
        'Preferred Learning Mode',
        'Area of Interest',
        'Technologies of Interest',
    ],
)

# Class index -> course title lookup; the position in the list is the index
course_mapping = dict(enumerate([
    "Java Basics",
    "Intro to HTML, CSS",
    "JavaScript Essentials",
    "Full-Stack fundamentals",
    "Data Science with Python",
]))


In [45]:
# Show the distinct values present in the target column
pd.unique(df_encoded["Courses Enrolled"])

array(['Java Basics', 'Intro to HTML, CSS', 'JavaScript Essentials',
       'Full-Stack fundamentals', 'Data Science with Python'],
      dtype=object)

In [46]:

# Invert course_mapping (index -> name) so course names can be turned into class indices
course_to_index = {name: index for index, name in course_mapping.items()}

# Series.map leaves NaN for any label that is missing from the dict. Fail early and
# name the offending labels instead of handing NaN targets to the classifier.
encoded_target = df['Courses Enrolled'].map(course_to_index)
unmapped_labels = df.loc[encoded_target.isna(), 'Courses Enrolled'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Courses missing from course_mapping: {list(unmapped_labels)}")

# Replace the course-name column with its integer class index
df_encoded['Courses Enrolled'] = encoded_target

# Define features (X) and target (y)
X = df_encoded.drop(columns=['Courses Enrolled'])
y = df_encoded['Courses Enrolled']

In [49]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest on the training portion
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Persist the fitted model; the context manager guarantees the file is
# flushed and closed even if pickling raises
with open("model.pkl", "wb") as model_file:
    pickle.dump(clf, model_file)


In [59]:
# Preview the first five rows of the held-out feature matrix
X_test.iloc[:5]

Unnamed: 0,Time commitment (hrs/week),Experience Level_Advanced,Experience Level_Beginner,Experience Level_Intermediate,Preferred Learning Mode_Hybrid,Preferred Learning Mode_Instructor-led,Preferred Learning Mode_Self-paced,Area of Interest_Data Science,Area of Interest_Front-End Development,Area of Interest_Full-Stack Development,Area of Interest_Software Development,"Technologies of Interest_HTML, CSS, JavaScript","Technologies of Interest_Java, Python","Technologies of Interest_JavaScript, Node.js, React","Technologies of Interest_Python, R, SQL"
361,10,False,True,False,True,False,False,False,True,False,False,False,False,True,False
73,12,True,False,False,False,False,True,False,True,False,False,False,True,False,False
374,9,True,False,False,False,False,True,False,True,False,False,False,False,False,True
155,13,True,False,False,True,False,False,True,False,False,False,True,False,False,False
104,17,False,False,True,True,False,False,False,False,True,False,False,False,False,True


In [58]:
# Preview the first five encoded training targets
y_train.iloc[:5]

249    2
433    1
19     1
322    1
332    0
Name: Courses Enrolled, dtype: int64

In [60]:
# Function to predict courses from a list of independent variables
def recommend_course(input_list, model, course_mapping):
    """Predict a course for one user and return its name.

    Args:
        input_list: Feature values for a single user, in the same order as the
            columns the model was trained on.
        model: Fitted estimator exposing ``predict``. If it also exposes
            ``n_features_in_`` / ``feature_names_in_`` (set by scikit-learn when
            fitted on a DataFrame), they are used to validate and label the input.
        course_mapping: Dict mapping a predicted class index to a course name.

    Returns:
        The course name looked up in ``course_mapping`` for the predicted index.

    Raises:
        ValueError: If the model reports ``n_features_in_`` and ``input_list``
            has a different number of values.
        KeyError: If the predicted index is not a key of ``course_mapping``.
    """
    # Catch a wrong-length input here so the error can state both counts
    expected_count = getattr(model, "n_features_in_", None)
    if expected_count is not None and len(input_list) != expected_count:
        raise ValueError(
            f"Expected {expected_count} feature values, got {len(input_list)}"
        )

    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is not None:
        # A model fitted on a DataFrame warns when predicting on unnamed data;
        # attach the training column names to the single input row
        input_data = pd.DataFrame([list(input_list)], columns=list(feature_names))
    else:
        # Ensure input is in the right format for prediction (2D: one row)
        input_data = [input_list]

    # Predict course index
    predicted_course_index = model.predict(input_data)[0]

    # Map the predicted index to course name
    return course_mapping[predicted_course_index]

# Example of using the model to predict for a new user
# Replace these values with actual values from the independent variables (same order as X columns)
new_user_data = [
    3.0,  # Time commitment (hrs/week)
    1.0,  # Experience Level_Advanced
    0.0,  # Experience Level_Beginner
    0.0,  # Experience Level_Intermediate
    0.0,  # Preferred Learning Mode_Hybrid
    1.0,  # Preferred Learning Mode_Instructor-led
    0.0,  # Preferred Learning Mode_Self-paced
    0.0,  # Area of Interest_Data Science
    1.0,  # Area of Interest_Front-End Development
    0.0,  # Area of Interest_Full-Stack Development
    0.0,  # Area of Interest_Software Development
    0.0,  # Technologies of Interest_HTML, CSS, JavaScript
    1.0,  # Technologies of Interest_Java, Python
    0.0,  # Technologies of Interest_JavaScript, Node.js, React
    0.0   # Technologies of Interest_Python, R, SQL
]

# Load the model from the pickle file. Unpickling can execute arbitrary code,
# so only load files that come from a trusted source. The context manager
# closes the file handle once the model is loaded.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# Get the recommended course for the new user
recommended_course = recommend_course(new_user_data, model, course_mapping)

print(f"Recommended course: {recommended_course}")

Recommended course: Data Science with Python


