In [19]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Relative path of the raw dataset CSV.
file_path = "college_recommendation_dataset.csv"

# Read the CSV into a DataFrame; the training cell picks it up as `df`.
df = pd.read_csv(filepath_or_buffer=file_path)



In [20]:
# Tunable settings for this cell, named once instead of repeated as literals.
TARGET_COLUMN = "Target College"  # column the classifier predicts
TEST_SIZE = 0.2                   # fraction of rows held out for evaluation
SEED = 42                         # seed shared by the split and the forest

# Encode a copy so the raw `df` stays untouched. Encoding `df` in place made
# this cell non-idempotent: on a second run no object columns remained, so
# `label_encoders` came back empty and the target encoder was refit on integer
# codes, losing the college names used by the report and by anything that
# later persists these encoders.
df_encoded = df.copy()

# Fit one LabelEncoder per text feature column and keep it, keyed by column
# name, so the same string -> integer mapping can be reapplied later.
label_encoders = {}
feature_text_columns = (
    df_encoded.drop(columns=TARGET_COLUMN).select_dtypes(include="object").columns
)
for column in feature_text_columns:
    le = LabelEncoder()
    df_encoded[column] = le.fit_transform(df_encoded[column])
    label_encoders[column] = le

# The target gets its own encoder; `classes_` maps integer codes back to names.
target_encoder = LabelEncoder()
df_encoded[TARGET_COLUMN] = target_encoder.fit_transform(df_encoded[TARGET_COLUMN])

# Features are every column except the target.
X = df_encoded.drop(columns=TARGET_COLUMN)
y = df_encoded[TARGET_COLUMN]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

model = RandomForestClassifier(n_estimators=100, random_state=SEED)
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Pass the full label list explicitly so `target_names` always lines up with
# the encoded classes, even if some class is missing from the test split.
# Names are stringified because the report measures their text width.
class_labels = list(range(len(target_encoder.classes_)))
class_names = [str(name) for name in target_encoder.classes_]
report = classification_report(
    y_test, y_pred, labels=class_labels, target_names=class_names
)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)



Accuracy: 0.9675
Classification Report:
                               precision    recall  f1-score   support

              Business School       1.00      0.94      0.97       155
       Design and Arts School       0.96      0.91      0.93       348
          Engineering College       1.00      0.94      0.97       135
         Liberal Arts College       0.99      1.00      1.00       258
              Medical College       1.00      1.00      1.00       307
Technology-focused University       0.94      1.00      0.97       660
Vocational Training Institute       0.96      0.90      0.93       137

                     accuracy                           0.97      2000
                    macro avg       0.98      0.95      0.97      2000
                 weighted avg       0.97      0.97      0.97      2000



In [21]:
import joblib

# Output locations for the three artifacts produced by the training cell.
model_path = "college_recommendation_model.pkl"
label_encoders_path = "college_label_encoders.pkl"
target_encoder_path = "college_target_encoder.pkl"

# (description, object, destination) for each artifact, in save order.
saved_artifacts = [
    ("Model", model, model_path),
    ("Label encoders", label_encoders, label_encoders_path),
    ("Target encoder", target_encoder, target_encoder_path),
]

# Write everything first, then report, so nothing is announced as saved
# unless every dump completed.
for description, artifact, destination in saved_artifacts:
    joblib.dump(artifact, destination)

for description, artifact, destination in saved_artifacts:
    print(f"{description} saved at: {destination}")


Model saved at: college_recommendation_model.pkl
Label encoders saved at: college_label_encoders.pkl
Target encoder saved at: college_target_encoder.pkl
