In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
from sklearn.preprocessing import LabelEncoder
from collections import Counter
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import sklearn
from sklearn.metrics import precision_score, recall_score, f1_score


In [None]:
sklearn.__version__

'1.2.2'

In [None]:
df = pd.read_csv('/content/courses_new_new.csv')
college_df = pd.read_csv('/content/Book1.csv')

In [None]:
X = df[['Logical Reasoning', 'Communication Skills', 'Quantitative Skills',
        'Analytical Skills', 'English','Maths','Physics','Biology','Chemistry','Accounting','Economics','Business Studies','History','Sociology','Politics','High School CGPA', 'Stream_Point']]
y = df['Recommended Course']

# Encode the target labels to numerical values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Create and train XGBoost classifier
xgb_classifier = xgb.XGBClassifier(random_state=42)
xgb_classifier.fit(X_train, y_train)

# Make predictions on the test data using the XGBoost model
y_pred_xgb = xgb_classifier.predict(X_test)

# Create and train K-Nearest Neighbors classifier
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(X_train, y_train)

# Make predictions on the test data using the K-Nearest Neighbors model
y_pred_knn = knn_classifier.predict(X_test)

# Create and train Support Vector Machine classifier
svm_classifier = SVC(kernel='linear', random_state=42)
svm_classifier.fit(X_train, y_train)

# Make predictions on the test data using the Support Vector Machine model
y_pred_svm = svm_classifier.predict(X_test)

accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
# Create an ensemble model to combine predictions using majority voting
ensemble_predictions = []
for i in range(len(y_test)):
    predictions = [y_pred_xgb[i], y_pred_knn[i], y_pred_svm[i]]
    majority_vote = Counter(predictions).most_common(1)[0][0]
    ensemble_predictions.append(majority_vote)


# Calculate the accuracy of the ensemble model
print(f"Accuracy of XGBoost classifier: {accuracy_xgb:.2f}")
print(f"Accuracy of K-Nearest Neighbors classifier: {accuracy_knn:.2f}")
print(f"Accuracy of Support Vector Machine classifier: {accuracy_svm:.2f}")
accuracy = accuracy_score(y_test, ensemble_predictions)
print(f"Accuracy of the Ensemble model: {accuracy:.2f}")


import tabulate

def predict_and_recommend(input_data):
    # Make predictions on the input data using individual classifiers
    new_data_predictions_xgb = xgb_classifier.predict(input_data)
    new_data_predictions_knn = knn_classifier.predict(input_data)
    new_data_predictions_svm = svm_classifier.predict(input_data)

    # Combine the predictions using majority voting for the new data
    predictions = [new_data_predictions_xgb[0], new_data_predictions_knn[0], new_data_predictions_svm[0]]
    top_three = [x[0] for x in Counter(predictions).most_common(3)]

    # Convert the predicted labels back to their original class labels
    predicted_courses = label_encoder.inverse_transform(top_three)

    # Get the top three recommended courses
    recommended_courses = list(predicted_courses)


    # Print recommended courses
    print("Recommended Courses:")
    for course in recommended_courses:
        print(course)

    # Filter recommended colleges based on the top three predicted courses
    recommended_colleges = []

    for predicted_course in recommended_courses:
        colleges_for_course = college_df[college_df['Recommended Course'] == predicted_course]
        recommended_colleges.append(colleges_for_course)

    # Sort the recommended colleges by NIRF ranking for each course
    sorted_recommended_colleges = [colleges.sort_values(by='NIRF_Ranking_2023') for colleges in recommended_colleges]

    # Print colleges for each recommended course in a table format
    for i, course in enumerate(recommended_courses):
        print(f"\nTop 5 Colleges for {course}:")
        college_table = sorted_recommended_colleges[i][['College', 'NIRF_Ranking_2023']].head(5)
        #print(tabulate.tabulate(college_table, headers='keys', tablefmt='grid'))
        print(college_table)

new_data = [[10, 10, 10, 0,100,100,100,100,100,0,0,0,0,0,0, 9, 8]]
predict_and_recommend(new_data)




#joblib.dump(predict_and_recommend, 'course_recommendation_modell.pkl')


Accuracy of XGBoost classifier: 0.85
Accuracy of K-Nearest Neighbors classifier: 0.08
Accuracy of Support Vector Machine classifier: 0.23
Accuracy of the Ensemble model: 0.80
Recommended Courses:
Physics (B.Sc)
Psychology (B.Sc)

Top 5 Colleges for Physics (B.Sc):
                                               College  NIRF_Ranking_2023
110            Indian Institute of Science (Bangalore)                  1
111                        University of Delhi (Delhi)                  2
112    Indian Institute of Technology Madras (Chennai)                  3
113     Indian Institute of Technology Bombay (Mumbai)                  4
114  Indian Institute of Technology Kharagpur (Khar...                  6

Top 5 Colleges for Psychology (B.Sc):
                                          College  NIRF_Ranking_2023
105  Indian Institute of Technology Delhi (Delhi)                  4
107                 St. Stephen's College (Delhi)                  5
106                         Miranda House (De



In [10]:
joblib.dump(xgb_classifier, 'xgb_classifier.pkl')
joblib.dump(knn_classifier, 'knn_classifier.pkl')
joblib.dump(svm_classifier, 'svm_classifier.pkl')


['svm_classifier.pkl']