In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Read the dataset from the Excel workbook
data = pd.read_excel("Data_set.xlsx")

# Target is the university name; the features are every remaining column
# except 'Status', with categorical columns expanded into indicator columns
y = data['Uni_name']
X = pd.get_dummies(data.drop(columns=['Uni_name', 'Status']))

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameter search space for the random forest.
# NOTE: 'auto' is deliberately absent from max_features. For classifiers it
# was only an alias of 'sqrt', so listing both evaluated the same models twice,
# and it was removed in scikit-learn 1.3, where every fit using it fails.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Exhaustive 5-fold cross-validated search over param_grid on the training
# split; n_jobs=-1 uses all available cores, verbose=2 prints per-fit progress.
clf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Estimator selected by the search (the best-scoring parameter combination)
best_clf = grid_search.best_estimator_

# Accuracy of the selected model on the held-out test set
y_pred = best_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy after tuning: {accuracy:.2f}")

def _prompt_float(prompt):
    """Ask for a number with *prompt*, repeating the question until it parses as a float."""
    while True:
        raw = input(prompt)
        try:
            return float(raw)
        except ValueError:
            print(f"'{raw}' is not a valid number, please try again.")


def recommend_universities():
    """Interactively collect a student's profile and predict a university.

    Reads thirteen answers from standard input, one-hot encodes them, aligns
    the resulting single row to ``X_train.columns`` and passes it to
    ``best_clf``. Numeric questions are asked again until a valid number is
    entered, so one typo does not discard the answers already given.

    Returns:
        The array returned by ``best_clf.predict`` for that one row.

    Warns:
        UserWarning: if any encoded input column is not among
            ``X_train.columns``. Such columns are dropped by the alignment
            step, so the corresponding answer has no effect on the prediction.
    """
    import warnings

    # (feature column, prompt text, whether the answer must be numeric)
    fields = (
        ('Subject', "Subject: ", False),
        ('Prev_Uni', "Previous University: ", False),
        ('CGPA', "CGPA: ", True),
        ('12th %', "12th Percentage: ", True),
        ('10th %', "10th Percentage: ", True),
        ('IELTS Listening', "IELTS Listening: ", True),
        ('IELTS Reading', "IELTS Reading: ", True),
        ('IELTS Writing', "IELTS Writing: ", True),
        ('IELTS Speaking', "IELTS Speaking: ", True),
        ('Overall Bands', "Overall Bands: ", True),
        ('Uni_fees', "University Fees: ", True),
        ('Location', "Location: ", False),
        ('Course', "Course: ", False),
    )

    # Gather student details in the same order the questions are listed
    student_details = {}
    for column, prompt, numeric in fields:
        student_details[column] = _prompt_float(prompt) if numeric else input(prompt)

    # Convert the student details into a one-row DataFrame and encode it
    student_df = pd.DataFrame([student_details])
    student_encoded = pd.get_dummies(student_df)

    # reindex() below silently discards columns the model never saw (for
    # example a misspelt category), so report them before they disappear.
    unmatched = student_encoded.columns.difference(X_train.columns)
    if not unmatched.empty:
        warnings.warn(
            "These inputs match no feature seen during training and are "
            f"ignored by the model: {list(unmatched)}",
            stacklevel=2,
        )

    # Align to the training feature layout; indicators not triggered by this
    # student are filled with 0
    student_encoded = student_encoded.reindex(columns=X_train.columns, fill_value=0)

    # Predict the university for the student
    return best_clf.predict(student_encoded)

# Run the interactive prompt once and print the result, which is the array
# returned by best_clf.predict for the entered profile
recommended_universities = recommend_universities()
print("Recommended Universities:", recommended_universities)


Fitting 5 folds for each of 324 candidates, totalling 1620 fits
