# In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Load the raw college dataset.
df = pd.read_csv("College Data.csv")

# Derive the female head-count; floor it at zero so rows where "Male"
# exceeds "Total Students" do not yield a negative count.
df["Female"] = (df["Total Students"] - df["Male"]).clip(lower=0)
# Force CGPA into the 4.0-10.0 range.
df["CGPA"] = df["CGPA"].clip(lower=4.0, upper=10.0)
# Keep only rows with at most one faculty member per ten students.
df = df[df["Faculty Count"] <= df["Total Students"] / 10]

features = ["Total Students", "Male", "Female", "CGPA", "Annual Family Income",
            "Research Papers Published", "Placement Rate", "Faculty Count"]
X = df[features]

# Encode the country names as integer class ids.
le = LabelEncoder()
y = le.fit_transform(df["Country"])

# Split BEFORE scaling: the scaler must learn its mean/variance from the
# training rows only, otherwise statistics of the held-out rows leak into
# preprocessing and the reported test metrics are no longer an honest estimate.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit on the training DataFrame (so the scaler records the column names),
# then apply the same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Report per-country metrics by name instead of by encoded integer id.
# `labels` is passed explicitly so the names stay aligned even when a class
# happens to be absent from the test split.
print(classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(le.classes_)),
    target_names=[str(c) for c in le.classes_],
    zero_division=0,
))

def predict_country(student_data):
    """
    Predict the country label for a single student record.

    student_data: List (or array-like) of values in the same order as the
        module-level ``features`` columns.

    Returns the decoded country label produced by ``le.inverse_transform``
    for the model's prediction.

    Raises ValueError if the number of values does not match the number of
    feature columns.
    """
    row = np.asarray(student_data).reshape(1, -1)
    if row.shape[1] != len(features):
        raise ValueError(
            f"expected {len(features)} values ({', '.join(features)}), "
            f"got {row.shape[1]}"
        )
    # The scaler was fitted on a DataFrame, so hand it a DataFrame with the
    # same column names; a bare ndarray makes scikit-learn warn that the
    # input has no valid feature names.
    row = pd.DataFrame(row, columns=features)
    scaled = scaler.transform(row)
    prediction = model.predict(scaled)
    return le.inverse_transform(prediction)[0]


# Collect one student's details from stdin and print the predicted country.
print("Enter your details to find the best-suited country:")
total_students = int(input("Total number of students in your preferred college: "))
male_students = int(input("Expected number of male students: "))
# The training data floors the derived female count at zero; apply the same
# rule here so a male count above the total cannot produce a negative feature.
female_students = max(total_students - male_students, 0)
cgpa = float(input("Your CGPA (4.0 - 10.0 scale): "))
# The training data clips CGPA to [4.0, 10.0]; keep the input in that range.
cgpa = min(max(cgpa, 4.0), 10.0)
family_income = int(input("Your annual family income: "))
research_papers = int(input("Number of research papers you have published: "))
placement_rate = float(input("Expected placement rate in percentage (e.g., 90.0 for 90%): "))
faculty_count = int(input("Expected faculty count in the college: "))


# Values must be listed in the same order as the training feature columns.
example_student = [
    total_students,
    male_students,
    female_students,
    cgpa,
    family_income,
    research_papers,
    placement_rate,
    faculty_count,
]
print("Best-suited country:", predict_country(example_student))