In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Sample data generation (You should replace this with your actual dataset)
data = {
    'class_10_percentage': [85, 78, 92, 65, 74, 88, 90, 55, 79, 94],
    'class_12_percentage': [82, 75, 89, 68, 72, 85, 87, 58, 76, 90],
    'college_cgpa_year1': [8.4, 7.6, 9.1, 6.4, 7.3, 8.7, 8.9, 5.3, 7.8, 9.3],
    'college_cgpa_year2': [8.5, 7.7, 9.2, 6.5, 7.4, 8.8, 9.0, 5.4, 7.9, 9.4],
    'college_cgpa_year3': [8.6, 7.8, 9.3, 6.6, 7.5, 8.9, 9.1, 5.5, 8.0, 9.5],
    'number_of_internships': [2, 1, 3, 0, 1, 2, 3, 0, 1, 3],
    'gender': ['Male', 'Female', 'Male', 'Female', 'Female', 'Male', 'Male', 'Female', 'Male', 'Female'],
    'number_of_backlogs': [0, 2, 0, 3, 1, 0, 0, 4, 1, 0],
    'number_of_certifications': [2, 1, 3, 0, 1, 2, 3, 0, 1, 3],
    'branch': ['CSE', 'ECE', 'CSE', 'IT', 'AIDS', 'CSE', 'ECE', 'IT', 'AIDS', 'CSE'],
    'placed': [1, 0, 1, 0, 0, 1, 1, 0, 0, 1]  # 1: Placed, 0: Not Placed
}

# Create DataFrame
df = pd.DataFrame(data)

# Calculate average college CGPA
df['college_cgpa'] = df[['college_cgpa_year1', 'college_cgpa_year2', 'college_cgpa_year3']].mean(axis=1)

# Convert categorical gender and branch features to numerical
df['gender'] = df['gender'].map({'Male': 1, 'Female': 0})
df['branch'] = df['branch'].str.upper()  # Convert branch to uppercase for consistency

df = pd.get_dummies(df, columns=['branch'])  # Convert branch to one-hot encoding

# Features and target variable
X = df[['class_10_percentage', 'class_12_percentage', 'college_cgpa', 'number_of_internships', 'gender', 'number_of_backlogs', 'number_of_certifications', 'branch_CSE', 'branch_ECE', 'branch_IT', 'branch_AIDS']]
y = df['placed']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy * 100:.2f}%')
print('Classification Report:')
print(report)

# Function to predict placement probability
def predict_placement(class_10_percentage, class_12_percentage, college_cgpa_year1, college_cgpa_year2, college_cgpa_year3, number_of_internships, gender, number_of_backlogs, number_of_certifications, branch):
    gender = 1 if gender.lower() == 'male' else 0
    college_cgpa = (college_cgpa_year1 + college_cgpa_year2 + college_cgpa_year3) / 3
    branch = branch.upper()  # Convert branch input to uppercase
    branch_columns = ['branch_CSE', 'branch_ECE', 'branch_IT', 'branch_AIDS']  # Adjust based on your branch categories
    branch_index = branch_columns.index(f'branch_{branch}')
    features = [[class_10_percentage, class_12_percentage, college_cgpa, number_of_internships, gender, number_of_backlogs, number_of_certifications] + [0] * len(branch_columns)]
    features[0][6 + branch_index] = 1
    probability = model.predict_proba(features)[0][1]
    return probability * 100

# Function to get valid user input
def get_input(prompt, type_func, condition):
    while True:
        try:
            value = type_func(input(prompt))
            if condition(value):
                return value
            else:
                print("Invalid input. Please try again.")
        except ValueError:
            print("Invalid input. Please try again.")

# Get user inputs
class_10_percentage = get_input("Enter Class 10 percentage: ", float, lambda x: 0 <= x <= 100)
class_12_percentage = get_input("Enter Class 12 percentage: ", float, lambda x: 0 <= x <= 100)
college_cgpa_year1 = get_input("Enter College CGPA for Year 1: ", float, lambda x: 0 <= x <= 10)
college_cgpa_year2 = get_input("Enter College CGPA for Year 2: ", float, lambda x: 0 <= x <= 10)
college_cgpa_year3 = get_input("Enter College CGPA for Year 3: ", float, lambda x: 0 <= x <= 10)
number_of_internships = get_input("Enter Number of Internships: ", int, lambda x: x >= 0)
gender = get_input("Enter Gender (Male/Female): ", str, lambda x: x.lower() in ['male', 'female'])
number_of_backlogs = get_input("Enter Number of Backlogs: ", int, lambda x: x >= 0)
number_of_certifications = get_input("Enter Number of Certification Courses: ", int, lambda x: x >= 0)
branch = get_input("Enter Branch (CSE/ECE/IT/AIDS): ", str, lambda x: x.upper() in ['CSE', 'ECE', 'IT', 'AIDS'])

# Predict and display result
probability = predict_placement(class_10_percentage, class_12_percentage, college_cgpa_year1, college_cgpa_year2, college_cgpa_year3, number_of_internships, gender, number_of_backlogs, number_of_certifications, branch)
print(f'The student has a {probability:.2f}% chance of being placed.')


Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



Enter Class 10 percentage:  23
Enter Class 12 percentage:  11
Enter College CGPA for Year 1:  2.4
Enter College CGPA for Year 2:  4.5
Enter College CGPA for Year 3:  3.2
Enter Number of Internships:  0
Enter Gender (Male/Female):  male
Enter Number of Backlogs:  2
Enter Number of Certification Courses:  0
Enter Branch (CSE/ECE/IT/AIDS):  CSE


The student has a 17.00% chance of being placed.


