In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Example dataset: one tuple per student, with fields in the order given by
# `field_names`.
field_names = ['GPA', 'attendance_rate', 'income', 'demographics', 'enrollment_status']
student_rows = [
    (3.2, 85, 30000, 'urban', 1),
    (3.8, 92, 50000, 'rural', 1),
    (2.7, 70, 20000, 'urban', 0),
    (3.4, 88, 40000, 'suburban', 1),
    (3.0, 78, 25000, 'rural', 0),
]

# Transpose the row tuples into one list per column and build the frame
# column-wise.
data = pd.DataFrame({
    name: list(values) for name, values in zip(field_names, zip(*student_rows))
})

# Preprocessing: replace the demographics labels with their integer category
# codes. Codes follow the sorted category order (rural=0, suburban=1, urban=2).
# NOTE(review): this imposes an arbitrary ordering on a nominal feature.
demographics_as_category = data['demographics'].astype('category')
data['demographics'] = demographics_as_category.cat.codes

# Features (every column except the target) and target variable
feature_names = ['GPA', 'attendance_rate', 'income', 'demographics']
X = data[feature_names]
y = data['enrollment_status']

# Train-test split: hold out 20% of the rows for evaluation. random_state pins
# the shuffle so the same rows are selected on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initialize the model (seeded so the fitted forest is reproducible)
model = RandomForestClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))

# With a very small hold-out set a class may have no true or no predicted
# samples, which leaves its precision/recall undefined. zero_division=0 reports
# those metrics as 0 explicitly instead of emitting UndefinedMetricWarning.
# The heading and the report are printed separately so print's separator space
# does not shift the report's first line out of alignment.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Get feature importances: pair each feature column name with the fitted
# model's importance score, ordered from most to least important.
importances = pd.DataFrame({
    'Feature': X.columns,
    'Importance': model.feature_importances_,
}).sort_values(by='Importance', ascending=False)

# Print the heading and the table separately: passing both to a single print()
# call inserts a separator space before the table's header row, which shifts
# the column titles out of alignment with the rows below.
print("\nFeature Importances:")
print(importances)
