<a href="https://colab.research.google.com/github/NellyKe/CapstoneProject/blob/main/Untitled10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load the synthetic data.
# NOTE: "/content" is the default upload location on Google Colab; update the
# path when running this script anywhere else.
file_path = "/content/Student_Enrollment_Prediction.csv"
data = pd.read_csv(file_path)

# Columns the pipeline relies on.
categorical_columns = ["Gender", "Socioeconomic_Status", "Program_Interest"]
features = [
    "Age",
    "Gender",
    "Socioeconomic_Status",
    "Previous_Grades",
    "Attendance_Rate",
    "Test_Scores",
    "Program_Interest",
    "Communication_Frequency",
]
target_columns = ["Enrolled", "Graduated"]

# Fail early, with a message that names the problem, instead of letting a
# bare KeyError surface halfway through the pipeline.
missing_columns = [
    name for name in features + target_columns if name not in data.columns
]
if missing_columns:
    raise KeyError(
        f"Missing required column(s) in {file_path}: {missing_columns}"
    )

# Encode categorical variables as integer codes. The fitted encoders are kept
# so the codes can be mapped back to the original labels later.
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Split features and targets
X = data[features]
y_enroll = data["Enrolled"]
y_graduate = data["Graduated"]

# Split ONCE for both targets. The results table below places enrollment and
# graduation outcomes side by side, so both test sets must contain exactly the
# same rows in the same order. A single call guarantees that by construction
# instead of relying on two separate calls sharing the same random_state.
(
    X_train,
    X_test,
    y_train_enroll,
    y_test_enroll,
    y_train_grad,
    y_test_grad,
) = train_test_split(X, y_enroll, y_graduate, test_size=0.2, random_state=42)

# Keep the per-task names available for any code that refers to them.
X_train_enroll = X_train_grad = X_train
X_test_enroll = X_test_grad = X_test

# Train a Random Forest Classifier for enrollment prediction
rf_enroll = RandomForestClassifier(random_state=42)
rf_enroll.fit(X_train_enroll, y_train_enroll)

# Train a Random Forest Classifier for graduation prediction
rf_grad = RandomForestClassifier(random_state=42)
rf_grad.fit(X_train_grad, y_train_grad)

# Make predictions on the shared held-out rows
y_pred_enroll = rf_enroll.predict(X_test_enroll)
y_pred_grad = rf_grad.predict(X_test_grad)

# Evaluate models
enroll_report = classification_report(y_test_enroll, y_pred_enroll)
graduate_report = classification_report(y_test_grad, y_pred_grad)

# Save results to DataFrame. `.values` drops the shuffled original index so
# the actual labels line up positionally with the prediction arrays.
results = pd.DataFrame({
    "Actual_Enrollment": y_test_enroll.values,
    "Predicted_Enrollment": y_pred_enroll,
    "Actual_Graduation": y_test_grad.values,
    "Predicted_Graduation": y_pred_grad,
})

# Display Predictions and Reports
print("\n--- Enrollment and Graduation Predictions ---")
print(results.head())

print("\n--- Enrollment Prediction Report ---")
print(enroll_report)

print("\n--- Graduation Prediction Report ---")
print(graduate_report)

# Save to CSV (relative path: written to the current working directory)
results_file_path = "Enrollment_Graduation_Predictions.csv"
results.to_csv(results_file_path, index=False)
print(f"\nPredictions saved to: {results_file_path}")


--- Enrollment and Graduation Predictions ---
   Actual_Enrollment  Predicted_Enrollment  Actual_Graduation  \
0                  0                     1                  0   
1                  0                     1                  1   
2                  1                     1                  1   
3                  0                     1                  1   
4                  1                     1                  0   

   Predicted_Graduation  
0                     1  
1                     1  
2                     1  
3                     1  
4                     0  

--- Enrollment Prediction Report ---
              precision    recall  f1-score   support

           0       0.25      0.07      0.11        29
           1       0.71      0.92      0.80        71

    accuracy                           0.67       100
   macro avg       0.48      0.49      0.45       100
weighted avg       0.57      0.67      0.60       100


--- Graduation Prediction Report ---
   