In [21]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
import joblib

In [22]:
# Load your dataset 
dataset = pd.read_csv('data.csv')

In [23]:
#load the dataset
# 1. Identify the target variable and features
X = dataset[['age', 'gender', 'score', 'other_features']]
y = dataset['enrollment_status']  # Binary: 1 for enrolled, 0 for not enrolled


In [27]:
# 2. Preprocess the data
# Use one-hot encoding for categorical variables like 'gender' and 'other_features'
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['age', 'score']),
        ('cat_gender', OneHotEncoder(), ['gender']),
        ('cat_other_features', OneHotEncoder(), ['other_features'])
    ])


In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


In [31]:
# Convert the transformed data back to a DataFrame
X_train = pd.DataFrame(preprocessor.fit_transform(X_train), columns=['age', 'score', 'gender_M', 'gender_F', 'other_feature_1', 'other_feature_2'])
X_test = pd.DataFrame(preprocessor.transform(X_test), columns=['age', 'score', 'gender_M', 'gender_F', 'other_feature_1', 'other_feature_2'])

ValueError: Specifying the columns using strings is only supported for pandas DataFrames

In [None]:
# 3. Train the model
classifier = RandomForestClassifier(n_estimators=200, random_state=0)
classifier.fit(X_train, y_train)

In [None]:
# 4. Make predictions and evaluate the model
y_pred = classifier.predict(X_test)

In [None]:
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


In [None]:
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

In [None]:
# 5. Save the model for future use
joblib.dump(classifier, 'enrollment_model.pkl')

In [None]:
# Optional: Load the model later if needed
loaded_model = joblib.load('enrollment_model.pkl')