**Importing The Packages**

In [2]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from matplotlib.gridspec import GridSpec
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, precision_score,precision_recall_fscore_support, roc_auc_score, log_loss, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import plot_tree

**Reading The Dataset**

In [3]:
# Location of the dataset CSV. The original machine-specific path is kept as
# the default so existing runs behave exactly as before; set the
# DROPOUT_DATASET_PATH environment variable to run the notebook against a copy
# of the file stored somewhere else.
DATASET_PATH = os.environ.get(
    'DROPOUT_DATASET_PATH',
    r'F:\brightfuture\ml_models\dropout_prediction\dropout_dataset_final.csv',
)
data = pd.read_csv(DATASET_PATH)

# Columns removed from the frame before any further processing.
columns = ['Marital status','Application mode','Application order','Course','Previous qualification','Nacionality','Mother\'s qualification','Father\'s qualification','Mother\'s occupation',
           'Father\'s occupation','Displaced','Educational special needs','Debtor','Gender','Age at enrollment','International','Curricular units 1st sem (credited)',
           'Curricular units 1st sem (enrolled)','Curricular units 1st sem (evaluations)','Curricular units 1st sem (approved)','Curricular units 1st sem (without evaluations)',
           'Curricular units 2nd sem (credited)','Curricular units 2nd sem (enrolled)','Curricular units 2nd sem (evaluations)','Curricular units 2nd sem (approved)','Curricular units 2nd sem (without evaluations)',
           'Unemployment rate','Inflation rate','GDP','Scholarship holder'
           ]

# Rebind to a new frame instead of mutating in place (no inplace=True); a
# missing column still raises KeyError, as it did before.
data = data.drop(columns=columns)
data.head(5)

Unnamed: 0,Daytime/evening attendance,Tuition fees up to date,Curricular units 1st sem (grade),Curricular units 2nd sem (grade),Target
0,1,1,0.0,0.0,Dropout
1,1,0,4.666667,4.555556,Graduate
2,1,0,0.0,0.0,Dropout
3,1,1,4.47619,4.133333,Graduate
4,0,1,4.111111,4.333333,Graduate


**Data Preprocessing**

In [4]:
# Keep only the rows whose Target is something other than 'Enrolled'.
not_enrolled = data['Target'].ne('Enrolled')
data_filtered = data.loc[not_enrolled]
data_filtered.shape

(3630, 5)

In [5]:
# Recode the textual Target labels as integers: Dropout -> 0, Graduate -> 1.
# NOTE(review): the name `data_no_outliers` suggests outlier removal, but this
# cell only copies the filtered frame and recodes Target; the name is kept
# because later cells refer to it.
target_codes = {
    'Dropout': 0,
    'Graduate': 1
}
# assign() returns a new frame, so data_filtered itself is left untouched.
data_no_outliers = data_filtered.assign(
    Target=data_filtered['Target'].map(target_codes)
)
data_no_outliers.head(5)

Unnamed: 0,Daytime/evening attendance,Tuition fees up to date,Curricular units 1st sem (grade),Curricular units 2nd sem (grade),Target
0,1,1,0.0,0.0,0
1,1,0,4.666667,4.555556,1
2,1,0,0.0,0.0,0
3,1,1,4.47619,4.133333,1
4,0,1,4.111111,4.333333,1


**Model Selection and Training**

In [6]:
# Separate the label column from the predictor columns.
y = data_no_outliers['Target']
X = data_no_outliers.drop(columns=['Target'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

# Capture the feature names now, before X_train is overwritten by the scaled
# output below.
Names = list(X_train.columns.values)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

**Random Forest**

In [7]:
# Fixed seed so that the grid search, the selected forest and the metrics
# printed below are reproducible across kernel restarts.
RANDOM_STATE = 42

# Exhaustive search over forest size and tree depth, scored by 5-fold
# cross-validated accuracy on the training split.
rf = RandomForestClassifier(random_state=RANDOM_STATE)
param_grid = {'n_estimators': [50, 100, 150, 200], 'max_depth': [5, 10, 20, 30]}
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# GridSearchCV refits the best configuration on the whole training split by
# default (refit=True), so reuse that estimator instead of training a second,
# differently-seeded forest that is not the model the search evaluated.
best_rf = grid_search.best_estimator_

# Predict on the held-out test split.
y_pred = best_rf.predict(X_test)

# Report accuracy plus support-weighted precision, recall and F1.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Best Parameters: {'max_depth': 5, 'n_estimators': 100}
Accuracy: 0.86
Precision: 0.86
Recall: 0.86
F1-score: 0.85


In [8]:
# Persist the fitted artefacts.
# The classifier was trained on features standardised by `scaler`, so the
# fitted scaler is saved next to it; inference code must apply the same
# transformation before calling predict().
joblib.dump(scaler, "dropout-scaler.joblib")
# Kept as the last expression so the cell still displays the model file name.
joblib.dump(best_rf, "dropout-model.joblib")

['dropout-model.joblib']