In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import sklearn.datasets
import sklearn.metrics
from pprint import pprint
import autosklearn.classification

In [2]:
df_sample = pd.read_csv("sample_submission.csv")
df_test = pd.read_csv("test.csv")
df_train = pd.read_csv("train.csv")
df_validation = pd.read_csv("validation.csv")
X_train = df_train.drop(columns="Category")
y_train = df_train["Category"]
submission_id = df_sample["Id"]

In [3]:
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    tmp_folder="/tmp/autosklearn_resampling_example_tmp",
    disable_evaluator_output=False,
    resampling_strategy="cv",
    resampling_strategy_arguments={"folds": 5},
)
automl.fit(X_train, y_train, dataset_name="student_dropout")

AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                      per_run_time_limit=30, resampling_strategy='cv',
                      resampling_strategy_arguments={'folds': 5},
                      time_left_for_this_task=120,
                      tmp_folder='/tmp/autosklearn_resampling_example_tmp')

In [4]:
print(automl.leaderboard())

          rank  ensemble_weight                type      cost   duration
model_id                                                                
2            1             0.62       random_forest  0.223344   8.079734
3            2             0.16       random_forest  0.249192  10.253532
4            3             0.02   gradient_boosting  0.255250  27.758085
5            4             0.10   gradient_boosting  0.256058   2.694634
7            5             0.10  passive_aggressive  0.262520   2.042094


In [None]:
pprint(automl.cv_results_)

In [5]:
predictions = automl.predict(X_train)
print("Accuracy score:", sklearn.metrics.accuracy_score(y_train, predictions))

Accuracy score: 0.9721324717285945


In [6]:
print("After re-fit")
automl.refit(X_train.copy(), y_train.copy())
predictions = automl.predict(X_train)
print("Accuracy score CV", sklearn.metrics.accuracy_score(y_train, predictions))

After re-fit
Accuracy score CV 0.9923263327948304


In [7]:
predictions = automl.predict(df_test)
print(predictions)
df_prediction = pd.DataFrame({"Id": submission_id, "Category": predictions})
df_prediction.to_csv("submission.csv", index=False)

['Dropout' 'Graduate' 'Graduate' 'Dropout' 'Graduate' 'Graduate'
 'Enrolled' 'Dropout' 'Graduate' 'Enrolled' 'Graduate' 'Graduate'
 'Graduate' 'Graduate' 'Dropout' 'Dropout' 'Graduate' 'Graduate'
 'Enrolled' 'Dropout' 'Graduate' 'Dropout' 'Graduate' 'Graduate'
 'Enrolled' 'Dropout' 'Dropout' 'Graduate' 'Graduate' 'Graduate'
 'Graduate' 'Graduate' 'Dropout' 'Dropout' 'Enrolled' 'Graduate'
 'Graduate' 'Dropout' 'Enrolled' 'Graduate' 'Graduate' 'Graduate'
 'Graduate' 'Graduate' 'Graduate' 'Graduate' 'Graduate' 'Graduate'
 'Graduate' 'Dropout' 'Dropout' 'Graduate' 'Graduate' 'Enrolled' 'Dropout'
 'Dropout' 'Graduate' 'Graduate' 'Graduate' 'Enrolled' 'Graduate'
 'Graduate' 'Graduate' 'Dropout' 'Graduate' 'Dropout' 'Dropout' 'Dropout'
 'Graduate' 'Graduate' 'Graduate' 'Dropout' 'Graduate' 'Graduate'
 'Dropout' 'Dropout' 'Dropout' 'Graduate' 'Graduate' 'Graduate' 'Graduate'
 'Enrolled' 'Enrolled' 'Dropout' 'Graduate' 'Dropout' 'Graduate'
 'Graduate' 'Dropout' 'Graduate' 'Dropout' 'Graduate' '

In [10]:
df_train

Unnamed: 0,Id,Marital status,Application mode,Application order,Course,Daytime/evening attendance,Previous qualification,Nacionality,Mother's qualification,Father's qualification,...,Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Category
0,3222,1,1,1,14,1,1,1,13,28,...,0,6,7,6,14.714286,0,11.1,0.6,2.02,Graduate
1,2388,1,8,4,9,1,1,1,23,14,...,0,5,10,2,12.500000,0,11.1,0.6,2.02,Dropout
2,1865,1,12,1,17,0,1,1,1,1,...,0,5,5,5,12.400000,0,15.5,2.8,-4.06,Graduate
3,652,1,12,1,12,1,12,1,1,5,...,0,8,11,7,12.614286,0,12.7,3.7,-1.70,Graduate
4,4220,1,1,1,14,1,1,1,19,28,...,0,5,5,0,0.000000,0,9.4,-0.8,-3.12,Dropout
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2471,4286,1,1,1,6,1,1,1,22,14,...,0,5,9,4,13.666667,1,10.8,1.4,1.74,Graduate
2472,4399,1,1,1,11,1,1,1,22,14,...,0,6,6,6,14.333333,0,9.4,-0.8,-3.12,Graduate
2473,3526,2,12,1,4,1,1,1,1,3,...,2,7,7,5,11.200000,0,8.9,1.4,3.51,Dropout
2474,1050,1,1,1,2,1,1,1,1,28,...,0,0,0,0,0.000000,0,13.9,-0.3,0.79,Graduate
