## Import Dataset

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the semicolon-delimited student-mat.csv file into a DataFrame.
data = pd.read_csv('data/student-mat.csv', sep=';')

In [3]:
# Preview the first rows of the raw table to check that the ';' separator
# split the file into the expected columns.
data.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10


## Pre-processing

In [4]:
# One-hot encode the categorical (string) columns such as school, sex and
# Mjob; numeric columns (age, G1..G3, ...) pass through unchanged.
# NOTE: this rebinds `data`, so the raw frame is no longer available after
# this cell -- re-run the read_csv cell to get it back.
data = pd.get_dummies(data)

In [5]:
# Check the encoded frame: each categorical column should now appear as
# indicator columns (e.g. activities_no / activities_yes).
data.head()

Unnamed: 0,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,...,activities_no,activities_yes,nursery_no,nursery_yes,higher_no,higher_yes,internet_no,internet_yes,romantic_no,romantic_yes
0,18,4,4,2,2,0,4,3,4,1,...,1,0,0,1,0,1,1,0,1,0
1,17,1,1,1,2,0,5,3,3,1,...,1,0,1,0,0,1,0,1,1,0
2,15,1,1,1,2,3,4,3,2,2,...,1,0,0,1,0,1,0,1,1,0
3,15,4,2,1,3,0,3,2,2,1,...,0,1,0,1,0,1,0,1,0,1
4,16,3,3,1,2,0,4,3,2,1,...,1,0,0,1,0,1,1,0,1,0


In [6]:
# Feature matrix: every column of the encoded frame except the target G3.
# NOTE(review): G1 and G2 are kept as features -- presumably earlier grades;
# confirm that using them to predict G3 is intended.
X = data.drop(columns=['G3'])

In [7]:
# Target vector: the G3 column (integer-valued; the classifiers below treat
# each distinct value as its own class).
y = data['G3']

In [8]:
# Preview the feature matrix built by dropping G3 from the encoded frame.
X.head()

Unnamed: 0,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,...,activities_no,activities_yes,nursery_no,nursery_yes,higher_no,higher_yes,internet_no,internet_yes,romantic_no,romantic_yes
0,18,4,4,2,2,0,4,3,4,1,...,1,0,0,1,0,1,1,0,1,0
1,17,1,1,1,2,0,5,3,3,1,...,1,0,1,0,0,1,0,1,1,0
2,15,1,1,1,2,3,4,3,2,2,...,1,0,0,1,0,1,0,1,1,0
3,15,4,2,1,3,0,3,2,2,1,...,0,1,0,1,0,1,0,1,0,1
4,16,3,3,1,2,0,4,3,2,1,...,1,0,0,1,0,1,1,0,1,0


In [9]:
# Preview the target values (G3) for the first rows.
y.head()

0     6
1     6
2    10
3    15
4    10
Name: G3, dtype: int64

## Classifiers

In [10]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

In [11]:
# Display labels for the models under comparison. Order matters: each label
# is paired by position with the matching entry in `classifiers`.
names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
]

In [12]:
# Models to compare, in the same order as `names` (paired by position).
# Every estimator that draws random numbers during fitting gets a fixed seed
# so that Restart & Run All reproduces the same cross-validation scores.
# KNeighborsClassifier and GaussianNB take no random_state; SVC only uses one
# for probability estimates, which are not enabled here.
SEED = 42
classifiers = [
    KNeighborsClassifier(3),            # positional arg is n_neighbors
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),                  # kernel left at its default (RBF)
    DecisionTreeClassifier(max_depth=5, random_state=SEED),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1,
                           random_state=SEED),
    MLPClassifier(alpha=1, hidden_layer_sizes=(50, 20), random_state=SEED),
    AdaBoostClassifier(random_state=SEED),
    GaussianNB(),
]

In [13]:
from sklearn.model_selection import cross_val_score

In [14]:
# Score each model with 5-fold cross-validation on the full data set.
# No `scoring` argument is passed, so each estimator's default score is used
# (accuracy for these classifiers). One score per fold is printed next to
# the model's label.
for name, clf in zip(names, classifiers):
    scores = cross_val_score(clf, X, y, cv=5)
    print(name, scores)



Nearest Neighbors [0.31460674 0.25609756 0.2987013  0.31081081 0.2739726 ]
Linear SVM [0.40449438 0.40243902 0.51948052 0.39189189 0.43835616]




RBF SVM [0.13483146 0.13414634 0.14285714 0.14864865 0.15068493]
Decision Tree [0.35955056 0.42682927 0.42857143 0.41891892 0.52054795]




Random Forest [0.14606742 0.17073171 0.16883117 0.17567568 0.24657534]




Neural Net [0.34831461 0.29268293 0.38961039 0.32432432 0.36986301]
AdaBoost [0.30337079 0.36585366 0.28571429 0.25675676 0.24657534]
Naive Bayes [0.14606742 0.18292683 0.12987013 0.13513514 0.10958904]


