## Import Dataset

In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv(r'data/student-mat.csv', sep = ';')

In [3]:
data.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10


## Pre-processing

In [4]:
from sklearn import preprocessing

In [5]:
le = preprocessing.LabelEncoder()

In [6]:
for column in data.columns:
    if isinstance(data[column][0], str):
        le.fit(data[column])
        data[column] = le.transform(data[column])

In [7]:
data.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,0,0,18,1,0,0,4,4,0,4,...,4,3,4,1,1,3,6,5,6,6
1,0,0,17,1,0,1,1,1,0,2,...,5,3,3,1,1,3,4,5,5,6
2,0,0,15,1,1,1,1,1,0,2,...,4,3,2,2,3,3,10,7,8,10
3,0,0,15,1,0,1,4,2,1,3,...,3,2,2,1,1,5,2,15,14,15
4,0,0,16,1,0,1,3,3,2,2,...,4,3,2,1,2,5,4,6,10,10


In [8]:
X = data.drop(['G3'], axis = 1)

In [9]:
y = data['G3']

In [10]:
X.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2
0,0,0,18,1,0,0,4,4,0,4,...,0,4,3,4,1,1,3,6,5,6
1,0,0,17,1,0,1,1,1,0,2,...,0,5,3,3,1,1,3,4,5,5
2,0,0,15,1,1,1,1,1,0,2,...,0,4,3,2,2,3,3,10,7,8
3,0,0,15,1,0,1,4,2,1,3,...,1,3,2,2,1,1,5,2,15,14
4,0,0,16,1,0,1,3,3,2,2,...,0,4,3,2,1,2,5,4,6,10


In [11]:
y.head()

0     6
1     6
2    10
3    15
4    10
Name: G3, dtype: int64

## Classifiers

In [12]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

In [13]:
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes"]

In [14]:
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, hidden_layer_sizes = (50, 20)),
    AdaBoostClassifier(),
    GaussianNB()]

In [15]:
from sklearn.model_selection import cross_val_score

In [16]:
for name, clf in zip(names, classifiers):
    scores = cross_val_score(clf, X, y, cv=5)
    print(name, scores)



Nearest Neighbors [0.23595506 0.29268293 0.36363636 0.36486486 0.24657534]
Linear SVM [0.43820225 0.3902439  0.46753247 0.40540541 0.50684932]
RBF SVM [0.13483146 0.13414634 0.14285714 0.14864865 0.15068493]
Decision Tree [0.35955056 0.46341463 0.4025974  0.37837838 0.52054795]
Random Forest [0.17977528 0.12195122 0.20779221 0.24324324 0.34246575]




Neural Net [0.37078652 0.31707317 0.36363636 0.32432432 0.31506849]
AdaBoost [0.30337079 0.36585366 0.28571429 0.25675676 0.24657534]
Naive Bayes [0.28089888 0.35365854 0.18181818 0.28378378 0.17808219]


