In [25]:
# import useful libraries

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

In [26]:
# read csv file
df = pd.read_csv('Students.csv')

In [27]:
#remove columns not needed for analysis
df = df[[ 'Gender', 'Age', 'Attendance', 'TestScore']]

In [28]:
#  convert required columns to preferred data types
df['TestScore'] = df['TestScore'].astype(int)

# change the 'male' and 'female' values to 0's and 1's
df['Gender'] = df['Gender'].replace({ 'male': 0, 'female': 1 })

df

Unnamed: 0,Gender,Age,Attendance,TestScore
0,0,58,0.81,64
1,0,54,0.55,35
2,0,50,0.62,60
3,0,34,0.84,69
4,0,58,0.59,36
...,...,...,...,...
9875,0,55,0.59,43
9876,0,70,0.47,26
9877,1,48,0.83,65
9878,0,52,0.81,59


In [29]:
# Binning the data
bin = [0, 56, 72, 100]
df['TestScoreRange'] = pd.cut(df['TestScore'] , bin, right=True)
df

Unnamed: 0,Gender,Age,Attendance,TestScore,TestScoreRange
0,0,58,0.81,64,"(56, 72]"
1,0,54,0.55,35,"(0, 56]"
2,0,50,0.62,60,"(56, 72]"
3,0,34,0.84,69,"(56, 72]"
4,0,58,0.59,36,"(0, 56]"
...,...,...,...,...,...
9875,0,55,0.59,43,"(0, 56]"
9876,0,70,0.47,26,"(0, 56]"
9877,1,48,0.83,65,"(56, 72]"
9878,0,52,0.81,59,"(56, 72]"


In [30]:
# create train and test data

x = df.drop( ['TestScore', 'TestScoreRange'] , axis = 1)
y = df['TestScore']
z = df['TestScoreRange']
x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(x, y, z, test_size = 0.1)

In [31]:
# train the model
model = DecisionTreeClassifier()
model.fit(x_train, y_train)
predicted = model.predict(x_test)
result = pd.DataFrame({  'Actual': y_test, 'Predicted': predicted, 'Range': z_test })
result

Unnamed: 0,Actual,Predicted,Range
9524,51,38,"(0, 56]"
7124,63,87,"(56, 72]"
6398,82,82,"(72, 100]"
9747,84,99,"(72, 100]"
9653,98,44,"(72, 100]"
...,...,...,...
5656,57,58,"(56, 72]"
6145,47,34,"(0, 56]"
46,65,75,"(56, 72]"
7258,72,62,"(56, 72]"


In [32]:
matrix = confusion_matrix(y_test, predicted)
print(matrix)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
