### Importing the Needed Libraries

In [63]:
#!pip install pydotplus
import pandas as pd
import numpy as np
from matplotlib import pyplot

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
import random
random.seed(0)

from six import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

import utils

# First: Using Regression Decision Tree as the Dataset Indicates

### Getting the Dataset

In [90]:
Admission_Predict_Dataset=pd.read_csv('Admission_Predict.csv')
Admission_Predict_Dataset

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.00,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.80
4,5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
395,396,324,110,3,3.5,3.5,9.04,1,0.82
396,397,325,107,3,3.0,3.5,9.11,1,0.84
397,398,330,116,4,5.0,4.5,9.45,1,0.91
398,399,312,103,3,3.5,4.0,8.78,0,0.67


### Splitting the Dataset into Features & Labels

In [91]:
features = Admission_Predict_Dataset.drop(['Chance of Admit','Serial No.'],axis=1)
labels = Admission_Predict_Dataset['Chance of Admit']

### Splitting the Dataset into Training, Testing, and Validation Data

In [92]:
features_train, features_validation_test, labels_train, labels_validation_test = train_test_split(
    features, labels, test_size=0.4, random_state=100)

In [93]:
features_validation, features_test, labels_validation, labels_test = train_test_split(
    features_validation_test, labels_validation_test, test_size=0.5, random_state=100)

In [68]:
print(len(features_train))
print(len(features_validation))
print(len(features_test))
print(len(labels_train))
print(len(labels_validation))
print(len(labels_test))

240
80
80
240
80
80


## Decision Tree (with max depth)

In [69]:
dt_regressor = DecisionTreeRegressor(max_depth=4)
dt_regressor.fit(features_train, labels_train)
reg_test_score=dt_regressor.score(features_test, labels_test)
reg_validation_score=dt_regressor.score(features_validation, labels_validation)
print(reg_test_score)
print(reg_validation_score)

0.6162559527402047
0.8497838914837949


### Building a Predictive System

In [70]:
# predict

print(dt_regressor.predict([[335,115,4,5,4.5,9,1]]))

print(dt_regressor.predict([[310,100,3,2,3,8,0]]))

[0.786875]
[0.554]




### Displaying the Decision Tree

In [71]:
utils.display_tree(dt_regressor)

InvocationException: GraphViz's executables not found

## Decision Tree (without any restrictions)

In [72]:
dt_regressor2 = DecisionTreeRegressor()
dt_regressor2.fit(features_train, labels_train)
reg_test_score2=dt_regressor2.score(features_test, labels_test)
reg_validation_score2=dt_regressor2.score(features_validation, labels_validation)
print(reg_test_score2)
print(reg_validation_score2)

0.42490031313304943
0.7606733748383107


### Building a Predictive System

In [73]:
# predict

print(dt_regressor2.predict([[335,115,4,5,4.5,9,1]]))

print(dt_regressor2.predict([[310,100,3,2,3,8,0]]))

[0.81]
[0.62]




### Displaying the Decision Tree

In [74]:
utils.display_tree(dt_regressor2)

InvocationException: GraphViz's executables not found

## Decision Tree (using squared error index)

In [100]:
dt_regressor3 = DecisionTreeRegressor(criterion='squared_error')  #{'absolute_error', 'friedman_mse', 'poisson', 'squared_error'}
dt_regressor3.fit(features_train, labels_train)
reg_test_score3=dt_regressor3.score(features_test, labels_test)
reg_validation_score3=dt_regressor3.score(features_validation, labels_validation)
print(reg_test_score3)
print(reg_validation_score3)

0.3847222081778622
0.7682568282803103


### Building a Predictive System

In [101]:
# predict

print(dt_regressor3.predict([[335,115,4,5,4.5,9,1]]))

print(dt_regressor3.predict([[310,100,3,2,3,8,0]]))

[0.81]
[0.63]




### Displaying the Decision Tree

In [None]:
utils.display_tree(dt_regressor3)

InvocationException: GraphViz's executables not found

# Second: Using Classification Decision Tree After Modifying the Dataset

### Mapping the Dataset

In [76]:
#changing the chance of admition column into categorical data to apply classification decision tree
Admission_Predict_Dataset['Chance of Admit'] = np.where(Admission_Predict_Dataset['Chance of Admit'] < 0.5, 'No', np.where(Admission_Predict_Dataset['Chance of Admit'] == 0.5, 'Unknown', 'Yes'))
Admission_Predict_Dataset

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,Yes
1,2,324,107,4,4.0,4.5,8.87,1,Yes
2,3,316,104,3,3.0,3.5,8.00,1,Yes
3,4,322,110,3,3.5,2.5,8.67,1,Yes
4,5,314,103,2,2.0,3.0,8.21,0,Yes
...,...,...,...,...,...,...,...,...,...
395,396,324,110,3,3.5,3.5,9.04,1,Yes
396,397,325,107,3,3.0,3.5,9.11,1,Yes
397,398,330,116,4,5.0,4.5,9.45,1,Yes
398,399,312,103,3,3.5,4.0,8.78,0,Yes


### Splitting the Dataset into Features & Labels

In [77]:
features = Admission_Predict_Dataset.drop(['Chance of Admit','Serial No.'],axis=1)
labels = Admission_Predict_Dataset['Chance of Admit']

### Splitting the Dataset into Training, Testing, and Validation Data

In [78]:
features_train, features_validation_test, labels_train, labels_validation_test = train_test_split(
    features, labels, test_size=0.4, random_state=100)

In [79]:
features_validation, features_test, labels_validation, labels_test = train_test_split(
    features_validation_test, labels_validation_test, test_size=0.5, random_state=100)

In [80]:
print(len(features_train))
print(len(features_validation))
print(len(features_test))
print(len(labels_train))
print(len(labels_validation))
print(len(labels_test))

240
80
80
240
80
80


## Decision Tree (using max depth)

In [81]:
dt_classifier = DecisionTreeClassifier(max_depth=7)
dt_classifier.fit(features_train,labels_train)
cls_test_score=dt_classifier.score(features_test,labels_test)
cls_validation_score=dt_classifier.score(features_validation, labels_validation)
print(cls_test_score)
print(cls_validation_score)

0.9125
0.9375


### Building a Predictive System

In [82]:
# predict

print(dt_classifier.predict([[335,115,4,5,4.5,9,1]]))

print(dt_classifier.predict([[310,100,3,2,3,8,0]]))

['Yes']
['No']




### Displaying the Decision Tree

In [83]:
utils.display_tree(dt_classifier)

InvocationException: GraphViz's executables not found

## Decision Tree (using Gini index)

In [84]:
dt_classifier2 = DecisionTreeClassifier()
dt_classifier2.fit(features_train, labels_train)
cls_test_score2=dt_classifier2.score(features_test, labels_test)
cls_validation_score2=dt_classifier2.score(features_validation, labels_validation)
print(cls_test_score2)
print(cls_validation_score2)

0.9
0.9375


### Building a Predictive System

In [85]:
# predict

print(dt_classifier2.predict([[335,115,4,5,4.5,9,1]]))

print(dt_classifier2.predict([[310,100,3,2,3,8,0]]))

['Yes']
['No']




### Displaying the Decision Tree

In [86]:
utils.display_tree(dt_classifier2)

InvocationException: GraphViz's executables not found

## Decision Tree (using entropy index)

In [87]:
dt_classifier3 = DecisionTreeClassifier(criterion='entropy')
dt_classifier3.fit(features_train, labels_train)
cls_test_score3=dt_classifier3.score(features_test, labels_test)
cls_validation_score3=dt_classifier3.score(features_validation, labels_validation)
print(cls_test_score3)
print(cls_validation_score3)

0.8875
0.9125


### Building a Predictive System

In [88]:
# predict

print(dt_classifier3.predict([[335,115,4,5,4.5,9,1]]))

print(dt_classifier3.predict([[310,100,3,2,3,8,0]]))

['Yes']
['Yes']




### Displaying the Decision Tree

In [89]:
utils.display_tree(dt_classifier3)

InvocationException: GraphViz's executables not found