In [1]:
import pandas as pd

In [2]:
# Fetch the admissions dataset, skipping the download when a local copy exists

import os
import wget

dataset_url = 'https://github.com/GopalSaraf/Practicals/releases/download/ML-Datasets/admissions.csv'

already_downloaded = os.path.exists('admissions.csv')
if not already_downloaded:
    wget.download(dataset_url)

In [3]:
# Load the downloaded CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer="admissions.csv")

In [4]:
# Show the loaded DataFrame as this cell's output
df

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.00,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.80
4,5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
395,396,324,110,3,3.5,3.5,9.04,1,0.82
396,397,325,107,3,3.0,3.5,9.11,1,0.84
397,398,330,116,4,5.0,4.5,9.45,1,0.91
398,399,312,103,3,3.5,4.0,8.78,0,0.67


In [5]:
# Count the missing values in each column
df.isna().sum()

Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [6]:
# Feature matrix: every column except the first (Serial No.) and the last (target)
X = df.iloc[:, slice(1, -1)]
X

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,337,118,4,4.5,4.5,9.65,1
1,324,107,4,4.0,4.5,8.87,1
2,316,104,3,3.0,3.5,8.00,1
3,322,110,3,3.5,2.5,8.67,1
4,314,103,2,2.0,3.0,8.21,0
...,...,...,...,...,...,...,...
395,324,110,3,3.5,3.5,9.04,1
396,325,107,3,3.0,3.5,9.11,1
397,330,116,4,5.0,4.5,9.45,1
398,312,103,3,3.5,4.0,8.78,0


In [7]:
# Target vector: the last column of the frame (Chance of Admit)
y = df.iloc[:, df.shape[1] - 1]
y

0      0.92
1      0.76
2      0.72
3      0.80
4      0.65
       ... 
395    0.82
396    0.84
397    0.91
398    0.67
399    0.95
Name: Chance of Admit , Length: 400, dtype: float64

In [8]:
# Binarise the target: chances of 0.8 or more become 1, anything lower becomes 0.
# The second mask sees the output of the first, so the freshly written 1s are kept.
y = (
    y.mask(lambda chance: chance >= 0.8, 1)
     .mask(lambda chance: chance < 0.8, 0)
)

y

0      1.0
1      0.0
2      0.0
3      1.0
4      0.0
      ... 
395    1.0
396    1.0
397    1.0
398    0.0
399    1.0
Name: Chance of Admit , Length: 400, dtype: float64

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation.
# random_state is fixed so the shuffle is deterministic: without it every run
# produces a different split, and the confusion matrix and scores computed
# further down cannot be reproduced.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [10]:
from sklearn.tree import DecisionTreeClassifier

# Create the Decision Tree classifier object.
# random_state is fixed because the tree randomly permutes the candidate
# features at each split, so an unseeded tree can differ between runs even
# when it is fitted on identical training data.
clf = DecisionTreeClassifier(random_state=42)

In [11]:
# Fit the Decision Tree classifier on the training split
clf = clf.fit(X=X_train, y=Y_train)

In [12]:
# Predict class labels for the held-out test rows
y_pred = clf.predict(X=X_test)

In [13]:
from sklearn import metrics

# Tabulate true labels against predictions. The heading's embedded "\n" plus
# the newline separator reproduce the blank line between heading and matrix.
print("confusion matrix:\n", metrics.confusion_matrix(Y_test, y_pred), sep="\n")

confusion matrix:

[[61  6]
 [10 23]]


In [14]:
# Report each classification metric on the test split, one line per score
for label, scorer in (
    ("1. Accuracy Score:", metrics.accuracy_score),
    ("2. Precision Score:", metrics.precision_score),
    ("3. Recall Score:", metrics.recall_score),
    ("4. f1 Score:", metrics.f1_score),
):
    print(label, scorer(Y_test, y_pred))

1. Accuracy Score: 0.84
2. Precision Score: 0.7931034482758621
3. Recall Score: 0.696969696969697
4. f1 Score: 0.7419354838709677
