In [1]:
# import the required packages/modules
import pandas as pd
from sklearn.tree import export_graphviz
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import graphviz

In [2]:
# load the dataset
# A forward slash is used as the separator: the previous "Datasets\d..." literal
# relied on "\d" being an unrecognized escape sequence (which Python warns about)
# and only resolved to a sub-directory on Windows. "/" works on every platform.
df = pd.read_excel("Datasets/datakelulusanmahasiswa.xls")

In [3]:
# display dataframe info: one line per column with its non-null count and dtype,
# which is where columns with missing values show up
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 379 entries, 0 to 378
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   NAMA              379 non-null    object 
 1   JENIS KELAMIN     379 non-null    object 
 2   STATUS MAHASISWA  379 non-null    object 
 3   UMUR              379 non-null    int64  
 4   STATUS NIKAH      379 non-null    object 
 5   IPS 1             379 non-null    float64
 6   IPS 2             379 non-null    float64
 7   IPS 3             379 non-null    float64
 8   IPS 4             379 non-null    float64
 9   IPS 5             379 non-null    float64
 10  IPS 6             379 non-null    float64
 11  IPS 7             379 non-null    float64
 12  IPS 8             372 non-null    float64
 13  IPK               376 non-null    float64
 14  STATUS KELULUSAN  379 non-null    object 
dtypes: float64(9), int64(1), object(5)
memory usage: 44.5+ KB


In [4]:
# remove rows that contain empty cells
# (inplace=True modifies `df` itself, so the incomplete rows are gone for every later cell)
df.dropna(inplace = True)
# re-check the summary to confirm every column now has the same non-null count
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 369 entries, 0 to 378
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   NAMA              369 non-null    object 
 1   JENIS KELAMIN     369 non-null    object 
 2   STATUS MAHASISWA  369 non-null    object 
 3   UMUR              369 non-null    int64  
 4   STATUS NIKAH      369 non-null    object 
 5   IPS 1             369 non-null    float64
 6   IPS 2             369 non-null    float64
 7   IPS 3             369 non-null    float64
 8   IPS 4             369 non-null    float64
 9   IPS 5             369 non-null    float64
 10  IPS 6             369 non-null    float64
 11  IPS 7             369 non-null    float64
 12  IPS 8             369 non-null    float64
 13  IPK               369 non-null    float64
 14  STATUS KELULUSAN  369 non-null    object 
dtypes: float64(9), int64(1), object(5)
memory usage: 46.1+ KB


In [5]:
# convert categorical data into numerical data
# One code table per categorical column; the codes are the ones the model is trained on.
category_codes = {
    'JENIS KELAMIN': {'LAKI - LAKI': 0, 'PEREMPUAN': 1},
    'STATUS MAHASISWA': {'MAHASISWA': 0, 'BEKERJA': 1},
    'STATUS NIKAH': {'BELUM MENIKAH': 0, 'MENIKAH': 1},
}

# Encode everything first and only write back once all columns are valid.
# Series.map turns any value that is missing from the dict into NaN without
# complaining, so an unexpected label - or running this cell a second time, when
# the columns already hold 0/1 - would silently blank the column. Fail loudly instead.
encoded_columns = {}
for column, codes in category_codes.items():
    encoded = df[column].map(codes)
    missing = encoded.isna()
    if missing.any():
        unmapped = sorted(set(df.loc[missing, column].astype(str)))
        raise ValueError(
            f"Column {column!r} contains values with no numeric code: {unmapped}"
        )
    encoded_columns[column] = encoded

for column, encoded in encoded_columns.items():
    df[column] = encoded


In [6]:
# print a preview of the dataset in string format
# Only the first rows are shown: printing the whole frame puts every record
# (including the NAMA column) into the saved notebook output and buries the
# cells that follow. to_string() keeps all columns visible without truncation.
print(df.head(10).to_string())

                               NAMA  JENIS KELAMIN  STATUS MAHASISWA  UMUR  STATUS NIKAH  IPS 1  IPS 2  IPS 3  IPS 4  IPS 5  IPS 6  IPS 7  IPS 8  IPK  STATUS KELULUSAN
0                    ANIK WIDAYANTI              1                 1    28             0   2.76   2.80   3.20   3.17   2.98   3.00   3.03   0.00  3.07        TERLAMBAT
1           DWI HESTYNA PRIHASTANTY              1                 0    32             0   3.00   3.30   3.14   3.14   2.84   3.13   3.25   0.00  3.17        TERLAMBAT
2                MURYA ARIEF BASUKI              1                 1    29             0   3.50   3.30   3.70   3.29   3.53   3.72   3.73   0.00  3.54        TERLAMBAT
3                     NANIK SUSANTI              1                 0    27             0   3.17   3.41   3.61   3.36   3.48   3.63   3.46   0.00  3.41        TERLAMBAT
4                 RIFKA ISTIQFARINA              1                 1    29             0   2.90   2.89   3.30   2.85   2.98   3.00   3.08   0.00  3.09        TE

In [7]:
# separate the target variable (Y) from the features (X)
# Slice by position *before* converting to an array. `df.values` on the whole
# frame has to fit names, numbers and labels into a single object-dtype array,
# which would make the feature matrix dtype=object; selecting the numeric
# columns first keeps X as a proper numeric array.
X = df.iloc[:, 1:14].values   # columns 1..13: every column between NAMA and STATUS KELULUSAN
Y = df.iloc[:, 14].values     # column 14: STATUS KELULUSAN

In [8]:
# hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
split = train_test_split(X, Y, random_state=100, test_size=0.2)
X_train, X_test, y_train, y_test = split

In [9]:
# configure the decision tree (entropy criterion); it is not fitted in this cell
tree_settings = {
    "criterion": "entropy",
    "max_depth": 4,
    "min_samples_leaf": 1,
    "random_state": 100,
}
dt_entropy = DecisionTreeClassifier(**tree_settings)

In [10]:
# perform the training on the training split only
# (the call is the last expression of the cell, so its result - the estimator's repr - is shown as output)
dt_entropy.fit(X_train, y_train)

DecisionTreeClassifier(criterion='entropy', max_depth=4, random_state=100)

In [11]:
# visualize the decision tree
# Class names are read from the fitted model instead of being typed by hand:
# export_graphviz expects them in the same order as `classes_`, so deriving them
# keeps the node captions correct if the labels ever change. Feature names are
# passed as a plain list of the same columns that were used to build X.
dot_data = export_graphviz(
    dt_entropy,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=list(df.columns[1:14]),
    class_names=[str(label) for label in dt_entropy.classes_],
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
# render() writes the drawing to disk and returns the path of the rendered file
graph.render("Decision Tree")

'Decision Tree.pdf'

In [12]:
# run the fitted tree on the held-out features and show the predicted labels
y_pred = dt_entropy.predict(X_test)
print("Predicted values:", y_pred, sep="\n")

Predicted values:
['TEPAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TERLAMBAT'
 'TEPAT' 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TERLAMBAT' 'TERLAMBAT' 'TEPAT'
 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TEPAT' 'TEPAT'
 'TEPAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TEPAT'
 'TEPAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TEPAT'
 'TERLAMBAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TEPAT' 'TEPAT'
 'TEPAT' 'TERLAMBAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TEPAT'
 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TERLAMBAT' 'TERLAMBAT'
 'TERLAMBAT' 'TERLAMBAT' 'TEPAT' 'TEPAT' 'TEPAT' 'TEPAT' 'TERLAMBAT'
 'TEPAT' 'TEPAT' 'TEPAT' 'TEPAT' 'TEPAT' 'TEPAT' 'TEPAT' 'TERLAMBAT'
 'TEPAT']


In [13]:
# score the predictions against the true test labels, then print each metric
class_labels = ['TEPAT', 'TERLAMBAT']  # fixes the row/column order of the confusion matrix
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
accuracy_pct = accuracy_score(y_test, y_pred) * 100
report = classification_report(y_test, y_pred)

print("Confusion Matrix: \n", cm)
print("Accuracy : ", accuracy_pct)
print("Report : \n", report)

Confusion Matrix: 
 [[45  4]
 [ 4 21]]
Accuracy :  89.1891891891892
Report : 
               precision    recall  f1-score   support

       TEPAT       0.92      0.92      0.92        49
   TERLAMBAT       0.84      0.84      0.84        25

    accuracy                           0.89        74
   macro avg       0.88      0.88      0.88        74
weighted avg       0.89      0.89      0.89        74



In [14]:
# import library to make gui in python
import tkinter as tk

In [15]:
def get_response(jk, sm, u, sn, ip1, ip2, ip3, ip4, ip5, ip6, ip7, ip8, ipk):
    """Predict the graduation status for one form submission and display it.

    The values are encoded the same way as the training columns, sent to
    ``dt_entropy.predict`` as a single row, and the predicted label is written
    to ``label_result``. Input that cannot be used is reported in
    ``label_result`` instead of raising inside the button callback.

    Parameters
    ----------
    jk : int
        Gender code (0 or 1) as selected by the radio buttons; passed through unchanged.
    sm : str
        Student status: "mahasiswa" (encoded 0) or "bekerja" (encoded 1).
        Case and surrounding whitespace are ignored.
    u : str or number
        Age.
    sn : str
        Marital status: "belum menikah" (encoded 0) or "menikah" (encoded 1).
        Case and surrounding whitespace are ignored.
    ip1, ..., ip8 : str or number
        Semester grade point values (IPS 1 to IPS 8).
    ipk : str or number
        Cumulative grade point value (IPK).

    Returns
    -------
    None
        The outcome (prediction or error message) is shown in ``label_result``.
    """
    status_codes = {"mahasiswa": 0, "bekerja": 1}
    marital_codes = {"belum menikah": 0, "menikah": 1}

    # Previously an unrecognized status was left as the raw text and only failed
    # later inside predict(); reject it here with a readable message instead.
    sm_code = status_codes.get(str(sm).strip().lower())
    if sm_code is None:
        label_result.config(text="Invalid Status Mahasiswa")
        return

    sn_code = marital_codes.get(str(sn).strip().lower())
    if sn_code is None:
        label_result.config(text="Invalid Status Nikah")
        return

    # The entry widgets deliver text, so convert every numeric field explicitly
    # and name the first field that is empty or not a number.
    numeric_fields = [
        ("Umur", u),
        ("IPS 1", ip1), ("IPS 2", ip2), ("IPS 3", ip3), ("IPS 4", ip4),
        ("IPS 5", ip5), ("IPS 6", ip6), ("IPS 7", ip7), ("IPS 8", ip8),
        ("IPK", ipk),
    ]
    numbers = []
    for field_name, raw in numeric_fields:
        try:
            number = float(raw)
            # float() also accepts "nan" and "inf", which are not usable ages or grades
            if number != number or number in (float("inf"), float("-inf")):
                raise ValueError(raw)
        except (TypeError, ValueError):
            label_result.config(text="Invalid number: " + field_name)
            return
        numbers.append(number)

    age, grades = numbers[0], numbers[1:]
    # Feature order must match the training columns:
    # gender, student status, age, marital status, IPS 1..8, IPK.
    prediction = dt_entropy.predict([[jk, sm_code, age, sn_code] + grades])

    # predict() returns one label per input row; show the label itself rather
    # than the whole array (which would be rendered with brackets and quotes).
    label_result.config(text=prediction[0])
    

In [16]:
# gui code program
HEIGHT = 500
WIDTH = 600
BACKGROUND = '#80c1ff'


def add_field(parent, caption, font_size, label_x, entry_x, y):
    """Put a caption label and a text entry on the same row of ``parent``.

    ``label_x``, ``entry_x`` and ``y`` are relative positions (fractions of the
    parent's size) handed to ``place``. Returns the entry widget so its text can
    be read later.
    """
    caption_label = tk.Label(parent, text=caption, font=('helvetica', font_size),
                             bg=BACKGROUND, anchor='w')
    caption_label.place(relwidth=0.2, relx=label_x, rely=y)

    entry = tk.Entry(parent)
    entry.place(relwidth=0.2, relx=entry_x, rely=y)
    return entry


root = tk.Tk()

# nothing is drawn on the canvas in this cell; it is packed with the requested
# height/width and the frame below is stretched over the whole window
canvas = tk.Canvas(root, height=HEIGHT, width=WIDTH)
canvas.pack()

frame = tk.Frame(root, bg=BACKGROUND)
frame.place(relwidth=1, relheight=1)

# gender selection: one shared IntVar, one radio button per code
var = tk.IntVar()
label_jk = tk.Label(frame, text="Jenis Kelamin: ", bg=BACKGROUND, font=('helvetica', 14), anchor='w')
label_jk.place(relwidth=0.2, relx=0, rely=0)

for caption, code, x in (("Laki-laki", 0, 0.2), ("Perempuan", 1, 0.35)):
    radio = tk.Radiobutton(frame, text=caption, bg=BACKGROUND, font=('helvetica', 14),
                           variable=var, value=code)
    radio.place(relx=x, rely=0)

# (caption, font size, label relx, entry relx, rely)
# The row order is the argument order get_response expects after the gender code.
FIELD_LAYOUT = (
    ("Status Mahasiswa (Mahasiswa atau Bekerja): ", 10, 0, 0.2, 0.08),
    ("Umur: ", 14, 0, 0.2, 0.15),
    ("Status Nikah (Menikah atau Belum Menikah): ", 10, 0, 0.2, 0.23),
    ("IPS 1: ", 14, 0, 0.2, 0.29),
    ("IPS 2: ", 14, 0, 0.2, 0.35),
    ("IPS 3: ", 14, 0, 0.2, 0.41),
    ("IPS 4: ", 14, 0, 0.2, 0.47),
    ("IPS 5: ", 14, 0.6, 0.7, 0),
    ("IPS 6: ", 14, 0.6, 0.7, 0.08),
    ("IPS 7: ", 14, 0.6, 0.7, 0.15),
    ("IPS 8: ", 14, 0.6, 0.7, 0.23),
    ("IPK: ", 14, 0.6, 0.7, 0.29),
)
entries = [add_field(frame, *spec) for spec in FIELD_LAYOUT]


def on_predict():
    """Read the current form values and hand them to get_response."""
    get_response(var.get(), *(entry.get() for entry in entries))


button = tk.Button(frame, text="Get Prediction", font=('helvetica', 14), command=on_predict)
button.place(relx=0.45, rely=0.6)

# get_response writes its result into this label
label_result = tk.Label(frame, bg=BACKGROUND, font=('helvetica', 16, 'bold'))
label_result.place(relx=0.46, rely=0.8)

root.mainloop()