In [6]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import pickle

In [7]:
# Reading the bipolar screening data
dataset = pd.read_excel('Bipolar_Screening.xlsx')
# Print column names, dtypes and non-null counts, then preview the first 5 rows
dataset.info()
dataset.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 21 columns):
 #   Column                                                                                                                                 Non-Null Count  Dtype 
---  ------                                                                                                                                 --------------  ----- 
 0   Feel very “up,” “high,” elated, or irritable or touchy                                                                                 125 non-null    int64 
 1   Feel very sad, “down,” empty, worried, or hopeless                                                                                     125 non-null    int64 
 2   Feel “jumpy” or “wired”                                                                                                                125 non-null    int64 
 3   Feel slowed down or restless                                               

Unnamed: 0,"Feel very “up,” “high,” elated, or irritable or touchy","Feel very sad, “down,” empty, worried, or hopeless",Feel “jumpy” or “wired”,Feel slowed down or restless,Have a decreased need for sleep,"Have trouble falling asleep, wake up too early, or sleep too much",Have a loss of appetite,Experience increased appetite and weight gain,Talk very fast about a lot of different things,"Talk very slowly, feel like they have 0thing to say, forget a lot",...,Have trouble concentrating or making decisions,Think they can do a lot of things at once,Feel unable to do even simple things,"Do risky things that show poor judgment, such as eat and drink excessively, spend or give away a lot of money, or have reckless sex","Have little interest in almost all activities, a decreased or absent sex drive, or an inability to experience pleasure (“anhedonia”)","Feel like they are unusually important, talented, or powerful","Feel hopeless or worthless, think about death or suicide","If you checked 1 to more than one of the above, have several of these ever happened during the same period of time?","""How much of a problem did any of these cause you?\nFor eg: having family, money or legal troubles; getting into arguments or fights?""",Diagnosis
0,1,0,1,0,1,0,1,0,1,0,...,0,1,0,1,0,1,0,1,3,Yes
1,0,1,0,1,0,1,0,1,0,1,...,1,0,1,0,1,0,1,1,3,Yes
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,No
3,0,1,1,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,Yes
4,1,0,1,0,1,1,1,0,1,0,...,1,1,0,1,0,1,0,1,3,No


In [8]:
def change(arg):
    """Changes the Diagnosis label into a numeric value.

    PARAMETERS
    -----------
    arg
        the Diagnosis label, either "Yes" or "No"; a value that is
        already encoded as 0 or 1 is passed through unchanged, so
        applying the function a second time keeps the labels intact

    RETURNS
    ----------
    number
        1 for "Yes", 0 for "No" (or the already-encoded 0 / 1);
        None when the label is not recognised

    AUTHOR
    ----------
    Shreeja Dahal

    DATE
    ----------
    09/29/2021 8:30 AM

    """
    if arg == "Yes":
        return 1
    elif arg == 'No':
        return 0
    # Already-encoded labels: keeps the encoding step idempotent
    elif arg in (0, 1):
        return int(arg)
    # Any other value is not a known label
    return None
    
# Overwrite the Diagnosis column with the value change() returns for each entry
dataset['Diagnosis'] = dataset['Diagnosis'].map(change)


(125, 20)


Unnamed: 0,"Feel very “up,” “high,” elated, or irritable or touchy","Feel very sad, “down,” empty, worried, or hopeless",Feel “jumpy” or “wired”,Feel slowed down or restless,Have a decreased need for sleep,"Have trouble falling asleep, wake up too early, or sleep too much",Have a loss of appetite,Experience increased appetite and weight gain,Talk very fast about a lot of different things,"Talk very slowly, feel like they have 0thing to say, forget a lot",...,Have trouble concentrating or making decisions,Think they can do a lot of things at once,Feel unable to do even simple things,"Do risky things that show poor judgment, such as eat and drink excessively, spend or give away a lot of money, or have reckless sex","Have little interest in almost all activities, a decreased or absent sex drive, or an inability to experience pleasure (“anhedonia”)","Feel like they are unusually important, talented, or powerful","Feel hopeless or worthless, think about death or suicide","If you checked 1 to more than one of the above, have several of these ever happened during the same period of time?","""How much of a problem did any of these cause you?\nFor eg: having family, money or legal troubles; getting into arguments or fights?""",Diagnosis
0,1,0,1,0,1,0,1,0,1,0,...,0,1,0,1,0,1,0,1,3,1
1,0,1,0,1,0,1,0,1,0,1,...,1,0,1,0,1,0,1,1,3,1
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,1,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,1
4,1,0,1,0,1,1,1,0,1,0,...,1,1,0,1,0,1,0,1,3,0


In [None]:
# X contains all the variables except the Diagnosis
# (the column is named by keyword: pandas 2.0 no longer accepts
# the axis as a second positional argument to drop)
X = dataset.drop(columns='Diagnosis')
# y contains the Diagnosis
y = dataset['Diagnosis']
# splitting the data into training set and testing set,
# 75% into training data and 25% into testing data;
# random_state fixes the shuffle so the split is the same on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 47, test_size = 0.25)

In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn import tree


# With the entropy criterion, a completely homogeneous sample has
# an entropy of zero and an equally divided (two-class) sample has
# an entropy of one.
# random_state is fixed so that the fitted tree, and therefore the
# scores printed below, are the same on every run.
clf = tree.DecisionTreeClassifier(criterion = 'entropy', random_state = 47)

# fitting the training data
clf = clf.fit(X_train, y_train)

# y_pred is the predicted data
y_pred = clf.predict(X_test)

# calculating the decision tree accuracy score
decisionTree_score = accuracy_score(y_test, y_pred)

# Accuracy, Precision, Recall, F-1 score, and support
# are factors that measure the performance of a model
print("Accuracy score: ", decisionTree_score)
print(classification_report(y_test, y_pred))

Accuracy score:  0.96875
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        15
           1       1.00      0.94      0.97        17

    accuracy                           0.97        32
   macro avg       0.97      0.97      0.97        32
weighted avg       0.97      0.97      0.97        32



In [10]:
# Serialize the fitted classifier with pickle and
# write it to model_bipolar.pkl
with open('model_bipolar.pkl', mode='wb') as model_file:
    pickle.dump(clf, model_file)