## IMPORT THE MODULES

In [1]:
#import the necessary modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import missingno as msno
from IPython.display import display
from sklearn.preprocessing import OrdinalEncoder
sns.set()
import warnings
warnings.filterwarnings('ignore')

## LOAD THE DATASET

In [2]:
#load the dataset
data = pd.read_csv('./DentalData611.csv')
data.sample(5)

Unnamed: 0,Timestamp,Patient Number,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",...,Is there bleeding from the ulcer,Is there discharge from the ulcer?,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility",Final Diagnosis
85,06/11/2023 12:02:49,F11,Pain,aching,Mild,lingering pain,Cold foods or drinks,,,,...,,,,,,,,,,Reversible pulpits
28,03/11/2023 10:49:45,M76,Pain,throbbing,Severe,The pain came suddenly,Hot foods or drinks,No,No,No,...,No,No,No,No,No,No,No,No,No,Acute pulpits
32,03/11/2023 11:01:21,F76,Pain,aching,Moderate,The pain came suddenly,Laying down,No,No,No,...,No,No,No,No,No,No,No,No,No,Acute pulpits
25,03/11/2023 10:12:05,F43,Pain,aching,Mild,The pain came suddenly,Cold foods or drinks,No,No,No,...,No,No,No,No,No,No,No,No,No,Reversible pulpits
43,03/11/2023 11:22:16,F35,Swelling,No,No,No,No,No,No,No,...,No,No,No,No,No,No,No,No,No,Chronic pulpits with swelling


## DATA PREPROCESSING

In [3]:
# look out for null values
data.isnull().sum()

Timestamp                                                                             0
Patient Number                                                                        0
Chief complaint                                                                       0
Nature of Pain                                                                        4
Severity of pain                                                                      4
Onset  and mode of pain                                                               7
Factors which worsens the pain                                                        8
Is the swelling painful?                                                             34
Has the swelling changed since it was first noticed? If yes how quickly?             33
Does the swelling changes during normal activities such as eating, speaking, etc?    33
Is the ulcer painful                                                                 40
Is there bleeding from the ulcer

In [4]:
# filling in missing values and checking again
data.fillna("No",inplace=True)
print(data.isnull().sum())
data.sample(10)


Timestamp                                                                            0
Patient Number                                                                       0
Chief complaint                                                                      0
Nature of Pain                                                                       0
Severity of pain                                                                     0
Onset  and mode of pain                                                              0
Factors which worsens the pain                                                       0
Is the swelling painful?                                                             0
Has the swelling changed since it was first noticed? If yes how quickly?             0
Does the swelling changes during normal activities such as eating, speaking, etc?    0
Is the ulcer painful                                                                 0
Is there bleeding from the ulcer           

Unnamed: 0,Timestamp,Patient Number,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",...,Is there bleeding from the ulcer,Is there discharge from the ulcer?,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility",Final Diagnosis
45,03/11/2023 11:24:33,M41,Pain,aching,Mild,The pain came suddenly,Eating,No,No,No,...,No,No,No,No,No,No,No,No,No,Dental caries with food impaction
33,03/11/2023 11:02:10,F23,Pain,aching,Mild,The pain came suddenly,Cold foods or drinks,No,No,No,...,No,No,No,No,No,No,No,No,No,Reversible pulpits
47,03/11/2023 11:26:15,F27,Pain,throbbing,Severe,The pain came suddenly,Laying down,No,No,No,...,No,No,No,No,No,No,No,No,No,Reversible pulpits
3,31/10/2023 09:29:47,F34,Pain,throbbing,Severe,The pain came suddenly,Hot foods or drinks,No,No,No,...,No,No,No,No,No,No,No,No,No,Acute pulpitis
46,03/11/2023 11:25:24,F64,Pain,throbbing,Moderate,lingering pain,Hot foods or drinks,No,Slow,No,...,No,No,No,No,No,No,No,No,No,Pulpits with swelling
62,06/11/2023 10:17:34,F38,Pain,throbbing,Mild,The pain came suddenly,Cold foods or drinks,No,No,No,...,No,No,No,No,No,No,No,No,No,Reversible pulpits
58,06/11/2023 10:13:39,F48,Pain,throbbing,No,No,No,No,No,No,...,No,No,No,No,No,No,Yes,Yes,2,Periodontitis with grade 2 tooth mobility
20,03/11/2023 09:23:40,M80,Pain,throbbing,Severe,The pain came suddenly,Hot foods or drinks,No,No,No,...,No,No,No,No,No,No,No,No,No,Acute pulpits
69,06/11/2023 10:43:08,M29,Swelling,throbbing,Moderate,No,No,Yes,No,No,...,No,No,No,No,No,No,No,No,1,Dental abcess
34,03/11/2023 11:02:46,M18,Pain,aching,Moderate,lingering pain,Hot foods or drinks,No,No,No,...,No,No,No,No,No,No,No,No,No,Acute pulpits


In [5]:
#Dropping unnecessary fields
data.drop('Timestamp',axis=1, inplace=True)
data.sample(5)

Unnamed: 0,Patient Number,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",Is the ulcer painful,Is there bleeding from the ulcer,Is there discharge from the ulcer?,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility",Final Diagnosis
20,M80,Pain,throbbing,Severe,The pain came suddenly,Hot foods or drinks,No,No,No,No,No,No,No,No,No,No,No,No,No,Acute pulpits
51,F25,Pain,aching,Mild,The pain came suddenly,Cold foods or drinks,No,No,No,No,No,No,No,No,No,No,No,No,No,Reversible pulpits
34,M18,Pain,aching,Moderate,lingering pain,Hot foods or drinks,No,No,No,No,No,No,No,No,No,No,No,No,No,Acute pulpits
75,F60,Pain,stabbing,Severe,The pain came suddenly,Laying down,No,No,No,No,No,No,No,No,No,No,No,No,No,Irreversible pulpits
80,M43,Pain,aching,Mild,lingering pain,Cold foods or drinks,No,No,No,No,No,No,No,No,No,No,No,No,No,Reversible pulpits


In [6]:
#split the patient number into gender and age
data['Gender'] = data['Patient Number'].str[0]
data['Age'] = data['Patient Number'].str[1:].astype(int)
data.sample(5)

Unnamed: 0,Patient Number,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",Is the ulcer painful,...,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility",Final Diagnosis,Gender,Age
25,F43,Pain,aching,Mild,The pain came suddenly,Cold foods or drinks,No,No,No,No,...,No,No,No,No,No,No,No,Reversible pulpits,F,43
72,F6,Pain,throbbing,Moderate,The pain came suddenly,Hot foods or drinks,No,No,No,No,...,No,No,No,No,No,No,No,Acute irreversible pulpits,F,6
22,F26,Pain,throbbing,Severe,The pain came suddenly,Hot foods or drinks,No,No,No,No,...,No,No,No,No,No,No,No,Acute pulpits,F,26
31,F44,Pain,aching,Mild,lingering pain,Cold foods or drinks,No,No,No,No,...,No,No,No,No,No,No,No,Reversible pulpits,F,44
33,F23,Pain,aching,Mild,The pain came suddenly,Cold foods or drinks,No,No,No,No,...,No,No,No,No,No,No,No,Reversible pulpits,F,23


In [7]:
#using ordinal encoder to change the fields to numerical values
encode=OrdinalEncoder(dtype=float)
data_noage = data.drop('Age',axis=1)
patient_encode=pd.DataFrame(encode.fit_transform(data_noage),columns=data_noage.columns)
patient_encode = pd.concat([patient_encode,data['Age']],axis=1) 
patient_encode.head(5)

Unnamed: 0,Patient Number,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",Is the ulcer painful,...,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility",Final Diagnosis,Gender,Age
0,45.0,1.0,1.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,29.0,1.0,30
1,3.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,20.0,0.0,21
2,42.0,2.0,4.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,30.0,1.0,20
3,11.0,1.0,4.0,3.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,34
4,24.0,2.0,0.0,2.0,0.0,4.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,26.0,0.0,5


In [8]:
patient_encode.to_csv('./DentalData611_encoded.csv',index=False)

In [9]:
train,test=train_test_split(patient_encode,test_size=0.2, random_state=42)
print(len(train))
print(len(test))

72
19


In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

X_train = train.drop(columns=['Final Diagnosis'])
y_train = train['Final Diagnosis']


X_test = test.drop(columns=['Final Diagnosis'])  
y_test = test['Final Diagnosis']


rf = RandomForestClassifier(n_estimators=100, max_depth=4, random_state=42)
rf.estimators_ = [DecisionTreeClassifier() for _ in range(100)] 

rf.fit(X_train, y_train)

rf_pred = rf.predict(X_test)

rf_accuracy = rf.score(X_test, y_test) 

print("Random Forest Accuracy:",rf_accuracy)

Random Forest Accuracy: 0.47368421052631576


In [11]:
# Import LogisticRegression
from sklearn.linear_model import LogisticRegression

# Extract X and y
X_train = train.drop('Final Diagnosis', axis=1) 
y_train = train['Final Diagnosis']

X_test = test.drop('Final Diagnosis', axis=1)
y_test = test['Final Diagnosis']

# Create LogisticRegression instance 
logreg = LogisticRegression()

# Fit on training data
logreg.fit(X_train, y_train)  

# Predict on test data
y_pred = logreg.predict(X_test)

# Evaluate accuracy
from sklearn.metrics import accuracy_score
print("Logistic Regression Accuracy:", 
      accuracy_score(y_test, y_pred))

Logistic Regression Accuracy: 0.42105263157894735
