## IMPORT THE MODULES

In [1]:
#import the necessary modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import missingno as msno
from IPython.display import display
from sklearn.preprocessing import OrdinalEncoder
# Apply seaborn's default theme to the matplotlib figures drawn in this notebook.
sns.set()
import warnings
# NOTE(review): with no category or module filter this silences *every* warning
# for the rest of the session, so deprecation or solver-convergence warnings
# raised by later cells will never be shown.
warnings.filterwarnings('ignore')



## LOAD THE DATASET

In [20]:
# Read the dental survey CSV from the working directory into a DataFrame
csv_path = './DentalData811.csv'
data = pd.read_csv(csv_path)
# Preview the first five rows
data.head(5)

Unnamed: 0,Timestamp,Patient Number,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",...,Is there bleeding from the ulcer,Is there discharge from the ulcer?,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Final Diagnosis,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility"
0,31/10/2023 08:55:03,M30,Pain,aching,Mild,lingering pain,No,No,No,No,...,No,No,No,No,No,No,Pulpitis,No,No,No
1,31/10/2023 09:11:12,F21,Pain,aching,Mild,The pain came suddenly,Eating,No,No,No,...,No,No,No,No,No,No,Food impaction,No,No,No
2,31/10/2023 09:28:50,M20,Swelling,throbbing,Moderate,lingering pain,Hot foods or drinks,No,No,No,...,No,No,No,No,No,No,Pulpitis with swelling,No,No,No
3,31/10/2023 09:29:47,F34,Pain,throbbing,Severe,The pain came suddenly,Hot foods or drinks,No,No,No,...,No,No,No,No,No,No,Acute pulpitis,No,No,No
4,31/10/2023 09:36:48,F5,Swelling,No,No,No,No,No,Slow,No,...,No,No,No,No,No,No,Periodontal abcess,No,No,No


In [28]:
# Pull out the diagnosis column (used as the prediction target further down)
y = data['Final Diagnosis']
print(y)
# NOTE(review): the printed labels show trailing spaces on some diagnoses
# (e.g. "Pulpitis with swelling "), so the distinct-label count below may be
# inflated by whitespace/spelling variants -- confirm before modelling.
classes = y.unique()
print(classes.size)

0                                            Pulpitis
1                                      Food impaction
2                             Pulpitis with swelling 
3                                      Acute pulpitis
4                                  Periodontal abcess
                            ...                      
112    Irreversible pulpits with extra oral swelling 
113            Periodontitis with grade one mobility 
114                               Reversible pulpits 
115           Reversible pulpits with food impaction 
116                             Irreversible pulpits 
Name: Final Diagnosis, Length: 117, dtype: object
41


In [None]:
# Bar chart of how many patients received each diagnosis.
# The Series is passed by keyword: seaborn 0.11 deprecates positional data
# arguments and seaborn 0.12+ binds the first positional argument to `data`
# rather than `x`. Mapping it to the y-axis draws horizontal bars, which keeps
# the long diagnosis names readable.
ax = sns.countplot(y=y, label='Count')

## DATA PREPROCESSING

In [3]:
# Count the missing entries in every column
data.isna().sum()

Timestamp                                                                             0
Patient Number                                                                        0
Chief complaint                                                                       0
Nature of Pain                                                                        5
Severity of pain                                                                      6
Onset  and mode of pain                                                              11
Factors which worsens the pain                                                       11
Is the swelling painful?                                                             52
Has the swelling changed since it was first noticed? If yes how quickly?             51
Does the swelling changes during normal activities such as eating, speaking, etc?    51
Is the ulcer painful                                                                 63
Is there bleeding from the ulcer

In [4]:
# Replace every missing answer with the literal "No", then verify nothing is left
data.fillna(value="No", inplace=True)
remaining_nulls = data.isna().sum()
print(remaining_nulls)
# Show ten randomly chosen rows of the filled frame
data.sample(n=10)


Timestamp                                                                            0
Patient Number                                                                       0
Chief complaint                                                                      0
Nature of Pain                                                                       0
Severity of pain                                                                     0
Onset  and mode of pain                                                              0
Factors which worsens the pain                                                       0
Is the swelling painful?                                                             0
Has the swelling changed since it was first noticed? If yes how quickly?             0
Does the swelling changes during normal activities such as eating, speaking, etc?    0
Is the ulcer painful                                                                 0
Is there bleeding from the ulcer           

Unnamed: 0,Timestamp,Patient Number,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",...,Is there bleeding from the ulcer,Is there discharge from the ulcer?,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Final Diagnosis,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility"
109,08/11/2023 11:17:25,F68,Pain,No,No,No,No,No,No,No,...,No,No,No,No,No,No,Periodontitis with grade two mobility,Yes,Yes,2
74,06/11/2023 11:14:14,F25,Pain,stabbing,Severe,lingering pain,Eating,No,No,No,...,No,No,No,No,No,No,Pericoronitis,No,No,No
8,01/11/2023 17:13:31,M39,Pain,throbbing,Moderate,The pain came suddenly,Hot foods or drinks,No,No,No,...,No,No,No,No,No,No,Acute pulpits,No,No,No
115,08/11/2023 11:22:31,F35,Swelling,throbbing,Mild,lingering pain,Eating,Yes,Slow,No,...,No,No,No,No,No,No,Reversible pulpits with food impaction,Yes,Yes,No
60,06/11/2023 10:15:37,M58,Pain,throbbing,Severe,The pain came suddenly,Hot foods or drinks,No,No,No,...,No,No,No,No,No,No,Acute irreversible pulpits,No,No,No
45,03/11/2023 11:24:33,M41,Pain,aching,Mild,The pain came suddenly,Eating,No,No,No,...,No,No,No,No,No,No,Dental caries with food impaction,No,No,No
37,03/11/2023 11:06:09,M9,Swelling,throbbing,Moderate,lingering pain,Eating,No,Slow,No,...,No,No,No,No,No,No,Pulpits with swelling,No,No,No
97,07/11/2023 10:15:19,F52,Swelling,aching,Mild,No,Laying down,Yes,Slow,No,...,No,No,No,No,No,No,Irreversible pulpits with extra oral swelling,No,No,No
93,07/11/2023 09:21:04,M28,Pain,throbbing,Mild,lingering pain,Cold foods or drinks,No,No,No,...,No,No,No,No,No,No,Reversible pulpits,No,No,No
98,07/11/2023 10:31:32,F37,Pain,throbbing,Severe,The pain came suddenly,Laying down,No,No,No,...,No,No,No,No,No,No,Irreversible pulpits,No,No,No


In [5]:
# Remove, one after the other, the columns treated as unnecessary
for unused_column in ('Timestamp', 'Patient Number'):
    data.drop(unused_column, axis=1, inplace=True)
# Show five randomly chosen rows of the trimmed frame
data.sample(n=5)

Unnamed: 0,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",Is the ulcer painful,Is there bleeding from the ulcer,Is there discharge from the ulcer?,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Final Diagnosis,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility"
78,Swelling,aching,Mild,lingering pain,Eating,No,Slow,No,No,No,No,No,No,No,No,Irreversible pulpits with swelling,No,No,No
32,Pain,aching,Moderate,The pain came suddenly,Laying down,No,No,No,No,No,No,No,No,No,No,Acute pulpits,No,No,No
88,Pain,burning,Moderate,lingering pain,No,No,No,No,No,No,No,No,No,No,No,Gingivitis,Yes,Yes,No
70,Pain,throbbing,Severe,The pain came suddenly,Hot foods or drinks,No,No,No,No,No,No,No,No,No,No,Acute irreversible pulpits,No,No,No
64,Pain,aching,Moderate,The pain came suddenly,Laying down,No,No,No,No,No,No,No,No,No,No,Irreversible pulpits,No,No,No


In [6]:
# (Disabled) split the patient number into gender and age.
# NOTE(review): this cannot simply be re-enabled at this position -- the
# 'Patient Number' column is dropped by the preceding cell, so these lookups
# would raise a KeyError unless the split runs before that drop.
# data['Gender'] = data['Patient Number'].str[0]
# data['Age'] = data['Patient Number'].str[1:].astype(int)
# data.sample(5)

In [17]:
# Map each column's category labels to float codes with an ordinal encoder
encode = OrdinalEncoder(dtype=float)
encoded_values = encode.fit_transform(data)
patient_encode = pd.DataFrame(encoded_values, columns=data.columns)
# Disabled variant kept for reference (encodes everything except a separate 'Age' column):
# data_noage = data.drop('Age',axis=1)
# patient_encode=pd.DataFrame(encode.fit_transform(data_noage),columns=data_noage.columns)
# patient_encode = pd.concat([patient_encode,data['Age']],axis=1)
patient_encode.head()

Unnamed: 0,Chief complaint,Nature of Pain,Severity of pain,Onset and mode of pain,Factors which worsens the pain,Is the swelling painful?,Has the swelling changed since it was first noticed? If yes how quickly?,"Does the swelling changes during normal activities such as eating, speaking, etc?",Is the ulcer painful,Is there bleeding from the ulcer,Is there discharge from the ulcer?,Is there a foul smell from the ulcer?,Do the ulcers interfere with daily activities,Has the ulcer changed since first noticed?,Have you had similar ulcers?,Final Diagnosis,Is there bleeding in the gums?,Is there pain in the gums,"If any tooth/teeth is/are mobile, what is the degree of mobility"
0,1.0,1.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.0,0.0,0.0,3.0
1,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,3.0
2,2.0,4.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34.0,0.0,0.0,3.0
3,1.0,4.0,3.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0
4,2.0,0.0,2.0,0.0,4.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,0.0,0.0,3.0


In [9]:
# Write the encoded frame to disk without the index column
patient_encode.to_csv(path_or_buf='./DentalData811_encoded.csv', index=False)

In [14]:
# Hold out 30% of the encoded rows for testing; the fixed seed keeps the split repeatable
train, test = train_test_split(patient_encode, test_size=0.3, random_state=42)
# Report the row count of each partition
for subset in (train, test):
    print(len(subset))

81
36


In [15]:
from sklearn.ensemble import RandomForestClassifier
# No longer used by this cell; import kept in case later cells rely on the name.
from sklearn.tree import DecisionTreeClassifier

# Separate the encoded feature columns from the diagnosis target for each split.
X_train = train.drop(columns=['Final Diagnosis'])
y_train = train['Final Diagnosis']

X_test = test.drop(columns=['Final Diagnosis'])
y_test = test['Final Diagnosis']

# 100 depth-limited trees; the fixed seed makes the fitted forest reproducible.
# The forest builds its own trees inside fit(), so none are assigned by hand:
# a manually set `estimators_` list is discarded by fit() when warm_start is
# False (the default) and would never influence the model.
rf = RandomForestClassifier(n_estimators=100, max_depth=4, random_state=42)
rf.fit(X_train, y_train)

# Predicted (encoded) diagnosis for every held-out row.
rf_pred = rf.predict(X_test)

# Mean accuracy on the held-out split.
rf_accuracy = rf.score(X_test, y_test)

print("Random Forest Accuracy:",rf_accuracy)

Random Forest Accuracy: 0.3888888888888889


In [16]:
# Import LogisticRegression and the accuracy metric used below
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Extract X and y (rebuilt here so the cell does not depend on the forest cell)
X_train = train.drop('Final Diagnosis', axis=1)
y_train = train['Final Diagnosis']

X_test = test.drop('Final Diagnosis', axis=1)
y_test = test['Final Diagnosis']

# Create LogisticRegression instance.
# The default cap of 100 solver iterations can stop the optimiser before it has
# converged, and the resulting ConvergenceWarning would go unseen because
# warnings are silenced globally in the setup cell. A higher cap lets the solver
# run to its tolerance; if it already converged earlier, the result is unchanged.
logreg = LogisticRegression(max_iter=1000)

# Fit on training data
logreg.fit(X_train, y_train)

# Predict on test data
y_pred = logreg.predict(X_test)

# Evaluate accuracy
print("Logistic Regression Accuracy:",
      accuracy_score(y_test, y_pred))

Logistic Regression Accuracy: 0.4166666666666667
