In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.express as px
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import svm 
from sklearn.model_selection import GridSearchCV 
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Read the CBC report CSV into a DataFrame and preview its first rows
DATA_FILE = 'CBC Report.csv'
df = pd.read_csv(DATA_FILE)
df.head()

In [None]:
# Number of missing values in each column (isna is the same check as isnull)
missing_per_column = df.isna().sum()
missing_per_column

In [None]:
# Fill missing values: median for ESR and RBC, mean for the other columns.
# The fill is done on the frame itself and assigned back. Calling
# fillna(inplace=True) on df[col] operates on a column selection, which is
# deprecated chained assignment and leaves df unchanged under pandas
# Copy-on-Write.
fill_values = {
    "ESR": df["ESR"].median(),
    "Lymphocyte": df["Lymphocyte"].mean(),
    "Monocyte": df["Monocyte"].mean(),
    "Eosinophil": df["Eosinophil"].mean(),
    "Basophil": df["Basophil"].mean(),
    "RBC": df["RBC"].median(),
}
df = df.fillna(value=fill_values)


In [None]:
# Drop the "Serial" and "Date" columns in place; neither is used as a feature
df.drop(["Serial", "Date"], axis="columns", inplace=True)


In [None]:
# drop_duplicates() returns a new frame and leaves the original untouched.
# Assign the result back so the duplicate rows are actually removed from the
# data used by the following cells, then show the resulting size.
df = df.drop_duplicates()
df.shape

In [None]:
# Bar chart of Result counts per Gender, with the legend placed outside the axes
gender_result_counts = pd.crosstab(df["Gender"], df["Result"])
gender_result_counts.plot.bar()
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()

In [None]:
# Compute the correlation matrix over the numeric columns only.
# 'Gender' and 'Result' are label-encoded in a later cell, so at this point
# they are presumably still text; DataFrame.corr() raises on non-numeric
# columns in pandas >= 2.0 instead of silently skipping them.
numeric_df = df.select_dtypes(include="number")
plt.figure(figsize=(10, 6))
sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5)
plt.title("Correlation Matrix of Features")
plt.show()


In [None]:
# Pie Chart for Result Distribution
# value_counts() orders classes by frequency, so the slice labels are taken
# from its index rather than hard-coded; a fixed ["Positive", "Negative"]
# list mislabels the slices whenever "Negative" is the more frequent class.
result_counts = df['Result'].value_counts()

# Keep red for positive and green for negative regardless of slice order.
# NOTE(review): assumes the classes are spelled "Positive"/"Negative"
# (case-insensitive); any other spelling falls back to the default colors.
palette = {"positive": "red", "negative": "green"}
slice_colors = [palette.get(str(cls).strip().lower()) for cls in result_counts.index]

pie_kwargs = {"autopct": "%.1f%%", "startangle": 90}
if all(color is not None for color in slice_colors):
    pie_kwargs["colors"] = slice_colors

plt.figure(figsize=(6, 6))
result_counts.plot.pie(**pie_kwargs)
plt.title("Dengue Test Results Distribution")
plt.ylabel('')  # Remove y-label for cleaner look
plt.show()


In [None]:
from sklearn import preprocessing  

# Use one LabelEncoder per column so that each fitted mapping stays available.
# Refitting a single encoder on 'Result' would discard the 'Gender' mapping.
gender_encoder = preprocessing.LabelEncoder()
result_encoder = preprocessing.LabelEncoder()

# Apply Label Encoding to 'Gender' and 'Result' columns
df['Gender'] = gender_encoder.fit_transform(df['Gender'])
df['Result'] = result_encoder.fit_transform(df['Result'])

# Backward compatibility: `label_encoder` used to end up fitted on 'Result'
label_encoder = result_encoder

# LabelEncoder assigns integer codes in sorted class order. Print the actual
# mappings instead of assuming them, so code that encodes new inputs by hand
# can be checked against what the model was trained on.
for column, encoder in (("Gender", gender_encoder), ("Result", result_encoder)):
    print(column, {str(cls): code for code, cls in enumerate(encoder.classes_)})

# Display first few rows to verify
df.head()


In [None]:
# Separate the target column from the feature columns
y = df['Result']
X = df.drop(columns=['Result'])

# Hold out 20% of the rows for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the dataset dimensions and the size of each partition
n_records, n_features = X.shape
print(f"Total records found in dataset = {n_records}")
print(f"Total features found in dataset= {n_features}")
print(f"80% dataset for training : {X_train.shape[0]}")
print(f"20% dataset for testing  : {X_test.shape[0]}")

In [None]:
# Per-model metric accumulators; calculateMetrics appends one value to each
accuracy, precision, recall, fscore = [], [], [], []

In [None]:
# Function to calculate various metrics such as accuracy, precision, recall, and F1 score
def calculateMetrics(algorithm, predict, testY):
    """Print, record and plot classification metrics for one model.

    Macro-averaged precision, recall and F1 plus plain accuracy are computed
    as percentages, printed, and appended to the module-level lists
    ``precision``, ``recall``, ``fscore`` and ``accuracy``. A confusion-matrix
    heatmap is then drawn and shown.

    Parameters
    ----------
    algorithm : str
        Display name used in the printed lines and in the plot title.
    predict : array-like
        Predicted class labels.
    testY : array-like
        True class labels, aligned with ``predict``.

    Returns
    -------
    None
    """
    # Calculate precision, recall, F1-score, and accuracy (as percentages)
    p = precision_score(testY, predict, average='macro') * 100
    r = recall_score(testY, predict, average='macro') * 100
    f = f1_score(testY, predict, average='macro') * 100
    a = accuracy_score(testY, predict) * 100

    # Print the metrics
    print()
    print(f"{algorithm} Accuracy  : {a}")
    print(f"{algorithm} Precision : {p}")
    print(f"{algorithm} Recall    : {r}")
    print(f"{algorithm} FMeasure  : {f}")

    # Record the metrics so later cells can compare models
    accuracy.append(a)
    precision.append(p)
    recall.append(r)
    fscore.append(f)

    # Use the sorted union of true and predicted classes for both the matrix
    # and the tick labels. Deriving the labels from testY alone can disagree
    # with the matrix size when a class appears only in the predictions.
    labels = list(np.union1d(testY, predict))

    # Confusion Matrix
    conf_matrix = confusion_matrix(testY, predict, labels=labels)

    # Plot the confusion matrix using seaborn
    plt.figure(figsize=(5, 5))
    ax = sns.heatmap(conf_matrix, xticklabels=labels, yticklabels=labels, annot=True, cmap="viridis", fmt="g")
    # NOTE(review): this explicit limit looks like a workaround for clipped
    # top/bottom heatmap rows in some matplotlib versions; it also appears to
    # put the first class at the bottom of the y-axis - confirm that is intended.
    ax.set_ylim([0, len(labels)])
    plt.title(f"{algorithm} Confusion Matrix")
    plt.ylabel('True Class')
    plt.xlabel('Predicted Class')
    plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV, train_test_split

# Stratified 80/20 hold-out split (stratify=y), replacing the earlier split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Five stratified folds, reused by both grid searches
skf = StratifiedKFold(n_splits=5)

# Grid-search a seeded random forest over tree count and maximum depth
tuning_param = {"n_estimators": [50, 100], "max_depth": [5, 10]}
rf_cls = RandomForestClassifier(random_state=42)
tuned_rf = GridSearchCV(estimator=rf_cls, param_grid=tuning_param, cv=skf)
tuned_rf.fit(X_train, y_train)

# Evaluate on the hold-out set
predict = tuned_rf.predict(X_test)
calculateMetrics("Random Forest", predict, y_test)

In [None]:
# Decision Tree Model
# Seed the tree so repeated runs give the same result: the grid includes
# splitter="random", and an unseeded tree makes the reported metrics change
# between runs. The seed matches the random forest's random_state=42.
dt_cls = DecisionTreeClassifier(random_state=42)
tuning_param = {"splitter": ["best", "random"], "max_depth": [2, 5]}
tuned_dt = GridSearchCV(dt_cls, tuning_param, cv=skf)
tuned_dt.fit(X_train, y_train)

# Predictions & Evaluation
predict = tuned_dt.predict(X_test)
calculateMetrics("Decision Tree", predict, y_test)

In [None]:
#all algorithms performance graph
# The table gets its own name so the dataset held in `df` is not overwritten.
# Position 0 of each metric list is the Random Forest run and position 1 the
# Decision Tree run, in the order the models were evaluated.
model_names = ['Random Forest', 'Decision Tree']
metric_lists = {
    'Precision': precision,
    'Recall': recall,
    'F1 Score': fscore,
    'Accuracy': accuracy,
}
performance_df = pd.DataFrame(
    [
        [model, metric, values[position]]
        for position, model in enumerate(model_names)
        for metric, values in metric_lists.items()
    ],
    # Column names now match the row contents: algorithm, metric, value
    columns=['Algorithm', 'Metric', 'Value'],
)
# Keyword arguments are required by DataFrame.pivot in pandas >= 2.0
performance_df.pivot(index='Algorithm', columns='Metric', values='Value').plot(kind='bar')
plt.title("All Algorithms Performance Graph")
plt.show()

In [None]:
import tkinter as tk
from tkinter import ttk
import pandas as pd
import joblib  # For loading the trained model



# Function to make predictions
def predict_dengue():
    """Read the form, query the tuned random forest and display the verdict.

    The gender dropdown and the eleven numeric entry widgets are read and
    assembled into a one-row DataFrame. Column 1 of ``tuned_rf.predict_proba``
    is compared against a 0.5 threshold, and the outcome is written to
    ``result_label``. Any exception (for example a non-numeric entry) is
    caught and its message is shown in ``result_label`` instead.
    """
    try:
        # Gender mapping.
        # LabelEncoder assigns codes in sorted class order, so a column holding
        # "Female" and "Male" is encoded as Female -> 0, Male -> 1. The form
        # must use the same codes as the training data.
        # NOTE(review): assumes the training 'Gender' column contained exactly
        # these two strings - confirm against the fitted encoder's classes_.
        gender_mapping = {"Female": 0, "Male": 1}
        gender = gender_mapping[gender_var.get()]

        # Collecting input values in correct sequence
        input_data = {
            "Gender": [gender],
            "Age": [float(age_entry.get())],
            "Haemoglobin": [float(haemoglobin_entry.get())],
            "ESR": [float(esr_entry.get())],
            "WBC": [float(wbc_entry.get())],
            "Neutrophil": [float(neutrophil_entry.get())],
            "Lymphocyte": [float(lymphocyte_entry.get())],
            "Monocyte": [float(monocyte_entry.get())],
            "Eosinophil": [float(eosinophil_entry.get())],
            "Basophil": [float(basophil_entry.get())],
            "RBC": [float(rbc_entry.get())],
            "Platelets": [float(platelets_entry.get())]
        }

        df_test = pd.DataFrame(input_data)

        # Ensure column order matches model's training data
        # NOTE(review): this order is hard-coded; verify it against the
        # columns of the training feature matrix.
        feature_order = ["Gender", "Age", "Haemoglobin", "ESR", "WBC", "Neutrophil",
                         "Lymphocyte", "Monocyte", "Eosinophil", "Basophil", "RBC", "Platelets"]
        df_test = df_test[feature_order]

        # Get prediction probability (column 1 is treated as the dengue class)
        y_probs = tuned_rf.predict_proba(df_test)[:, 1]

        # Threshold-based classification
        threshold = 0.5
        y_pred = (y_probs > threshold).astype(int)

        # Display result
        result_text = "Dengue Detected" if y_pred[0] == 1 else "No Dengue Detected"
        result_label.config(text=result_text, fg="white", bg="black")

    except Exception as e:
        # Surface the problem in the GUI rather than crashing the Tk callback
        result_label.config(text=f"Error: {e}", fg="red", bg="black")

# Create GUI window
root = tk.Tk()
root.title("Dengue Detection")
# Maximized screen. The 'zoomed' window state is not accepted by Tk on every
# platform (X11 builds reject it with a TclError), so fall back to the
# '-zoomed' window attribute, and finally to the default size.
try:
    root.state('zoomed')
except tk.TclError:
    try:
        root.attributes('-zoomed', True)
    except tk.TclError:
        pass  # keep the default window size
root.configure(bg="#ff7899")

# Title
title_label = tk.Label(root, text="Dengue Detection", font=("Arial", 28, "bold"), fg="black", bg="#ff7899")
title_label.pack(pady=20)

# Black Box Frame
frame = tk.Frame(root, bg="black")
frame.pack(expand=True, fill="both", padx=50, pady=20)

# Configure grid layout: equal weights let rows and columns share extra space
for i in range(7):  # 7 rows for input fields
    frame.grid_rowconfigure(i, weight=1)
for j in range(4):  # 4 columns for spacing
    frame.grid_columnconfigure(j, weight=1)

# Gender Dropdown
tk.Label(frame, text="Gender:", fg="white", bg="black", font=("Arial", 14)).grid(row=0, column=0, sticky="w", padx=10, pady=10)
gender_var = tk.StringVar()
# Read-only: the prediction callback looks the selection up in a fixed
# mapping, so free-typed text would only produce a lookup error.
gender_dropdown = ttk.Combobox(frame, textvariable=gender_var, values=["Male", "Female"],
                               font=("Arial", 14), state="readonly")
gender_dropdown.grid(row=0, column=1, pady=10, padx=10, sticky="ew")
gender_dropdown.current(0)  # preselect the first option

# Labels and Entry Fields (the sequence fixes each field's grid position)
labels = [
    "Age", "Haemoglobin", "ESR", "WBC",
    "Neutrophil", "Lymphocyte", "Monocyte", "Eosinophil",
    "Basophil", "RBC", "Platelets",
]

entry_vars = {}
field_font = ("Arial", 14)

for slot, field_name in enumerate(labels, start=1):
    # Slot 0 holds the gender dropdown; two label/entry pairs share each row
    grid_row = slot // 2
    label_col = (slot % 2) * 2

    caption = tk.Label(frame, text=f"{field_name}:", fg="white", bg="black", font=field_font)
    caption.grid(row=grid_row, column=label_col, sticky="w", padx=10, pady=10)

    field_var = tk.StringVar()
    field = ttk.Entry(frame, font=field_font, textvariable=field_var)
    field.grid(row=grid_row, column=label_col + 1, pady=10, padx=10, sticky="ew")
    # The dict stores the Entry widget itself, keyed by its label text
    entry_vars[field_name] = field

# Assign entry fields in correct order (same order as `labels`)
(
    age_entry,
    haemoglobin_entry,
    esr_entry,
    wbc_entry,
    neutrophil_entry,
    lymphocyte_entry,
    monocyte_entry,
    eosinophil_entry,
    basophil_entry,
    rbc_entry,
    platelets_entry,
) = (entry_vars[field_name] for field_name in labels)

# Full-width button that runs predict_dengue when clicked
predict_button = tk.Button(
    frame,
    text="Predict",
    command=predict_dengue,
    font=("Arial", 16, "bold"),
    fg="black",
    bg="pink",
)
predict_button.grid(row=7, column=0, columnspan=4, sticky="ew", pady=20)

# Label where predict_dengue writes the verdict or an error message
result_label = tk.Label(
    frame,
    text="",
    fg="white",
    bg="black",
    font=("Arial", 18, "bold"),
)
result_label.grid(row=8, column=0, columnspan=4, pady=20)

# Hand control to the Tk event loop
root.mainloop()
