In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text
from sklearn.tree import plot_tree

In [None]:
# Read the dataset from the CSV file in the working directory
data = pd.read_csv("diabetes.csv")

# Print, one after another: the first rows, the per-column count of missing
# values, and the summary statistics of the numeric columns
print(
    data.head(),
    data.isnull().sum(),
    data.describe(),
    sep="\n",
)


In [None]:
# Pairwise scatter plots of every column against every other, with points
# coloured by the Outcome label so class separation is visible per feature pair.
# (seaborn is imported with the other dependencies in the first cell.)
sns.pairplot(data, hue='Outcome')
plt.show()


In [None]:
# The label is the Outcome column; the features are every other column
y = data["Outcome"]
X = data.drop(columns="Outcome")

# Hold out 15% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)


In [None]:
# Build a decision tree with a fixed seed and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so it is displayed
clf


In [None]:
# Predict labels for the held-out rows
y_pred = clf.predict(X_test)

# Fraction of held-out rows predicted correctly
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Counts of true label (rows) versus predicted label (columns)
confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", confusion)

# Per-class precision, recall, F1 and support
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)


In [None]:
# Draw the fitted decision tree.
#
# plot_tree positions every node in axes-fraction coordinates on an axis it
# switches off, so data limits set with plt.xlim / plt.ylim neither zoom nor
# crop the drawing. The supported way to show less of the tree is plot_tree's
# own max_depth argument, exposed here as a single knob.
PLOT_MAX_DEPTH = None  # None draws the whole tree; e.g. 3 draws only the top levels

fig, ax = plt.subplots(figsize=(12, 8))
plot_tree(
    clf,
    max_depth=PLOT_MAX_DEPTH,
    filled=True,  # colour each node by its majority class
    feature_names=X.columns.tolist(),
    # Names are matched to the class labels in ascending order
    # (assumes Outcome is encoded 0 = no diabetes, 1 = diabetes -- TODO confirm)
    class_names=['No Diabetes', 'Diabetes'],
    ax=ax,
)

plt.show()


In [None]:
# Render the fitted tree's decision rules as indented plain text.
# (export_text is imported with the other dependencies in the first cell.)
tree_text = export_text(clf, feature_names=X.columns.tolist())

# Save the rules to a text file in the working directory. The encoding is
# pinned so the file is written the same way on every platform and feature
# names containing non-ASCII characters cannot fail under a narrow locale default.
with open("decision_tree.txt", "w", encoding="utf-8") as text_file:
    text_file.write(tree_text)
