# Import Libraries


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import plot_tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import export_text

# Load and Explore Data


In [None]:
# Read the raw dataset from the CSV file in the working directory.
soap_df = pd.read_csv('liquid_soap_dataset.csv')

# Display basic information about the dataset.
# info() writes its report directly to stdout, so no print() is needed.
soap_df.info()

# Summary statistics.
# describe() only *returns* a DataFrame; a bare expression that is not the
# last statement of a cell (or that runs in a plain script) is silently
# discarded, so the result is printed explicitly.
print("\nSummary statistics:")
print(soap_df.describe())

# Check for missing values (same reasoning: print the returned Series).
print("\nMissing values per column:")
print(soap_df.isnull().sum())

# Visualize the distribution of the target variable
sns.countplot(x='Defective', data=soap_df)
plt.title('Distribution of Defective and Non-defective Products')
plt.show()

# Data Preprocessing


In [None]:
# One-hot encode the 'Color' column; drop_first=True omits the first dummy
# level so the remaining indicator columns are not redundant.
soap_df = pd.get_dummies(
    data=soap_df,
    drop_first=True,
    columns=['Color'],
)

# Model Training


In [None]:
# Separate features (X) and target variable (y)
X = soap_df.drop(columns='Defective')
y = soap_df['Defective']

# Split the dataset into training (80%) and testing (20%) sets.
# stratify=y keeps the proportion of each 'Defective' class the same in both
# splits, so the test metrics are not skewed by an unlucky draw when the
# classes are imbalanced. random_state fixes the shuffle for reproducibility.
# NOTE: stratification requires at least two rows of every class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize Decision Tree model (random_state makes the fitted tree
# reproducible between runs)
dt_model = DecisionTreeClassifier(random_state=42)

# Train the model
dt_model.fit(X_train, y_train)

# Model Evaluation


In [None]:
# Predict labels for the held-out test features with the fitted tree
y_pred = dt_model.predict(X_test)

# Compare the predictions against the true test labels
class_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the metrics: accuracy first, then each header followed by its value
# on the next line
print(f"Accuracy: {accuracy:.4f}")
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", class_report, sep="\n")

# Visualizations


In [None]:
# Draw the fitted decision tree as a diagram on a 12x8 inch figure.
plt.figure(figsize=(12, 8))
plot_tree(
    dt_model,
    feature_names=X.columns.tolist(),
    class_names=['Non-Defective', 'Defective'],
    rounded=True,
    filled=True,
)
plt.title("Decision Tree Structure")
plt.show()