In [4]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import pandas as pd

# Train a decision-tree classifier on the fruit dataset and report
# accuracy, precision, recall and F1 on a held-out test split.

# One seed shared by the split and the classifier so that reruns produce the
# same tree and therefore the same metrics.
RANDOM_STATE = 42

# Create a DataFrame from the dataset (read_table defaults to tab-delimited)
df = pd.read_table('fruit_data_with_colors.txt')

# Separate features (X) and target variable (y)
X = df[['mass', 'width', 'height', 'color_score']]
y = df['fruit_label']

# Split the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Create a decision tree classifier.
# The seed matters here: scikit-learn randomly permutes the features at each
# split (even with the default "best" splitter), so an unseeded tree can
# differ between runs when several splits are equally good.
clf = DecisionTreeClassifier(random_state=RANDOM_STATE)

# Train the classifier on the training set
clf.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = clf.predict(X_test)

# Evaluate the performance of the classifier.
# average='weighted' averages the per-class scores weighted by each class's
# support (number of true instances) in y_test.
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, average='weighted')
recall = metrics.recall_score(y_test, y_pred, average='weighted')
f1 = metrics.f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

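# Optional: export the fitted tree with Graphviz and render it to "fruit_tree".
# Requires the `graphviz` Python package; uncomment to use.
# NOTE(review): export_graphviz expects class_names in ascending label order,
# while unique() returns names in order of appearance - confirm these match.
# from sklearn.tree import export_graphviz
# import graphviz
# dot_data = export_graphviz(clf, out_file=None, feature_names=X.columns, class_names=df['fruit_name'].unique(), filled=True, rounded=True)
# graph = graphviz.Source(dot_data)
# graph.render("fruit_tree")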


Accuracy: 83.33%
Precision: 0.861111111111111
Recall: 0.8333333333333334
F1 Score: 0.8333333333333334
