In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import  accuracy_score

"""
Key parameters for building a Decision Tree Classifier include max_depth, min_samples_split and max_features (the model below leaves them at their defaults).

It's recommended to experiment with different parameter values 
and use techniques like cross-validation 
to find the optimal set of parameters for your specific problem.
"""

# Load the Iris dataset bundled with scikit-learn
data = load_iris()

# Feature matrix (X) and class labels (y)
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Baseline tree with default hyper-parameters.
# random_state is pinned because scikit-learn randomly permutes the candidate
# features at every split, so an unseeded tree can differ from one run to the next.
model = DecisionTreeClassifier(random_state=42)

# Fit on the training split, then predict the held-out samples
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Fraction of test samples whose predicted class matches the true class
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy)



Accuracy Score: 1.0


In [10]:
import pandas as pd

# Assemble the Iris measurements and their class labels into a single DataFrame:
# one column per feature, followed by a 'target' column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Preview the first rows
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column of df,
# including the integer-coded 'target' column.
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


In [27]:
from sklearn.metrics import r2_score, mean_squared_error

"""
It's recommended to experiment with different parameter values 
and use techniques like cross-validation 
to find the optimal set of parameters for your specific problem.
"""
# Two shallow trees that differ only in how many features are considered per split.
# With max_features below the 4 available features, scikit-learn samples a random
# feature subset at every split, so random_state is pinned (same seed as the
# train/test split) to keep the reported scores reproducible.
model_custom_parameter_values = DecisionTreeClassifier(
    max_depth=3, min_samples_split=5, max_features=2, random_state=42
)
model_custom_parameter_values_2nd = DecisionTreeClassifier(
    max_depth=3, min_samples_split=5, max_features=3, random_state=42
)


def _fit_and_predict(tree):
    """Fit `tree` on (X_train, y_train) and return its predictions for X_test."""
    tree.fit(X_train, y_train)
    return tree.predict(X_test)


# --- First model (max_features=2) ---
y_pred = _fit_and_predict(model_custom_parameter_values)

# Fraction of test samples classified correctly
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy)

# NOTE: r2_score and mean_squared_error are regression metrics; here they treat
# the class indices (0, 1, 2) as plain numbers, so read them with care.
# The result is stored as `r2` so the imported `r2_score` function is not
# overwritten by a float (which would break any later call to it).
r2 = r2_score(y_test, y_pred)
print("R2 Score:", r2)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


# --- Second model (max_features=3) ---
y_pred = _fit_and_predict(model_custom_parameter_values_2nd)

accuracy_2nd = accuracy_score(y_test, y_pred)
print("Accuracy Score_2nd:", accuracy_2nd)

mse_2nd = mean_squared_error(y_test, y_pred)
print("Mean Squared Error_2nd:", mse_2nd)

Accuracy Score: 1.0
R2 Score: 1.0
Mean Squared Error: 0.0
Accuracy Score_2nd: 1.0
Mean Squared Error_2nd: 0.0
