In [2]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score

# Load the diabetes dataset
data = load_diabetes()

# Split the dataset into features (X) and target variable (y)
X = data.data
y = data.target

"""
The accuracy_score function is specifically designed for classification tasks,
 and it may not be suitable for evaluating a regression model.
The accuracy_score calculates the accuracy as the ratio of correct predictions to the total number of samples,
which is more relevant for classification problems 
where the predictions are discrete labels.
"""
"""
If you still want to use accuracy_score for regression, 
you can convert the regression problem into a classification problem
by binning the target variable into classes or applying a threshold.
"""
# Binarize the target variable by applying a threshold
threshold = 150
y_binary = [1 if val >= threshold else 0 for val in y]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

# Create a Decision Tree Regression model
model = DecisionTreeRegressor()

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Convert the predictions into binary classes based on the threshold
y_pred_binary = [1 if val >= threshold else 0 for val in y_pred]

# Calculate the accuracy score
accuracy = accuracy_score(y_test, y_pred_binary)
print("Accuracy Score:", accuracy)

# Calculate the accuracy score
accuracy_model_score = model.score(X_test, y_test)
print("Accuracy with model.score:", accuracy_model_score)

# Calculate the R2 score of the model
r2_score = r2_score(y_test, y_pred)
print("R2 Score:", r2_score)

# Calculate the mean squared error (MSE)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)




Accuracy Score: 0.550561797752809
Accuracy with model.score: -0.6346938775510207
R2 Score: -0.6346938775510207
Mean Squared Error: 0.4044943820224719


In [3]:
import pandas as pd

# Create a DataFrame from the diabetes dataset for the features and target variable
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
df['target_binary'] = y_binary

# Display the DataFrame
df.head()


Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target,target_binary
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0,1
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0,0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0,0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0,1
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0,0


In [8]:
df.describe()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target,target_binary
count,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0
mean,-3.639623e-16,1.309912e-16,-8.013951e-16,1.289818e-16,-9.042540000000001e-17,1.301121e-16,-4.563971e-16,3.863174e-16,-3.848103e-16,-3.398488e-16,152.133484,0.461538
std,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,77.093005,0.499083
min,-0.1072256,-0.04464164,-0.0902753,-0.1123996,-0.1267807,-0.1156131,-0.1023071,-0.0763945,-0.1260974,-0.1377672,25.0,0.0
25%,-0.03729927,-0.04464164,-0.03422907,-0.03665645,-0.03424784,-0.0303584,-0.03511716,-0.03949338,-0.03324879,-0.03317903,87.0,0.0
50%,0.00538306,-0.04464164,-0.007283766,-0.005670611,-0.004320866,-0.003819065,-0.006584468,-0.002592262,-0.001947634,-0.001077698,140.5,0.0
75%,0.03807591,0.05068012,0.03124802,0.03564384,0.02835801,0.02984439,0.0293115,0.03430886,0.03243323,0.02791705,211.5,1.0
max,0.1107267,0.05068012,0.1705552,0.1320442,0.1539137,0.198788,0.1811791,0.1852344,0.133599,0.1356118,346.0,1.0
