# Dog Breed App Do: Decision Trees

In [1]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

## Loading and Preprocessing Loans Encoded Data

In [2]:
# Loading data
file_path = Path("../sourcedata/dog_breed_val.csv")
dog_breed_val_df = pd.read_csv(file_path)
dog_breed_val_df.head()



Unnamed: 0,breed_name,description,popularity,min_height,max_height,min_weight,max_weight,min_expectancy,max_expectancy,grooming_frequency_value,shedding_value,energy_level_value,trainability_value,demeanor_value
0,Affenpinscher,The Affen’s apish look has been described many...,148.0,22.86,29.21,3.175147,4.535924,12.0,15.0,0.6,0.6,0.6,0.8,1.0
1,Afghan Hound,"The Afghan Hound is an ancient breed, his whol...",113.0,63.5,68.58,22.679619,27.215542,12.0,15.0,0.8,0.2,0.8,0.2,0.2
2,Airedale Terrier,The Airedale Terrier is the largest of all ter...,60.0,58.42,58.42,22.679619,31.751466,11.0,14.0,0.6,0.4,0.6,1.0,0.8
3,Akita,"Akitas are burly, heavy-boned spitz-type dogs ...",47.0,60.96,71.12,31.751466,58.967008,10.0,13.0,0.8,0.6,0.8,1.0,0.6
4,Alaskan Malamute,The Alaskan Malamute stands 23 to 25 inches at...,58.0,58.42,63.5,34.019428,38.555351,10.0,14.0,0.6,0.6,0.8,0.4,0.8


In [3]:
# Define features set
col_to_drop = ['breed_name','description']
X = dog_breed_val_df.copy()
X.drop(col_to_drop, axis=1, inplace=True)
X.head()



Unnamed: 0,popularity,min_height,max_height,min_weight,max_weight,min_expectancy,max_expectancy,grooming_frequency_value,shedding_value,energy_level_value,trainability_value,demeanor_value
0,148.0,22.86,29.21,3.175147,4.535924,12.0,15.0,0.6,0.6,0.6,0.8,1.0
1,113.0,63.5,68.58,22.679619,27.215542,12.0,15.0,0.8,0.2,0.8,0.2,0.2
2,60.0,58.42,58.42,22.679619,31.751466,11.0,14.0,0.6,0.4,0.6,1.0,0.8
3,47.0,60.96,71.12,31.751466,58.967008,10.0,13.0,0.8,0.6,0.8,1.0,0.6
4,58.0,58.42,63.5,34.019428,38.555351,10.0,14.0,0.6,0.6,0.8,0.4,0.8


In [22]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   popularity                277 non-null    float64
 1   min_height                277 non-null    float64
 2   max_height                277 non-null    float64
 3   min_weight                277 non-null    float64
 4   max_weight                277 non-null    float64
 5   min_expectancy            277 non-null    float64
 6   max_expectancy            277 non-null    float64
 7   grooming_frequency_value  277 non-null    float64
 8   shedding_value            277 non-null    float64
 9   energy_level_value        277 non-null    float64
 10  trainability_value        277 non-null    float64
 11  demeanor_value            277 non-null    float64
dtypes: float64(12)
memory usage: 26.1 KB


In [7]:
# Define target vector
y = dog_breed_val_df["breed_name"].values.reshape(-1, 1)
y[:5]



array([['Affenpinscher'],
       ['Afghan Hound'],
       ['Airedale Terrier'],
       ['Akita'],
       ['Alaskan Malamute']], dtype=object)

In [8]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)



In [9]:
# Creating StandardScaler instance
scaler = StandardScaler()



In [10]:
# Fitting Standard Scaller
X_scaler = scaler.fit(X_train)



In [11]:
# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


## Fitting the Decision Tree Model

In [12]:
# Creating the decision tree classifier instance
model = tree.DecisionTreeClassifier()



In [13]:
# Fitting the model
model = model.fit(X_train_scaled, y_train)



## Making Predictions Using the Tree Model

In [14]:
# Making predictions using the testing data
predictions = model.predict(X_test_scaled)



## Model Evaluation

In [20]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(cm)
#     cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
# )

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)



In [19]:
cm


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [21]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))


Confusion Matrix


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,121,122,123,124,125,126,127,128,129,130
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
127,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
128,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
129,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Accuracy Score : 0.0
Classification Report
                                    precision    recall  f1-score   support

                  Airedale Terrier       0.00      0.00      0.00       1.0
                  Alaskan Malamute       0.00      0.00      0.00       1.0
                  American Bulldog       0.00      0.00      0.00       1.0
        American English Coonhound       0.00      0.00      0.00       1.0
               American Eskimo Dog       0.00      0.00      0.00       1.0
                 American Foxhound       0.00      0.00      0.00       0.0
            American Leopard Hound       0.00      0.00      0.00       1.0
            Anatolian Shepherd Dog       0.00      0.00      0.00       0.0
            Appenzeller Sennenhund       0.00      0.00      0.00       0.0
             Australian Cattle Dog       0.00      0.00      0.00       1.0
                 Australian Kelpie       0.00      0.00      0.00       1.0
               Australian Shepherd       0.0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
