### Import Dependencies

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

### Load Data

In [23]:
file = 'preprocessed_data.csv'
df = pd.read_csv(file)
df.head()

Unnamed: 0,Company Name,Ship Date,City,State,Shipping Service,Created Date,Weight,Item/Bottle Count
0,1,43952.87139,1555,34,5,43940.85348,10.5,3
1,1,43952.87149,5113,44,1,43943.52123,20.2,6
2,1,43952.87149,1341,5,9,43945.43794,20.2,6
3,1,43952.87148,4881,4,4,43945.60456,10.5,3
4,1,43952.87148,4984,44,1,43947.43795,39.0,12


### Scale and Split Data
##### Need to drop 'Weight' column completely because it has float variables and cannot be predicted in the classifier model without additional preprocessing

In [32]:
# Split 
X = df.drop(columns=['Weight','Item/Bottle Count'], axis=1)
y = df[['Item/Bottle Count']]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
y_train

Unnamed: 0,Item/Bottle Count
22933,4
17184,1
20951,1
31764,12
29489,6
...,...
17289,1
5192,3
12172,1
235,3


In [33]:
# scale the data
scaler = MinMaxScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Build DecisionTreeClassifier Model

In [34]:
# Fit classifier models with different max_depth
classifier_1 = DecisionTreeClassifier(max_depth=2)
classifier_2 = DecisionTreeClassifier(max_depth=5)
classifier_3 = DecisionTreeClassifier(max_depth=8)

classifier_1.fit(X_train_scaled,y_train)
classifier_2.fit(X_train_scaled,y_train)
classifier_3.fit(X_train_scaled,y_train)

DecisionTreeClassifier(max_depth=8)

In [35]:
y_pred1 = classifier_1.predict(X_test_scaled)
y_pred2 = classifier_2.predict(X_test_scaled)
y_pred3 = classifier_3.predict(X_test_scaled)

In [36]:
print(y_pred1)

[6 6 6 ... 6 6 6]


### View Results

In [37]:
# results = pd.DataFrame({
#     "Prediction1":y_pred1,
#     "Prediction2":y_pred2,
#     "Prediction3":y_pred3,
#     "Actual":y_test
# }).reset_index(drop=True)
# results

ValueError: Data must be 1-dimensional

In [39]:
print(accuracy_score(y_test,y_pred1))
print(accuracy_score(y_test,y_pred2))
print(accuracy_score(y_test,y_pred3))

0.6139240506329114
0.6242243732936212
0.6264581782079921
