### Import Dependencies

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler

### Load Data

In [None]:
# Location of the preprocessed dataset (a path relative to the working directory).
file = "preprocessed_data.csv"

# Read the CSV into a DataFrame; the trailing expression previews the first rows.
df = pd.read_csv(filepath_or_buffer=file)
df.head()

### Scale and Split Data
##### The 'Weight' column is dropped completely: it contains float values and cannot be handled by the model without additional preprocessing. (Note: the model built below is a regressor, not a classifier.)

In [None]:
# Target is the item/bottle count; the features are every remaining column
# except 'Weight', which is excluded from the model inputs.
y = df["Item/Bottle Count"]
X = df.drop(columns=["Weight", "Item/Bottle Count"])

# Hold out a test partition. The split sizes are left at the library default,
# and random_state is fixed so the same rows land in each partition every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Min-max scaling: the scaler is fitted on the training split only, and that
# same fitted transform is then applied to both the training and test splits.
scaler = MinMaxScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

### Build RandomForestRegressor Model

In [None]:
# Fit random-forest regressors that differ only in max_depth
# (max_depth=None places no explicit limit on tree depth).
#
# random_state is pinned to the same seed used for the train/test split so
# that the forests' internal randomness (e.g. bootstrap sampling) is
# repeatable; without it, the scores reported further down change on every
# Restart & Run All.
regr_1 = RandomForestRegressor(max_depth=5, random_state=1)
regr_2 = RandomForestRegressor(max_depth=10, random_state=1)
regr_3 = RandomForestRegressor(max_depth=20, random_state=1)
regr_4 = RandomForestRegressor(max_depth=None, random_state=1)

# Every model is trained on the identical scaled training split.
regr_1.fit(X_train_scaled, y_train)
regr_2.fit(X_train_scaled, y_train)
regr_3.fit(X_train_scaled, y_train)
regr_4.fit(X_train_scaled, y_train)

In [None]:
# Predict on the scaled test split with each fitted forest, in depth order
# (5, 10, 20, None), binding one prediction array per model.
y_pred1, y_pred2, y_pred3, y_pred4 = (
    forest.predict(X_test_scaled)
    for forest in (regr_1, regr_2, regr_3, regr_4)
)

### Print R-Squared Scores (how well does the model predict the target variable)

In [None]:
# Report each forest's .score() on the training split first, then on the test
# split, one line per max_depth setting. Loop variables are underscore-prefixed
# so they do not overwrite any similarly named notebook globals.
for _split, _X_part, _y_part in (
    ("Training", X_train_scaled, y_train),
    ("Test", X_test_scaled, y_test),
):
    for _depth, _regr in (
        ("5", regr_1),
        ("10", regr_2),
        ("20", regr_3),
        ("None", regr_4),
    ):
        print(f"{_split} Score Max Depth={_depth}: ", _regr.score(_X_part, _y_part))