# Import Data & library

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load training data set from CSV file
training_data_df = pd.read_csv("03\sales_data_training.csv")

# Load testing data set from CSV file
test_data_df = pd.read_csv("03\sales_data_test.csv")


In [2]:
training_data_df.head(5)

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,3.5,1,0,1,0,1,0,0,132717,59.99
1,4.5,0,0,0,0,1,1,0,83407,49.99
2,3.0,0,0,0,0,1,1,0,62423,49.99
3,4.5,1,0,0,0,0,0,1,69889,39.99
4,4.0,1,0,1,0,1,0,1,161382,59.99


In [3]:
test_data_df.head(5)

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,3.5,1,1,1,0,1,0,1,247537,59.99
1,2.5,0,0,0,1,1,0,0,73960,59.99
2,3.5,0,0,0,0,1,1,0,82671,59.99
3,4.0,1,1,0,0,1,0,0,137456,39.99
4,2.0,1,0,1,0,1,0,0,89639,59.99


# Preprocess Data

## Data needs to be scaled to a small range like 0 to 1 for the neural network to work well.

In [4]:
#scaler
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

In [5]:
# Scale both the training inputs and outputs
scaled_training = scaler.fit_transform(training_data_df)
scaled_testing = scaler.transform(test_data_df)


In [6]:
# Print out the adjustment that the scaler applied to the total_earnings column of data
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}".format(scaler.scale_[8], scaler.min_[8]))

Note: total_earnings values were scaled by multiplying by 0.0000036968 and adding -0.115913


In [7]:
# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training, columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(scaled_testing, columns=test_data_df.columns.values)


In [8]:
# Save scaled data dataframes to new CSV files
scaled_training_df.to_csv("sales_data_training_scaled.csv", index=False)
scaled_testing_df.to_csv("sales_data_test_scaled.csv", index=False)

# Create Model

In [9]:
import pandas as pd
from keras.models import Sequential
from keras.layers import *

training_data_df = pd.read_csv("03\sales_data_training_scaled.csv")

X = training_data_df.drop('total_earnings', axis=1).values
Y = training_data_df[['total_earnings']].values

# Define the model
model = Sequential()
model.add(Dense(50, input_dim=9, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss="mean_squared_error", optimizer="adam")

# Train Model

In [10]:
model.fit(
X,
Y,
epochs=50,
shuffle=True,
verbose=2
)

Epoch 1/50
32/32 - 0s - loss: 0.0161 - 401ms/epoch - 13ms/step
Epoch 2/50
32/32 - 0s - loss: 0.0026 - 35ms/epoch - 1ms/step
Epoch 3/50
32/32 - 0s - loss: 8.3873e-04 - 34ms/epoch - 1ms/step
Epoch 4/50
32/32 - 0s - loss: 4.5172e-04 - 34ms/epoch - 1ms/step
Epoch 5/50
32/32 - 0s - loss: 2.8727e-04 - 35ms/epoch - 1ms/step
Epoch 6/50
32/32 - 0s - loss: 2.0657e-04 - 35ms/epoch - 1ms/step
Epoch 7/50
32/32 - 0s - loss: 1.4917e-04 - 47ms/epoch - 1ms/step
Epoch 8/50
32/32 - 0s - loss: 1.1648e-04 - 40ms/epoch - 1ms/step
Epoch 9/50
32/32 - 0s - loss: 8.8494e-05 - 38ms/epoch - 1ms/step
Epoch 10/50
32/32 - 0s - loss: 7.2895e-05 - 35ms/epoch - 1ms/step
Epoch 11/50
32/32 - 0s - loss: 6.8270e-05 - 35ms/epoch - 1ms/step
Epoch 12/50
32/32 - 0s - loss: 5.7772e-05 - 35ms/epoch - 1ms/step
Epoch 13/50
32/32 - 0s - loss: 5.2459e-05 - 35ms/epoch - 1ms/step
Epoch 14/50
32/32 - 0s - loss: 5.0319e-05 - 36ms/epoch - 1ms/step
Epoch 15/50
32/32 - 0s - loss: 3.7399e-05 - 38ms/epoch - 1ms/step
Epoch 16/50
32/32 - 0s - 

<keras.callbacks.History at 0x1a95604c340>

In [11]:
# Load the separate test data set
test_data_df = pd.read_csv("03\sales_data_test_scaled.csv")

X_test = test_data_df.drop('total_earnings', axis=1).values
Y_test = test_data_df[['total_earnings']].values

test_error_rate = model.evaluate(X_test, Y_test, verbose=0)
print("The mean squared error (MSE) for the test data set is: {}".format(test_error_rate))

The mean squared error (MSE) for the test data set is: 9.33918563532643e-05


# Predict

In [12]:
# Load the data we make to use to make a prediction
X = pd.read_csv("04\proposed_new_product.csv").values
X


array([[0.7, 1. , 1. , 1. , 0. , 1. , 0. , 1. , 0.8]])

In [13]:
# Make a prediction with the neural network
prediction = model.predict(X)

# Grab just the first element of the first prediction (since that's the only have one)
prediction = prediction[0][0]

In [14]:
# Re-scale the data from the 0-to-1 range back to dollars
# These constants are from when the data was originally scaled down to the 0-to-1 range
prediction = prediction + 0.1159
prediction = prediction / 0.0000036968

In [15]:
print("Earnings Prediction for Proposed Product - ${}".format(prediction))

Earnings Prediction for Proposed Product - $265135.6474142316


# Saving and Loading models

In [16]:
# Save the model to disk
model.save("trained_model.h5")
print("Model saved to disk")

Model saved to disk


# Load Model

In [18]:
import pandas as pd
from keras.models import load_model

model = load_model("trained_model.h5")

In [20]:
X = pd.read_csv("04\proposed_new_product.csv").values
prediction = model.predict(X)

# Grab just the first element of the first prediction (since we only have one)
prediction = prediction[0][0]

In [21]:
# Re-scale the data from the 0-to-1 range back to dollars
# These constants are from when the data was originally scaled down to the 0-to-1 range
prediction = prediction + 0.1159
prediction = prediction / 0.0000036968

print("Earnings Prediction for Proposed Product - ${}".format(prediction))

Earnings Prediction for Proposed Product - $265135.6474142316
