<a href="https://colab.research.google.com/github/LongNguyen1984/DeepLearning/blob/master/SaveLoadData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Define Dataset

In [4]:
# Build a small synthetic two-class dataset, split it, and inspect the raw
# value range of every input variable in each partition.
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# 100 samples around 2 centers with 2 input features; seeded for repeatability
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# hold out 33% of the rows for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1)

# summarize the scale of each input variable: walk the columns of both
# partitions side by side (transposing yields one row per feature)
for i, (train_col, test_col) in enumerate(zip(X_train.T, X_test.T)):
  print('>%d, train: min=%.3f, max=%.3f, test: min=%.3f, max=%.3f'%
        (i, train_col.min(), train_col.max(), test_col.min(), test_col.max()))

>0, train: min=-11.856, max=0.526, test: min=-11.270, max=0.085
>1, train: min=-6.388, max=6.507, test: min=-5.581, max=5.926


# Scale the Dataset

In [8]:
# Demonstrate min-max scaling: the scaler learns its range from the training
# data only and is then applied to both the training and the test partition.
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# same seeded synthetic dataset as before: 100 samples, 2 centers, 2 features
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

# seeded 67/33 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1)

# create the scaler and fit it on the training partition only
# (fit() returns the estimator itself, so the call can be chained)
scaler = MinMaxScaler().fit(X_train)

# apply the fitted scaler to both partitions
X_train_scaled, X_test_scaled = map(scaler.transform, (X_train, X_test))

# summarize the scale of each input variable after scaling; test values are
# not forced to span exactly [0, 1] because the range came from the train set
for i, (train_col, test_col) in enumerate(zip(X_train_scaled.T, X_test_scaled.T)):
  print('>%d, train: min=%.3f, max=%.3f, test: min=%.3f, max=%.3f' %
        (i, train_col.min(), train_col.max(), test_col.min(), test_col.max()))

>0, train: min=0.000, max=1.000, test: min=0.047, max=0.964
>1, train: min=0.000, max=1.000, test: min=0.063, max=0.955


# Save Model and Data Scaler

In [9]:
# example of fitting a model on the scaled dataset and persisting both the
# model and the scaler, so new data can later be scaled the same way
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression

from pickle import dump
# prepare dataset
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)
# split data into train and test sets (only the training part is used here)
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.33, random_state=1)
# define scaler
scaler = MinMaxScaler()
# fit scaler on the training dataset
scaler.fit(X_train)
# transform the training dataset
X_train_scaled = scaler.transform(X_train)
# define the model
model = LogisticRegression(solver='lbfgs')
model.fit(X_train_scaled, y_train)
# save the model; the context manager guarantees the file is flushed and
# closed before any later cell tries to read it back
with open('model.pkl', 'wb') as model_file:
  dump(model, model_file)
# save the scaler the same way
with open('scaler.pkl', 'wb') as scaler_file:
  dump(scaler, scaler_file)

# Load Model and Data Scaler

In [10]:
# load model and scaler and make predictions on new data
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from pickle import load
# prepare dataset
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)
# split data into train and test sets (only the test part is used here)
_, X_test, _, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# you created yourself or otherwise trust.
# load the model; the context manager closes the file handle after reading
with open('model.pkl', 'rb') as model_file:
  model = load(model_file)
# load the scaler
with open('scaler.pkl', 'rb') as scaler_file:
  scaler = load(scaler_file)
# check scale of the test set before scaling
print('Raw test set range')
for i in range(X_test.shape[1]):
  print('>%d, min=%.3f, max=%.3f' %(i, X_test[:,i].min(), X_test[:,i].max()))
# transform the test dataset with the loaded (already fitted) scaler
X_test_scaled = scaler.transform(X_test)
print('Scaled test set range')
for i in range(X_test_scaled.shape[1]):
  print('>%d, min=%.3f, max=%.3f'% (i, X_test_scaled[:, i].min(), X_test_scaled[:, i].max()))
# make predictions on the test set
yhat = model.predict(X_test_scaled)
# evaluate accuracy
acc = accuracy_score(y_test, yhat)
print('Test Accuracy:', acc)

Raw test set range
>0, min=-11.270, max=0.085
>1, min=-5.581, max=5.926
Scaled test set range
>0, min=0.047, max=0.964
>1, min=0.063, max=0.955
Test Accuracy: 1.0
