# Converting CSV data into RecordIO Protobuf format

<img align="left" width="130" src="https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Extra/cover-small-padded.png"/>

This notebook contains the code that helps readers work through one of the recipes from the book [Machine Learning with Amazon SageMaker Cookbook: 80 proven recipes for data scientists and developers to perform ML experiments and deployments](https://www.amazon.com/Machine-Learning-Amazon-SageMaker-Cookbook/dp/1800567030).

### How it works

In [None]:
# Reload `labeled_df` into this kernel from IPython's %store persistence.
# NOTE(review): presumably it was saved with `%store labeled_df` by an earlier
# notebook or session — confirm that step has been run first.
%store -r labeled_df
# Preview the first rows of the restored frame.
labeled_df.head()

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every column of the labeled frame (cast to float first),
# using the scaler's default settings.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(labeled_df.astype(float))

# Wrap the scaled array in a DataFrame that carries the same column names
# and row index as the input frame.
normalized_df = pd.DataFrame(
    scaled_values,
    columns=labeled_df.columns,
    index=labeled_df.index,
)

display(normalized_df.head())

In [None]:
# Show the (rows, columns) dimensions of the scaled frame.
normalized_df.shape

In [None]:
from sklearn.model_selection import train_test_split

# Pull the target column and the five feature columns out as NumPy arrays.
y = normalized_df["label"].to_numpy()
X = normalized_df[["x", "x2", "x3", "y", "y2"]].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Alias the training split under the names the serialization step uses.
train_np, label_np = X_train, y_train

In [None]:
import io
from sagemaker.amazon.common import write_numpy_to_dense_tensor

# In-memory binary buffer that receives the serialized training records.
buf = io.BytesIO()
# Serialize the training features together with their labels into the buffer.
# NOTE(review): per the notebook title this should be the RecordIO Protobuf
# encoding — confirm against the SageMaker SDK documentation.
# NOTE(review): the arrays come from a float-cast, scaled frame, so they are
# presumably float64 — confirm the consuming algorithm accepts that dtype.
write_numpy_to_dense_tensor(buf, train_np, label_np)
# Rewind to the start so later reads begin at the first byte; as the last
# expression, the returned position is also echoed as the cell output.
buf.seek(0)

In [None]:
# Create the local `tmp` output directory that the save step writes into.
# Pure Python is used instead of a shell escape so the cell also works on
# kernels without a POSIX `mkdir`; `parents`/`exist_ok` mirror `mkdir -p`,
# so re-running the cell is harmless.
from pathlib import Path

Path("tmp").mkdir(parents=True, exist_ok=True)

In [None]:
def save_bytesio(filename, buf, directory="tmp"):
    """Write the full contents of an in-memory bytes buffer to a file.

    Args:
        filename: Name of the output file, relative to ``directory``.
        buf: An ``io.BytesIO`` (or any object exposing ``getbuffer()``).
            Its entire contents are written, independent of the current
            stream position.
        directory: Folder that receives the file. Defaults to ``"tmp"``,
            the location this helper has always written to.

    Raises:
        OSError: If the target folder cannot be created or the file cannot
            be written.
    """
    # Imported locally so this cell does not rely on another cell's imports.
    from pathlib import Path

    target = Path(directory) / filename
    # Create any missing folders so the call no longer depends on a separate
    # "mkdir" cell having been executed first.
    target.parent.mkdir(parents=True, exist_ok=True)

    with open(target, "wb") as file:
        file.write(buf.getbuffer())

    # Report success only after the file has been closed (and thus flushed).
    print(f"Successfully saved {filename}")

In [None]:
# Persist the serialized training buffer to disk as "train.io".
save_bytesio(filename="train.io", buf=buf)

In [None]:
# Persist the in-memory buffer via IPython's %store so another kernel or
# notebook can restore it with `%store -r buf`.
%store buf

In [None]:
# Persist the train/test feature and label arrays via IPython's %store so
# another kernel or notebook can restore them with `%store -r`.
%store X_train
%store X_test
%store y_train
%store y_test