# <span style="color:#ff5f27"> 👨🏻‍🏫 Keras model and Sklearn Transformation Functions Registration in the Model Registry Tutorial</span>

## <span style="color:#ff5f27">🗄️ Table of Contents</span>
- [📝 Imports](#1)
- [💽 Loading Data](#2)
- [🔮 Connecting to Hopsworks Feature Store](#3)
- [🪄 Creating Feature Groups](#4)
- [🖍 Feature View Creation](#5)
- [👩🏻‍🔬 Data Transformation](#6)
- [👔 Transformer instances fit](#7)
- [🧬 Modeling](#8)
- [💾 Saving the Model and Transformation Functions](#9)
- [📮 Retrieving the Model and Transformation Functions from Model Registry](#10)
- [👨🏻‍⚖️ Batch Prediction](#11)
- [👨🏻‍⚖️ Serving Feature Vector Prediction](#12)

<a name='1'></a>
## <span style='color:#ff5f27'> 📝 Imports </span>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import joblib

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score

<a name='2'></a>
## <span style="color:#ff5f27;"> 💽 Loading Data </span>

In [None]:
# Load the PM2.5 backfill CSV straight from the remote URL.
df_original = pd.read_csv("https://repo.hops.works/dev/davit/air_quality/backfill_pm2_5_eu.csv")
# Synthetic binary label: one 0/1 value drawn at random per row. No seed is set
# in this cell, so the labels differ between runs.
df_original['target'] = np.random.choice([0, 1], size=len(df_original))

# Preview the first three rows.
df_original.head(3)

<a name='3'></a>
## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in and keep the returned project handle; it is reused later to reach
# the model registry.
project = hopsworks.login()

# Handle to the project's feature store, used for feature groups and views.
fs = project.get_feature_store() 

<a name='4'></a>
## <span style="color:#ff5f27;">🪄 Creating Feature Groups</span>

In [None]:
# Fetch the feature group if it already exists, otherwise create it.
# It is keyed by (city_name, date) and created with online_enabled=True.
feature_group = fs.get_or_create_feature_group(
    name='feature_group_online',
    description='Online Feature Group',
    version=1,
    primary_key=['city_name', 'date'],
    online_enabled=True,
)    
# Write the loaded DataFrame (including the synthetic 'target') into the group.
feature_group.insert(df_original)

<a name='5'></a>
## <span style="color:#ff5f27;"> 🖍 Feature View Creation</span>

In [None]:
# Select every feature of the group except 'date'.
query = feature_group.select_except(['date'])

# Fetch or create the feature view over that query, with 'target' as the label.
feature_view = fs.get_or_create_feature_view(
    name='serving_fv',
    version=1,
    query=query,
    labels=['target']
)

## <span style="color:#ff5f27;"> 🏋️ Training Dataset Creation</span>


In [None]:
# Create a train-test split dataset with 10% of the data held out for testing,
# stored in CSV format. The call returns the new training dataset version
# (reused by later cells) and a job handle.
td_version, job = feature_view.create_train_test_split(
    test_size=0.1,
    description='Description of the dataset',
    data_format='csv'
)

### <span style="color:#ff5f27;">🪝 Training Dataset Retrieval</span>

In [None]:
# Read back the split created above (identified by td_version) as
# features/labels for the train and test parts.
X_train, X_test, y_train, y_test = feature_view.get_train_test_split(
    training_dataset_version=td_version
)

In [None]:
X_train.head(3)

In [None]:
y_train.head(3)

<a name='6'></a>
## <span style="color:#ff5f27;">👩🏻‍🔬 Data Transformation</span>

In [None]:
def transform_all(func):
    """Decorator that lets a transformation accept DataFrames or raw vectors.

    The wrapped ``func(data, one_hot_encoder, standard_scaler)`` always
    receives ``data`` as a ``pd.DataFrame``. The wrapper accepts:

    * a ``pd.DataFrame`` -- passed through untouched;
    * a list of feature vectors (``[[city_name, pm2_5], ...]``) -- one row
      per vector;
    * a single feature vector (``[city_name, pm2_5]``) -- a one-row frame.

    In the list forms, position 0 is used as ``city_name`` and position 1
    as ``pm2_5``.

    Args:
        func: Callable taking ``(data, one_hot_encoder, standard_scaler)``.

    Returns:
        The wrapping callable with the same three-argument signature.
    """
    # Local import keeps this cell self-contained.
    import functools

    @functools.wraps(func)
    def inner(data, one_hot_encoder, standard_scaler):
        if isinstance(data, pd.DataFrame):
            return func(data, one_hot_encoder, standard_scaler)

        # A nested list is a batch of vectors; anything else is treated as a
        # single vector. Rows come from the `data` argument itself, never
        # from a notebook-level variable.
        rows = data if isinstance(data[0], list) else [data]

        frame = pd.DataFrame(
            {
                'city_name': [row[0] for row in rows],
                'pm2_5': [row[1] for row in rows],
            }
        )
        return func(frame, one_hot_encoder, standard_scaler)

    return inner

In [None]:
@transform_all
def transform_data(data, one_hot_encoder, standard_scaler):
    """One-hot encode ``city_name`` and standardise ``pm2_5``.

    Args:
        data: DataFrame with at least the columns ``city_name`` and ``pm2_5``
            (the ``transform_all`` decorator converts raw vectors first).
        one_hot_encoder: Already-fitted encoder; its ``transform`` output is
            wrapped in a DataFrame, so it is expected to return a dense array.
        standard_scaler: Already-fitted scaler for the ``pm2_5`` column.

    Returns:
        A new DataFrame without ``city_name``, with one column per encoder
        category appended and ``pm2_5`` replaced by its scaled value.
    """
    # Transform the 'city_name' column using OneHotEncoder
    city_encoded = one_hot_encoder.transform(data[['city_name']])

    # Reuse the input index: pd.concat(axis=1) aligns on index labels, so a
    # default RangeIndex here would misalign rows (and introduce NaNs)
    # whenever `data` does not carry a 0..n-1 index.
    encoded_df = pd.DataFrame(
        city_encoded,
        columns=one_hot_encoder.categories_[0],
        index=data.index,
    )

    # Swap the raw 'city_name' column for its one-hot columns
    data = pd.concat([data.drop('city_name', axis=1), encoded_df], axis=1)

    # Transform the 'pm2_5' column using StandardScaler
    data['pm2_5'] = standard_scaler.transform(data[['pm2_5']])

    return data

<a name='7'></a>
### <span style="color:#ff5f27;"> 👔 Transformer instances fit</span>

In [None]:
# Create an instance of the OneHotEncoder and StandardScaler.
# The encoder must return a dense array because transform_data wraps its
# output in a DataFrame. scikit-learn 1.2 renamed `sparse` to `sparse_output`
# and later removed the old keyword, so try the new name first and fall back
# for older installations.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    # scikit-learn < 1.2 does not know `sparse_output`
    one_hot_encoder = OneHotEncoder(sparse=False)
standard_scaler = StandardScaler()

In [None]:
# Fit both transformers on the training split only: the encoder on the
# 'city_name' column and the scaler on the 'pm2_5' column.
one_hot_encoder.fit(X_train[['city_name']])
standard_scaler.fit(X_train[['pm2_5']])
print('✅ Done!')

### <span style="color:#ff5f27;">⛳️ Train Data Transformation</span>

In [None]:
# Apply the fitted transformers to the training features and preview the result.
X_train_transformed = transform_data(X_train, one_hot_encoder, standard_scaler)
X_train_transformed.head(3)

### <span style="color:#ff5f27;">⛳️ Test Data Transformation</span>

In [None]:
# Apply the same (train-fitted) transformers to the test features.
X_test_transformed = transform_data(X_test, one_hot_encoder, standard_scaler)
X_test_transformed.head(3)

<a name='8'></a>
## <span style="color:#ff5f27;">🧬 Modeling</span>

In [None]:
# Feed-forward binary classifier, declared as an ordered list of layers:
#   1. 64-unit ReLU layer; its input width is the number of transformed columns
#   2. 32-unit ReLU hidden layer
#   3. single sigmoid unit as the output layer
model = Sequential(
    [
        Dense(units=64, input_dim=X_train_transformed.shape[1], activation='relu'),
        Dense(units=32, activation='relu'),
        Dense(units=1, activation='sigmoid'),
    ]
)

# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model for 10 epochs with mini-batches of 32, holding out 20% of
# the training data for validation (validation_split=0.2).
model.fit(X_train_transformed, y_train, epochs=10, batch_size=32, validation_split=0.2)

## <span style="color:#ff5f27;">🗄 Model Registry</span>

In [None]:
# Handle to the project's model registry, used below to register and fetch the model.
mr = project.get_model_registry()

### <span style="color:#ff5f27;">⚙️ Model Schema</span>


In [None]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

# Input schema is built from the transformed training features (as an array);
# output schema from the training labels.
input_schema = Schema(X_train_transformed.values)
output_schema = Schema(y_train)
model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)

# Show the resulting schema as a dictionary.
model_schema.to_dict()

<a name='9'></a>
### <span style="color:#ff5f27;">💾 Saving the Model and Transformation Functions</span>

In [None]:
model_dir = "keras_tf_model"

# Create the export directory; a no-op when it already exists.
os.makedirs(model_dir, exist_ok=True)

# Save Transformation Functions (the fitted encoder and scaler) next to the
# model so the same preprocessing can be restored at inference time.
joblib.dump(one_hot_encoder, os.path.join(model_dir, 'one_hot_encoder.pkl'))
joblib.dump(standard_scaler, os.path.join(model_dir, 'standard_scaler.pkl'))

# Save the model.
# NOTE(review): this pickles the Keras model via joblib, which depends on the
# installed Keras/TensorFlow version supporting pickling. Keras' native
# `model.save(...)` is the more robust format, but the retrieval cell loads
# 'keras_classifier.pkl' with joblib, so the format is kept unchanged here.
joblib.dump(model, os.path.join(model_dir, 'keras_classifier.pkl'))

In [None]:
# Create the registry entry for the model, attaching the schema built earlier
# and one randomly sampled row of the raw (untransformed) training features
# as the input example.
# NOTE(review): this rebinds `model`, which until now referred to the trained
# Keras model. Re-running the save cell after this one would dump the registry
# object instead — consider a distinct name.
model = mr.tensorflow.create_model(
    name="keras_model",
    description="Keras model",
    input_example=X_train.sample(),
    model_schema=model_schema,
)

# Save the contents of model_dir (model + transformer pickles) under this entry.
model.save(model_dir)

<a name='10'></a>
## <span style="color:#ff5f27;"> 📮 Retrieving the Model and Transformation Functions from Model Registry </span>

In [None]:
# Look up the registered model by name.
# NOTE(review): the version is hard-coded to 1; if the model has been saved
# more than once this may not be the latest artifact — confirm.
retrieved_model = mr.get_model(
    name="keras_model",
    version=1
)
# Download the model's files; the returned value is used as the local directory path below.
saved_model_dir = retrieved_model.download()

In [None]:
# Retrieve the Keras model
# NOTE(review): joblib.load unpickles arbitrary objects; this assumes the
# registry contents are trusted.
retrieved_keras_model = joblib.load(saved_model_dir + "/keras_classifier.pkl")

# Retrieve Transformation Functions; these rebind the in-memory encoder and
# scaler to the versions stored alongside the model.
one_hot_encoder = joblib.load(saved_model_dir + "/one_hot_encoder.pkl")
standard_scaler = joblib.load(saved_model_dir + "/standard_scaler.pkl")

<a name='11'></a>
## <span style="color:#ff5f27;"> 👨🏻‍⚖️ Batch Prediction </span>

In [None]:
# Initialise batch scoring against the training dataset version created earlier.
feature_view.init_batch_scoring(training_dataset_version=td_version)

# Fetch the batch data from the feature view and preview it.
batch_data = feature_view.get_batch_data()
batch_data.head(3)

In [None]:
# Apply the transformers restored from the registry to the batch data.
batch_data_transformed = transform_data(batch_data, one_hot_encoder, standard_scaler)
batch_data_transformed.head(3)

In [None]:
# Score the transformed batch with the retrieved model and show the first ten outputs.
predictions_batch = retrieved_keras_model.predict(batch_data_transformed)
predictions_batch[:10]

<a name='12'></a>
## <span style="color:#ff5f27;"> 👨🏻‍⚖️ Serving Feature Vector Prediction</span>

In [None]:
# Initialise serving.
# NOTE(review): the argument is a hard-coded 1, whereas batch scoring above
# uses td_version — presumably this is the training dataset version; confirm
# it should not be td_version as well.
feature_view.init_serving(1)

# Look up a single feature vector by its primary-key values (city_name, date).
feature_vector = feature_view.get_feature_vector(
    entry = {
        "city_name": 'Amsterdam',
        "date": '2013-01-01',
    }
)
feature_vector

In [None]:
# transform_data (via its decorator) turns the single vector into a one-row
# DataFrame before encoding and scaling it.
feature_vector_transformed = transform_data(feature_vector, one_hot_encoder, standard_scaler)
feature_vector_transformed.head(3)

In [None]:
# Score the single transformed vector with the retrieved model.
prediction_feature_vector = retrieved_keras_model.predict(feature_vector_transformed)
prediction_feature_vector

In [None]:
# Look up several feature vectors at once, one per primary-key entry.
feature_vectors = feature_view.get_feature_vectors(
    entry = [
        {"city_name": 'Amsterdam', "date": '2013-01-01'},
        {"city_name": 'Amsterdam', "date": '2014-01-01'},
    ]
)
feature_vectors

In [None]:
# transform_data (via its decorator) builds one DataFrame row per vector
# before encoding and scaling.
feature_vectors_transformed = transform_data(feature_vectors, one_hot_encoder, standard_scaler)
feature_vectors_transformed.head(3)

In [None]:
# Score all transformed vectors with the retrieved model in one call.
prediction_feature_vectors = retrieved_keras_model.predict(feature_vectors_transformed)
prediction_feature_vectors

---