# This notebook reads features from Hopsworks, trains a model on them, and stores the trained model back in Hopsworks

In [77]:
import os

import hopsworks
import hsfs
import joblib
import numpy as np
import pandas as pd
from hsml.model_schema import ModelSchema
from hsml.schema import Schema
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [2]:
# Log in to Hopsworks and get a handle to the project's feature store.
# `project` is reused later to reach the model registry.
# NOTE(review): `fs` is rebound by the hsfs connection cell further down,
# so this particular handle looks unused — confirm before removing.
project = hopsworks.login()
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.





Multiple projects found. 

	 (1) SMDL_A01
	 (2) kompot

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/194711
Connected. Call `.close()` to terminate connection gracefully.


In [3]:
# Open a direct hsfs connection, look the feature store up by name, and fetch
# the feature view (pinned to version 4) that supplies the training data.
# `hsfs` is imported in the notebook's import cell rather than here, so all
# dependencies are visible in one place.
# NOTE: this rebinds `fs`, replacing the handle obtained from `project` earlier.
connection = hsfs.connection()
fs = connection.get_feature_store(name='smdl_a01_featurestore')
fv = fs.get_feature_view('merged_swells_huntington', version=4)

Connected. Call `.close()` to terminate connection gracefully.


In [55]:
# Pull an in-memory train/test split from the feature view, holding out 20%
# of the rows for testing. Data is read through Hive.
X_train, X_test, y_train, y_test = fv.train_test_split(
    test_size=0.2,
    read_options={"use_hive": True},
)



Finished: Reading data from Hopsworks, using Hive (1.60s) 




In [56]:
# Sanity check: size of the held-out feature split.
X_test.shape

(4, 3)

In [57]:
# Sanity check: size of the training feature split.
X_train.shape

(14, 3)

In [83]:


class LinReg(LinearRegression):
    """Linear regression over ordinal surf-quality labels.

    String labels are translated to the numeric codes in ``encoding`` before
    fitting and scoring, so the underlying ``LinearRegression`` only ever sees
    numbers. ``predict`` is inherited unchanged and returns raw (continuous)
    codes; ``predict_labels`` rounds them and maps them back to labels.

    ``__init__`` is deliberately not overridden: inheriting the parent's
    constructor keeps ``LinReg()`` working while leaving the parent's
    hyper-parameters visible to ``get_params`` / ``clone``.
    """

    # Numeric code for each label; the numeric order is the quality order.
    encoding = {'Poor':0.0,
                'Poor To Fair':1.0,
                'Fair':2.0,
                'Fair To Good':3.0}

    def fit(self , X, y, sample_weight = None):
        """Encode the labels in ``y`` and fit the linear model.

        Args:
            X: feature matrix, passed to ``LinearRegression.fit`` untouched.
            y: labels (strings from ``encoding`` or already-numeric codes).
            sample_weight: optional per-sample weights, forwarded as-is.

        Returns:
            Whatever ``LinearRegression.fit`` returns (the fitted estimator).
        """
        return super().fit(X, self.encode(y), sample_weight=sample_weight)

    def predict_labels(self, X):
        """Predict for ``X`` and return a flat array of label strings.

        The regression output is unbounded, so after rounding it is clipped
        to the range of known codes. Without the clip an out-of-range
        prediction would decode to a stringified number (e.g. ``'4.0'``)
        that ``encode`` cannot read back.
        """
        codes = np.round(super().predict(X))
        known_codes = list(self.encoding.values())
        codes = np.clip(codes, min(known_codes), max(known_codes))
        return self.decode(codes)

    def score(self, X, y, sample_weight = None):
        """Return the parent's score of the predictions against encoded ``y``.

        Args:
            X: feature matrix.
            y: labels (strings from ``encoding`` or already-numeric codes).
            sample_weight: optional per-sample weights, forwarded as-is.
        """
        return super().score(X, self.encode(y), sample_weight=sample_weight)

    def encode(self,arr):
        """Map labels to integer codes, preserving the input's shape.

        Accepts anything NumPy can turn into an array (Series, DataFrame,
        ndarray, list). Values that are not keys of ``encoding`` are left in
        place and must be convertible to ``int`` themselves, so encoding an
        already-encoded array is a no-op.

        Returns:
            A new integer ndarray; the input is never modified.

        Raises:
            ValueError: if a value is neither a known label nor int-like.
        """
        # Build a fresh object array instead of mutating `.values`, which may
        # be a read-only view of the caller's data.
        labels = np.array(arr, dtype=object)
        for label, code in self.encoding.items():
            labels[labels == label] = code
        try:
            return labels.astype(int)
        except ValueError as exc:
            leftover = sorted({v for v in labels.reshape(-1) if isinstance(v, str)})
            raise ValueError(
                f"Cannot encode values to integer codes; unrecognised labels: "
                f"{leftover}. Known labels: {list(self.encoding)}"
            ) from exc

    def decode(self,arr):
        """Map numeric codes back to label strings.

        Args:
            arr: numeric codes (any shape; ints or floats).

        Returns:
            A flat array of strings. Codes that are not in ``encoding`` are
            returned as their stringified float value.
        """
        codes = np.asarray(arr, dtype=float).reshape(-1)
        # Start from the stringified codes so unknown values stay visible,
        # then overwrite by numeric comparison (so 3, 3.0 and -0.0 all match).
        labels = codes.astype(str)
        for label, code in self.encoding.items():
            labels[codes == code] = label
        return labels

In [84]:
# Fit the regressor on the training split, then show its score on that same
# split (labels are encoded to numeric codes inside `fit` / `score`).
reg = LinReg()
reg.fit(X_train, y_train)
reg.score(X_train, y_train)

0.4971454271190481

In [86]:
# Side-by-side view of true vs. predicted labels on the test split, with the
# absolute distance between their numeric codes.
df = y_test.copy()
df['prediction'] = reg.predict_labels(X_test)

true_codes = reg.encode(y_test).reshape(-1)
predicted_codes = reg.encode(df['prediction'])
df['abs_err'] = np.abs(true_codes - predicted_codes)
df

Unnamed: 0,quality,prediction,abs_err
2,Poor,Fair To Good,3
6,Fair,Fair To Good,1
9,Poor,Fair,2
13,Fair To Good,Poor To Fair,2


In [96]:
# Encode the held-out labels and predict once, then reuse both for every
# error metric. Errors are measured on the raw (unrounded) predictions.
y_test_codes = reg.encode(y_test)
y_test_pred = reg.predict(X_test)
test_mse = mean_squared_error(y_test_codes, y_test_pred)

metrics = {
    'train_r2': reg.score(X_train, y_train),
    'test_r2': reg.score(X_test, y_test),
    'RMSE': np.sqrt(test_mse),
    'MSE': test_mse,
    'MAE': mean_absolute_error(y_test_codes, y_test_pred),
}

In [97]:
# Display the computed metrics; the same dict is attached to the registered model below.
metrics

{'train_r2': 0.4971454271190481,
 'test_r2': -1.6022222598257874,
 'RMSE': 2.095530974110384,
 'MSE': 4.391250063456016,
 'MAE': 1.8475893691041207}

In [98]:
# Upload the trained model to the Hopsworks Model Registry.
# First get a handle to the registry.
mr = project.get_model_registry()

# Everything inside `model_dir` is uploaded with the model, so make sure the
# directory exists (no error if it is already there from a previous run).
model_dir = "wave_model"
os.makedirs(model_dir, exist_ok=True)

# Serialise the fitted regressor into `model_dir`.
# NOTE: `LinReg` is defined in this notebook and pickle stores classes by
# reference, so whoever loads this file must have the same `LinReg` class
# available under the same module path.
joblib.dump(reg, os.path.join(model_dir, "wave_reg.pkl"))

# Describe the model's input/output using the features (X_train) and labels (y_train)
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(input_schema, output_schema)

# Create an entry in the model registry with the model's name, description and metrics
wave_reg = mr.python.create_model(
    name="wave_reg",
    metrics=metrics,
    model_schema=model_schema,
    description="Surf quality model"
)

# Upload the model to the model registry, including all files in 'model_dir'
wave_reg.save(model_dir)



Connected. Call `.close()` to terminate connection gracefully.


  0%|          | 0/6 [00:00<?, ?it/s]

Model created, explore it at https://c.app.hopsworks.ai:443/p/194711/models/wave_reg/1


Model(name: 'wave_reg', version: 1)