## Introduction to feature groups with Hopsworks
**This notebook imports 3 CSV files**: Titanic's passengers, tickets and survivors.
We then perform some minor feature engineering (binning and one-hot encoding) and proceed to create a feature group for each.

In [None]:
!pip install -U hopsworks --quiet

In [None]:
import hopsworks
import pandas as pd

In [None]:
# Log in to Hopsworks; the returned project handle is the entry point for the
# calls below.
project = hopsworks.login()
# Feature store handle of that project; later cells call
# `fs.get_or_create_feature_group(...)` on it.
fs = project.get_feature_store()

In [None]:
# Read the Titanic passengers CSV from its hosted URL, then display the frame.
titanic_passengers_df = pd.read_csv(
    filepath_or_buffer="https://repo.hops.works/dev/jdowling/titanic/titanic_passengers.csv",
)
titanic_passengers_df

In [None]:
# Read the Titanic tickets CSV from its hosted URL, then display the frame.
titanic_tickets_df = pd.read_csv(
    filepath_or_buffer="https://repo.hops.works/dev/jdowling/titanic/titanic_tickets.csv",
)
titanic_tickets_df

In [None]:
# Read the Titanic survival-label CSV from its hosted URL, then display the frame.
titanic_survived_df = pd.read_csv(
    filepath_or_buffer="https://repo.hops.works/dev/jdowling/titanic/titanic_survived.csv",
)
titanic_survived_df

## Feature Engineering

Encode all the categorical features as numbers.

In [None]:
# Encode `sex` as an integer: female -> 0, male -> 1.
# The encoded Series is assigned back to the column on purpose: calling
# `.replace(..., inplace=True)` on `df['sex']` mutates a temporary Series (a
# no-op on the frame under pandas Copy-on-Write), and a bare `.astype(int)`
# returns a new Series that is simply discarded.
# Any value other than 'female'/'male' maps to NaN, so `.astype(int)` raises
# instead of letting an unencoded value through. Run this cell once on the
# freshly loaded frame.
titanic_passengers_df['sex'] = (
    titanic_passengers_df['sex']
    .map({'female': 0, 'male': 1})
    .astype(int)
)

# Bin `age` into labelled ranges. `pd.cut` uses right-closed intervals by
# default: (0, 12], (12, 19], (19, 40], (40, 65], (65, 130].
titanic_passengers_df['age_bin'] = pd.cut(
    titanic_passengers_df['age'],
    bins=[0, 12, 19, 40, 65, 130],
    labels=['child', 'teen', 'young_adult', 'middle_aged', 'pensioner'],
)
# One-hot encode the bins into `age_<label>` columns; `age_bin` itself is
# replaced by them. Rows whose age is missing or outside the bins get no bin
# and therefore no indicator set.
titanic_passengers_df = pd.get_dummies(titanic_passengers_df, columns=["age_bin"], prefix=["age"])

titanic_passengers_df

In [None]:
# Encode the embarkation port as an integer: S -> 0, C -> 1, Q -> 2.
# The encoded Series is assigned back to the column on purpose: calling
# `.replace(..., inplace=True)` on `df['embarked']` mutates a temporary Series
# (a no-op on the frame under pandas Copy-on-Write), and a bare `.astype(int)`
# returns a new Series that is simply discarded.
# Any value other than 'S'/'C'/'Q' (including a missing one) maps to NaN, so
# `.astype(int)` raises instead of letting an unencoded value through. Run
# this cell once on the freshly loaded frame.
titanic_tickets_df['embarked'] = (
    titanic_tickets_df['embarked']
    .map({'S': 0, 'C': 1, 'Q': 2})
    .astype(int)
)

titanic_tickets_df

In [None]:
# Fetch (or create) version 1 of the `titanic_passengers` feature group,
# keyed on `passenger_id`.
titanic_passengers_fg = fs.get_or_create_feature_group(
    primary_key=["passenger_id"],
    version=1,
    name="titanic_passengers",
)
# Write the passengers frame into the group. `wait_for_job` is off, so
# presumably the call returns without waiting for the ingestion job
# (confirm against the Hopsworks docs).
titanic_passengers_fg.insert(
    titanic_passengers_df,
    write_options={"wait_for_job": False},
)

In [None]:
# Fetch (or create) version 1 of the `titanic_tickets` feature group,
# keyed on `passenger_id`.
titanic_tickets_fg = fs.get_or_create_feature_group(
    primary_key=["passenger_id"],
    version=1,
    name="titanic_tickets",
)
# Write the tickets frame into the group. `wait_for_job` is off, so
# presumably the call returns without waiting for the ingestion job
# (confirm against the Hopsworks docs).
titanic_tickets_fg.insert(
    titanic_tickets_df,
    write_options={"wait_for_job": False},
)

In [None]:
# Fetch (or create) version 1 of the `titanic_survived` feature group,
# keyed on `passenger_id`.
titanic_survived_fg = fs.get_or_create_feature_group(
    primary_key=["passenger_id"],
    version=1,
    name="titanic_survived",
)
# Write the survival frame into the group. `wait_for_job` is off, so
# presumably the call returns without waiting for the ingestion job
# (confirm against the Hopsworks docs).
titanic_survived_fg.insert(
    titanic_survived_df,
    write_options={"wait_for_job": False},
)