In [9]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import hopsworks

In [10]:
# Log in to Hopsworks and keep a handle to the project returned by the login.
project = hopsworks.login()
# Feature store of that project; the cells below create feature groups through it.
fs = project.get_feature_store()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/197784
Connected. Call `.close()` to terminate connection gracefully.


In [11]:
# Load every CSV file found in the data folder, tag each frame with a wine
# type, bucket the quality score into three classes, and build the final
# feature table (X) and label frame (y).
data_folder = 'data'

# os.listdir() returns entries in arbitrary order, and the wine type below is
# assigned purely by position, so sort the names to make the labelling
# deterministic across machines and runs.
# NOTE(review): this relies on the red-wine file sorting before the white-wine
# file by name -- confirm against the actual file names in the data folder.
csv_files = sorted(f for f in os.listdir(data_folder) if f.endswith('.csv'))

# Loop through the CSV files
dataframes = []
for idx, csv_file in enumerate(csv_files):
    # Read the semicolon-separated CSV file into a DataFrame
    df = pd.read_csv(os.path.join(data_folder, csv_file), sep=";")

    # Drop duplicate rows
    df = df.drop_duplicates()

    # First file -> 0 (red wine); every later file -> 1 (white wine)
    df['wine_type'] = 0 if idx == 0 else 1

    dataframes.append(df)

# Collapse the raw quality score into three classes:
#   <= 5 -> 1,  == 6 -> 2,  > 6 -> 3
# The statements must run in this order: the new labels 1-3 are themselves
# <= 5, so applying the "<= 5" rule after the others would fold every row
# into class 1.
for df in dataframes:
    df.loc[df.quality <= 5, 'quality'] = 1
    df.loc[df.quality == 6, 'quality'] = 2
    df.loc[df.quality > 6, 'quality'] = 3

# Stack all frames and drop rows that are identical across every column
dataframe_tot = pd.concat(dataframes)
dataframe_tot = dataframe_tot.drop_duplicates()

# Keep only the selected features plus the label and the wine type
keep_cols = ["alcohol", "volatile acidity", "sulphates", "chlorides", "free sulfur dioxide", 'quality', 'wine_type']
dataframe_tot = dataframe_tot[keep_cols]

# Rename columns to remove spaces
dataframe_tot.columns = dataframe_tot.columns.str.replace(' ', '_')

# X: every column except the label; y: single-column frame holding the label
X = dataframe_tot.drop('quality', axis=1)
y = dataframe_tot['quality']
y = pd.DataFrame(y, columns=['quality'])

In [12]:
# Fetch version 1 of the "wine_features" feature group, creating it if it does
# not exist yet, then insert the full table into it.
feature_group = fs.get_or_create_feature_group(
    name="wine_features",
    version=1,
    # Every column (including the `quality` label) is part of the primary key.
    # Pass a plain list of names rather than a pandas Index, matching how the
    # per-quality feature groups below build their primary key.
    primary_key=dataframe_tot.columns.tolist(),
    description="Dataset with wine features, also containing the quality label as well as wine type",
)
feature_group.insert(dataframe_tot)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/197784/fs/197703/fg/237940


Uploading Dataframe: 0.00% |          | Rows 0/5320 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: wine_features_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/197784/jobs/named/wine_features_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x7f2c01686d70>, None)

In [13]:
# Group the feature rows by quality class.
features = X.columns

# One entry per distinct quality label, in order of first appearance, mapping
# the label to the rows of X that carry it. Each subset is computed exactly
# once; .tolist() yields plain Python scalars for the keys rather than NumPy
# ones.
features_by_quality = {
    quality: X[y['quality'] == quality]
    for quality in y['quality'].unique().tolist()
}

In [14]:
# Publish one feature group per quality class, each declaring the main
# wine feature group as its parent, and insert that class's rows into it.
for quality, quality_rows in features_by_quality.items():
    fg_quality = fs.get_or_create_feature_group(
        name=f"wine_features_quality_{quality}",
        version=1,
        primary_key=features.tolist(),
        description=f"Dataset with wine features for quality {quality}",
        parents=[feature_group],
    )
    # Rebuild the subset as a frame restricted to the feature columns
    feature_quality_df = pd.DataFrame(quality_rows, columns=features)
    fg_quality.insert(feature_quality_df)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/197784/fs/197703/fg/236919


Uploading Dataframe: 0.00% |          | Rows 0/1988 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: wine_features_quality_1_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/197784/jobs/named/wine_features_quality_1_1_offline_fg_materialization/executions
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/197784/fs/197703/fg/235902


Uploading Dataframe: 0.00% |          | Rows 0/2323 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: wine_features_quality_2_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/197784/jobs/named/wine_features_quality_2_1_offline_fg_materialization/executions
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/197784/fs/197703/fg/235903


Uploading Dataframe: 0.00% |          | Rows 0/1009 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: wine_features_quality_3_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/197784/jobs/named/wine_features_quality_3_1_offline_fg_materialization/executions
