In [2]:
%load_ext autoreload
%autoreload 2

# Feature Pipeline: Computing features

In [91]:
import os
import sys

repo_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(repo_path)

import torch
import warnings
import polars as pl

from pprint import pprint
from loguru import logger
from sentence_transformers import SentenceTransformer

# Config
from recsys.config import settings

# Feature Store
# from recsys import gcp_integrations
from recsys.gcp_integrations import feature_store, bq_utils

# Features
from recsys.features.articles import (
    compute_features_articles,
    generate_embeddings_for_dataframe,
)
from recsys.features.ranking import compute_rankings_dataset
from recsys.features.customers import DatasetSampler, compute_features_customers
from recsys.features.interaction import generate_interaction_data
from recsys.features.transactions import compute_features_transactions

# Raw Data
from recsys.raw_data_sources import h_and_m_raw_data

warnings.filterwarnings("ignore")

In [92]:
ONLINE: bool = False

# 🗄️ Articles data

The **article_id** and **product_code** serve different purposes in the context of H&M's product database:

- **Article ID**: This is a unique identifier assigned to each individual article within the database. It is typically used for internal tracking and management purposes. Each distinct item or variant of a product (e.g., different sizes or colors) would have its own unique article_id.

- **Product Code**: This is also a unique identifier, but it is associated with a specific product or style rather than individual articles. It represents a broader category or type of product within H&M's inventory. Multiple articles may share the same product code if they belong to the same product line or style.

In short, the article_id identifies one specific item variant, whereas the product_code identifies the broader product or style that a group of variants share, so several articles can carry the same product_code.

Here is an example:

**Product: Basic T-Shirt**

- **Product Code:** TS001

- **Article IDs:**
    - Article ID: 1001 (Size: Small, Color: White)
    - Article ID: 1002 (Size: Medium, Color: White)
    - Article ID: 1003 (Size: Large, Color: White)
    - Article ID: 1004 (Size: Small, Color: Black)
    - Article ID: 1005 (Size: Medium, Color: Black)

In this example, "TS001" is the product code for the basic t-shirt style. Each variant of this t-shirt (e.g., different sizes and colors) has its own unique article_id.



In [120]:
# Load the raw articles table: through h_and_m_raw_data when ONLINE is set,
# otherwise from the CSV file under the repository's data/ directory.
articles_df = (
    h_and_m_raw_data.extract_articles_df()
    if ONLINE
    else pl.read_csv(source=f"{repo_path}/data/articles.csv")
)

articles_df.shape

(105542, 25)

In [121]:
articles_df.head(3)

article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,perceived_colour_value_id,perceived_colour_value_name,perceived_colour_master_id,perceived_colour_master_name,department_no,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
i64,i64,str,i64,str,str,i64,str,i64,str,i64,str,i64,str,i64,str,str,str,i64,str,i64,str,i64,str,str
108775015,108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""","""Jersey top with narrow shoulde…"
108775044,108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",10,"""White""",3,"""Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""","""Jersey top with narrow shoulde…"
108775051,108775,"""Strap top (1)""",253,"""Vest top""","""Garment Upper body""",1010017,"""Stripe""",11,"""Off White""",1,"""Dusty Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""","""Jersey top with narrow shoulde…"


## Articles feature engineering

In [122]:
# Rebinds `articles_df` to the output of compute_features_articles. Because the
# input name is overwritten, re-running this cell on its own passes the already
# processed frame back in; reload the raw articles first when re-executing.
articles_df = compute_features_articles(articles_df, ONLINE, repo_path)
articles_df.shape

(105542, 27)

In [123]:
articles_df.head(3)

article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,perceived_colour_value_id,perceived_colour_value_name,perceived_colour_master_id,perceived_colour_master_name,department_no,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,prod_name_length,article_description,image_url
str,i64,str,i64,str,str,i64,str,i64,str,i64,str,i64,str,i64,str,str,str,i64,str,i64,str,i64,str,u32,str,str
"""108775015""",108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""",9,"""Strap top - Vest top in Garmen…","""/Users/galcala/Desktop/Github/…"
"""108775044""",108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",10,"""White""",3,"""Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""",9,"""Strap top - Vest top in Garmen…","""/Users/galcala/Desktop/Github/…"
"""108775051""",108775,"""Strap top (1)""",253,"""Vest top""","""Garment Upper body""",1010017,"""Stripe""",11,"""Off White""",1,"""Dusty Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""",13,"""Strap top (1) - Vest top in Ga…","""/Users/galcala/Desktop/Github/…"


## Create embeddings from the articles description

In [124]:
# Log the first three article descriptions, numbered from 1.
preview = articles_df["article_description"].head(n=3)
for position, description in enumerate(preview, start=1):
    logger.info(f"Item {position}:\n{description}")

[32m2025-02-10 09:52:41.494[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mItem 1:
Strap top - Vest top in Garment Upper body
 Apperance: Solid
 Color: Dark Black 9
 Category: Ladieswear Womens Everyday Basics Jersey Basic
 Details: Jersey top with narrow shoulder straps.[0m
[32m2025-02-10 09:52:41.495[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mItem 2:
Strap top - Vest top in Garment Upper body
 Apperance: Solid
 Color: Light White 10
 Category: Ladieswear Womens Everyday Basics Jersey Basic
 Details: Jersey top with narrow shoulder straps.[0m
[32m2025-02-10 09:52:41.495[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mItem 3:
Strap top (1) - Vest top in Garment Upper body
 Apperance: Stripe
 Color: Dusty Light White 11
 Category: Ladieswear Womens Everyday Basics Jersey Basic
 Details: Jersey top with narrow shoulder straps.[0m


In [125]:
# Pick the torch backend: CUDA when available, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Python f-strings interpolate with plain braces; a leading "$" would be
# logged literally in front of the model id.
logger.info(
    f"Loading {settings.FEATURES_EMBEDDING_MODEL_ID} embedding model to {device=}"
)

# Load embedding model from SentenceTransformers model registry
model = SentenceTransformer(settings.FEATURES_EMBEDDING_MODEL_ID, device=device)

[32m2025-02-10 09:52:42.282[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1mLoading $all-MiniLM-L6-v2 embedding model to device='mps'[0m


In [126]:
# Runs `model` over the "article_description" column (batch_size=128 is
# forwarded to the helper) and rebinds `articles_df` to the result, which
# carries an "embeddings" column in the recorded output.
# The recorded run took about six minutes for 105,542 rows, so avoid
# re-running this cell unnecessarily.
articles_df = generate_embeddings_for_dataframe(
    articles_df, "article_description", model, batch_size=128
)

Generating embeddings...: 100%|██████████| 105542/105542 [05:59<00:00, 293.88it/s]


Each article description is now represented by a numerical vector (an embedding) that we can feed to a model, as opposed to the raw string describing the item.

In [127]:
articles_df[["article_description", "embeddings"]].head(3)

article_description,embeddings
str,list[f64]
"""Strap top - Vest top in Garmen…","[-0.023064, 0.069894, … 0.012397]"
"""Strap top - Vest top in Garmen…","[-0.011304, 0.075073, … 0.016485]"
"""Strap top (1) - Vest top in Ga…","[-0.033244, 0.081816, … 0.020111]"


In [128]:
articles_df["image_url"][0]

'/Users/galcala/Desktop/Github/GenAI_Custom_Real_Time_Personalized_Recommender/data/images/010/0108775015.jpg'

In [129]:
from IPython.display import HTML, display

# Render the images of the last 12 articles in a 6-column grid.
image_urls = articles_df["image_url"].tail(12).to_list()

# Each CSS declaration must end with ";" and use a valid unit ("10px");
# otherwise the browser discards the gap/max-width rules.
grid_html = (
    '<div style="display: grid; grid-template-columns: repeat(6, 1fr); gap: 10px; max-width: 900px;">'
    + "".join(
        f'<img src="{url}" style="width: 100%; height: auto;">' for url in image_urls
    )
    + "</div>"
)

display(HTML(grid_html))

# 👯‍♀️ Customers Data

In [132]:
# Load the raw customers table: through h_and_m_raw_data when ONLINE is set,
# otherwise from the CSV file under the repository's data/ directory.
customers_df = (
    h_and_m_raw_data.extract_customers_df()
    if ONLINE
    else pl.read_csv(source=f"{repo_path}/data/customers.csv")
)

customers_df.shape

(1371980, 7)

In [133]:
customers_df.head(3)

customer_id,FN,Active,club_member_status,fashion_news_frequency,age,postal_code
str,f64,f64,str,str,i64,str
"""00000dbacae5abe5e23885899a1fa4…",,,"""ACTIVE""","""NONE""",49,"""52043ee2162cf5aa7ee79974281641…"
"""0000423b00ade91418cceaf3b26c6a…",,,"""ACTIVE""","""NONE""",25,"""2973abc54daa8a5f8ccfe9362140c6…"
"""000058a12d5b43e67d225668fa1f8d…",,,"""ACTIVE""","""NONE""",24,"""64f17e6a330a85798e4998f62d0930…"


In [134]:
customers_df.null_count()

customer_id,FN,Active,club_member_status,fashion_news_frequency,age,postal_code
u32,u32,u32,u32,u32,u32,u32
0,895050,907576,6062,16009,15861,0


## Customers feature engineering

In [135]:
customers_df.head(3)

customer_id,FN,Active,club_member_status,fashion_news_frequency,age,postal_code
str,f64,f64,str,str,i64,str
"""00000dbacae5abe5e23885899a1fa4…",,,"""ACTIVE""","""NONE""",49,"""52043ee2162cf5aa7ee79974281641…"
"""0000423b00ade91418cceaf3b26c6a…",,,"""ACTIVE""","""NONE""",25,"""2973abc54daa8a5f8ccfe9362140c6…"
"""000058a12d5b43e67d225668fa1f8d…",,,"""ACTIVE""","""NONE""",24,"""64f17e6a330a85798e4998f62d0930…"


In [136]:
# Rebinds `customers_df` to the engineered customer features.
# drop_null_age=True is passed; in the recorded run the row count falls by
# exactly the number of null ages reported by null_count() (15,861).
# Re-running this cell alone feeds the already processed frame back in.
customers_df = compute_features_customers(customers_df, drop_null_age=True)
customers_df.shape

(1356119, 5)

In [137]:
customers_df.head(3)

customer_id,club_member_status,age,postal_code,age_group
str,str,f64,str,str
"""00000dbacae5abe5e23885899a1fa4…","""ACTIVE""",49.0,"""52043ee2162cf5aa7ee79974281641…","""46-55"""
"""0000423b00ade91418cceaf3b26c6a…","""ACTIVE""",25.0,"""2973abc54daa8a5f8ccfe9362140c6…","""19-25"""
"""000058a12d5b43e67d225668fa1f8d…","""ACTIVE""",24.0,"""64f17e6a330a85798e4998f62d0930…","""19-25"""


# 🧾 Transactions Data

In [140]:
# Load the raw transactions table: through h_and_m_raw_data when ONLINE is set,
# otherwise from the CSV file under the repository's data/ directory.
transactions_df = (
    h_and_m_raw_data.extract_transactions_df()
    if ONLINE
    else pl.read_csv(source=f"{repo_path}/data/transactions_train.csv")
)

transactions_df.shape

(31788324, 5)

In [141]:
transactions_df.head(3)

t_dat,customer_id,article_id,price,sales_channel_id
str,str,i64,f64,i64
"""2018-09-20""","""000058a12d5b43e67d225668fa1f8d…",663713001,0.050831,2
"""2018-09-20""","""000058a12d5b43e67d225668fa1f8d…",541518023,0.030492,2
"""2018-09-20""","""00007d2de826758b65a93dd24ce629…",505221004,0.015237,2


## Transactions feature engineering

In [145]:
transactions_df["t_dat"].head(3)

t_dat
str
"""2018-09-20"""
"""2018-09-20"""
"""2018-09-20"""


In [147]:
# Rebinds `transactions_df` to the output of compute_features_transactions.
# In the recorded run the result has t_dat as an integer, article_id as a
# string, and four extra columns: year, month, day and day_of_week.
# Re-running this cell alone feeds the already processed frame back in.
transactions_df = compute_features_transactions(transactions_df)
transactions_df.shape

(31788324, 9)

The time of the year a purchase was made should be a strong predictor, as seasonality plays a big factor in fashion purchases. Here, you will use the month of the purchase as a feature. Since this is a cyclical feature (January is as close to December as it is to February), you'll map each month to the unit circle using sine and cosine. Note that this encoding is not part of the DataFrame shown below, which only contains the raw calendar columns (`year`, `month`, `day`, `day_of_week`).

Thus, the features of the transactions DataFrame look as follows:

In [148]:
transactions_df.head(3)

t_dat,customer_id,article_id,price,sales_channel_id,year,month,day,day_of_week
i64,str,str,f64,i64,i32,i8,i8,i8
1537401600,"""000058a12d5b43e67d225668fa1f8d…","""663713001""",0.050831,2,2018,9,20,4
1537401600,"""000058a12d5b43e67d225668fa1f8d…","""541518023""",0.030492,2,2018,9,20,4
1537401600,"""00007d2de826758b65a93dd24ce629…","""505221004""",0.015237,2,2018,9,20,4


We don't want to work with ~30 million transactions in this series, as everything would take too long to run. Thus, we create a subset of the original dataset by randomly sampling customers and keeping only their transactions.

In [153]:
# Build a sampler sized by settings.CUSTOMER_DATA_SIZE and apply it to both frames.
sampler = DatasetSampler(size=settings.CUSTOMER_DATA_SIZE)
dataset_subset = sampler.sample(
    transactions_df=transactions_df,
    customers_df=customers_df,
)

# Rebind both names to their sampled counterparts.
customers_df, transactions_df = (
    dataset_subset["customers"],
    dataset_subset["transactions"],
)

[32m2025-02-10 10:28:19.307[0m | [1mINFO    [0m | [36mrecsys.features.customers[0m:[36msample[0m:[36m29[0m - [1mSampling 1000 customers.[0m
[32m2025-02-10 10:28:19.710[0m | [1mINFO    [0m | [36mrecsys.features.customers[0m:[36msample[0m:[36m32[0m - [1mNumber of transactions for all the customers: 31788324[0m
[32m2025-02-10 10:28:21.564[0m | [1mINFO    [0m | [36mrecsys.features.customers[0m:[36msample[0m:[36m38[0m - [1mNumber of transactions for the 1000 sampled customers: 23799[0m


In [154]:
transactions_df.shape

(23799, 9)

Some of the remaining customers:

In [155]:
# Log up to 10 distinct customer IDs found in the sampled transactions.
# NOTE(review): unique() is not asked to preserve order here, so which IDs
# are shown may differ between runs — confirm whether that matters.
for customer_id in transactions_df["customer_id"].unique().head(10):
    logger.info(f"Logging customer ID: {customer_id}")

[32m2025-02-10 10:28:26.708[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: da184d6401df335febd070613311cdb5fd01d263b1311e95a422a52ef63f5744[0m
[32m2025-02-10 10:28:26.708[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: 6de346c93aa0becee020a55e9d10a82120b5b900a80a25ec29589c19ff1b2962[0m
[32m2025-02-10 10:28:26.708[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: f07e2b7fd97702bd88014812d367e3708f254e1df78849198c62219d3fcb9d70[0m
[32m2025-02-10 10:28:26.709[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: f8825cb1ea8a2014f8bdfa179c6590f71d3a0b71a58e41b261d8ac112c2553c1[0m
[32m2025-02-10 10:28:26.709[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLogging customer ID: 38194592527e5a2af32c13b8baf4d761ca5084e69341e269a26e5f00b0765d3b[0m
[32m2025-02-10

# 🤳🏻 Interaction data

To train our models, we need more than just the transactions DataFrame. We need positive samples that signal whether a customer clicked or bought an item, but we also need negative samples that signal no interactions between a customer and an item.

In [157]:
# Derives the interaction dataset from the sampled transactions.
# The recorded output has the columns t_dat, customer_id, article_id,
# interaction_score and prev_article_id.
interaction_df = generate_interaction_data(transactions_df)
interaction_df.shape

Processing customer chunks:   0%|          | 0/1 [00:03<?, ?it/s]


(135386, 5)

In [158]:
interaction_df.head()

t_dat,customer_id,article_id,interaction_score,prev_article_id
i64,str,str,i64,str
1254614400,"""00b203a32faa3d007dba198ef27c15…","""844734001""",0,"""START"""
1255694400,"""00b203a32faa3d007dba198ef27c15…","""844734001""",0,"""844734001"""
1256414400,"""00b203a32faa3d007dba198ef27c15…","""811585002""",0,"""844734001"""
1258934400,"""00b203a32faa3d007dba198ef27c15…","""633017004""",0,"""811585002"""
1258934400,"""00b203a32faa3d007dba198ef27c15…","""633017004""",0,"""633017004"""


Score distributions:

Here is what each score means:
- `0` : No interaction between a customer and an item
- `1` : A customer clicked an item
- `2` : A customer bought an item

In [159]:
# Count how many rows carry each interaction score.
score_count = pl.col("interaction_score").count().alias("total_interactions")
interaction_df.group_by("interaction_score").agg(score_count)

interaction_score,total_interactions
i64,u32
0,73285
1,38302
2,23799


# Upload feature group

### Customers

In [160]:
customers_df.head()

customer_id,club_member_status,age,postal_code,age_group
str,str,f64,str,str
"""6f173c0d871007b835f77d16232bae…","""ACTIVE""",61.0,"""aacae4bc0b6cf9c6708b05f234512a…","""56-65"""
"""1ae5ddce4cdf0f21ae41379a6edad9…","""ACTIVE""",23.0,"""2c29ae653a9282cce4151bd87643c9…","""19-25"""
"""b903a2e7b1fba1924a2332c1b69fc4…","""ACTIVE""",25.0,"""612ae24e8d0ea791da6b5b4de91714…","""19-25"""
"""f17d07ee3b52dc06ba23e5dbd0621a…","""ACTIVE""",33.0,"""caaf654c6a82da724a42162a80f037…","""26-35"""
"""27e13eb33e457d98f0f7fd086231a4…","""ACTIVE""",35.0,"""f919da058a4a967dd830c3cbb3c657…","""26-35"""


In [None]:
# Upload the sampled customers frame to BigQuery, logging before and after.
# NOTE(review): this call passes the frame as `df=` while the other upload
# cells in this notebook pass `articles_df=` — confirm which keyword
# load_features_to_bigquery actually accepts and align the calls.
logger.info("Uploading 'customers' Feature to BigQuery.")
bq_utils.load_features_to_bigquery(df=customers_df)
logger.info("✅ Uploaded 'customers' Feature to BigQuery!")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[32m2025-02-06 12:46:56.957[0m | [1mINFO    [0m | [36mrecsys.gcp_integrations.feature_store[0m:[36mupload_dataframe_to_bigquery[0m:[36m96[0m - [1mUploading DataFrame to BigQuery table: recsys-dev-gonzo.recsys_dataset.recsys_customers[0m
[32m2025-02-06 12:46:56.958[0m | [1mINFO    [0m | [36mrecsys.gcp_integrations.feature_store[0m:[36mupload_dataframe_to_bigquery[0m:[36m100[0m - [1mConverting Polars DataFrame to Pandas[0m
[32m2025-02-06 12:46:57.001[0m | [1mINFO    [0m | [36mrecsys.gcp_integrations.feature_store[0m:[36mupload_dataframe_to_bigquery[0m:[36m105[0m - [1mDataFrame shape: (1000, 5)[0m
[32m2025-02-06 12:47:04.367[0m | [1mINFO    [0m | [36mrecsys.gcp_integrati

### Articles

In [81]:
logger.info("Uploading 'articles' Feature to BigQuery.")
bq_utils.load_features_to_bigquery(articles_df=articles_df)
logger.info("✅ Uploaded 'articles' Feature to BigQuery!")

[32m2025-02-06 13:46:10.303[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mUploading 'articles' Feature to BigQuery.[0m
[32m2025-02-06 13:46:10.425[0m | [34m[1mDEBUG   [0m | [36mrecsys.gcp_integrations.bq_utils[0m:[36mconvert_types_for_bigquery[0m:[36m55[0m - [34m[1mConverted article_id to STRING[0m
[32m2025-02-06 13:46:10.427[0m | [34m[1mDEBUG   [0m | [36mrecsys.gcp_integrations.bq_utils[0m:[36mconvert_types_for_bigquery[0m:[36m55[0m - [34m[1mConverted prod_name to STRING[0m
[32m2025-02-06 13:46:10.428[0m | [34m[1mDEBUG   [0m | [36mrecsys.gcp_integrations.bq_utils[0m:[36mconvert_types_for_bigquery[0m:[36m55[0m - [34m[1mConverted prod_name_length to INTEGER[0m
[32m2025-02-06 13:46:10.429[0m | [34m[1mDEBUG   [0m | [36mrecsys.gcp_integrations.bq_utils[0m:[36mconvert_types_for_bigquery[0m:[36m55[0m - [34m[1mConverted product_type_name to STRING[0m
[32m2025-02-06 13:46:10.431[0m | [34m[1mDEBUG   [

### Transactions

In [89]:
transactions_df.head()

t_dat,customer_id,article_id,price,sales_channel_id,year,month,day,day_of_week
i64,str,str,f64,i64,i32,i8,i8,i8
0,"""25f0bcb3b9e6fffee7541b02d21e48…","""541518004""",0.030492,1,2018,9,20,4
0,"""25f0bcb3b9e6fffee7541b02d21e48…","""654564002""",0.016932,1,2018,9,20,4
0,"""25f0bcb3b9e6fffee7541b02d21e48…","""372860002""",0.013542,1,2018,9,20,4
0,"""4e82363f3c5a710922073cdf626309…","""673285001""",0.025407,1,2018,9,20,4
0,"""4e82363f3c5a710922073cdf626309…","""564314018""",0.025407,1,2018,9,20,4


In [None]:
# Upload the sampled transactions frame to BigQuery, logging before and after.
# NOTE(review): the transactions frame is passed through a keyword named
# `articles_df` — verify against load_features_to_bigquery's signature that
# this routes the data to the transactions table and not the articles one.
logger.info("Uploading 'transactions' Feature to BigQuery.")
bq_utils.load_features_to_bigquery(articles_df=transactions_df)
logger.info("✅ Uploaded 'transactions' Feature to BigQuery!")

### Interactions

In [163]:
interaction_df.head(3)

t_dat,customer_id,article_id,interaction_score,prev_article_id
i64,str,str,i64,str
1254614400,"""00b203a32faa3d007dba198ef27c15…","""844734001""",0,"""START"""
1255694400,"""00b203a32faa3d007dba198ef27c15…","""844734001""",0,"""844734001"""
1256414400,"""00b203a32faa3d007dba198ef27c15…","""811585002""",0,"""844734001"""


In [None]:
# Upload the interaction frame to BigQuery, logging before and after.
# NOTE(review): the interaction frame is passed through a keyword named
# `articles_df` — verify against load_features_to_bigquery's signature that
# this routes the data to the interactions table and not the articles one.
logger.info("Uploading 'interactions' Feature to BigQuery.")
bq_utils.load_features_to_bigquery(articles_df=interaction_df)
logger.info("✅ Uploaded 'interactions' Feature to BigQuery!")

# Compute ranking dataset

## TODO: Replace the DataFrames with the data from the Vertex AI Feature Online Store, as we want to reflect the lineage of our source of truth

The last step is to compute the ranking dataset used to train the scoring/ranking model from the feature groups we've just created:


In [166]:
ranking_df = compute_rankings_dataset(transactions_df, articles_df, customers_df)

In [167]:
ranking_df.head(3)

customer_id,article_id,age,label,product_type_name,product_group_name,graphical_appearance_name,perceived_colour_value_name,perceived_colour_master_name,department_name,index_name,index_group_name,section_name,garment_group_name
str,str,f64,i32,str,str,str,str,str,str,str,str,str,str
"""7c369adb7ca840f709eb0a199c0ab9…","""630084007""",34.0,1,"""Shorts""","""Garment Lower body""","""Solid""","""Dark""","""Khaki green""","""Shorts""","""Menswear""","""Menswear""","""Contemporary Casual""","""Shorts"""
"""98d51986e39d835efe6c4386715e3f…","""788176003""",63.0,1,"""Dress""","""Garment Full body""","""All over pattern""","""Dark""","""Green""","""Dress""","""Ladieswear""","""Ladieswear""","""Womens Everyday Collection""","""Dresses Ladies"""
"""4d4e016ffb468285e5b6943f130f92…","""549251004""",66.0,1,"""Trousers""","""Garment Lower body""","""Denim""","""Medium Dusty""","""Blue""","""Woven bottoms""","""Ladieswear""","""Ladieswear""","""H&M+""","""Trousers"""


In [168]:
ranking_df.get_column("label").value_counts()

label,count
i32,u32
1,20376
0,203760


As the ranking dataset was computed based on articles, customers, and transactions Feature Views, we can reflect this lineage in the ranking Feature View.

In [None]:
# Upload the ranking dataset computed above to BigQuery, logging before and after.
# The frame passed must be `ranking_df`; passing `interaction_df` would upload
# the interactions a second time under the 'rankings' label.
# NOTE(review): the keyword name `articles_df` is kept as used by the other
# upload cells — confirm against load_features_to_bigquery's signature that it
# routes this frame to the rankings table.
logger.info("Uploading 'rankings' Feature to BigQuery.")
bq_utils.load_features_to_bigquery(articles_df=ranking_df)
logger.info("✅ Uploaded 'rankings' Feature to BigQuery!")