# (baseline) Most Popular Articles

Baseline model: recommend the same list of most popular articles (ranked by purchase count over the training period) to every customer.

In [1]:
!pip install polars



In [2]:
import os
# Run from the project root; presumably this is what lets the relative
# 'data/...' paths and the `notebooks` package import below resolve.
# Defaults to the original hard-coded location; set NOTEBOOK_WORK_DIR to
# run the notebook from a different checkout.
os.chdir(os.environ.get('NOTEBOOK_WORK_DIR', '/home/jovyan/work'))

In [3]:
import polars as pl

In [13]:
import notebooks
from notebooks import data_storage

# Load the training transactions through the project's data_storage helper.
# NOTE(review): presumably returns a polars DataFrame with at least the
# 't_dat' and 'article_id' columns (both are used below) — confirm.
transactions = data_storage.load_dataset('transactions_train')

In [5]:
class MostPopularRecommender:
  """Baseline recommender that gives every customer the same best-selling articles.

  Popularity is the number of transaction rows per ``article_id`` inside a
  caller-supplied date window.
  """

  def __init__(self, transactions: pl.DataFrame):
    """Keep a reference to the transactions that will be ranked.

    Args:
      transactions: Frame with at least the ``t_dat`` and ``article_id`` columns.
    """
    self.transactions = transactions

  def get_recommendations(self, customer_ids: list, start_date, end_date, top_n: int = 12) -> pl.DataFrame:
    """Build one identical prediction row per customer.

    Args:
      customer_ids: Customers to produce a row for (anything with a length
        that ``pl.DataFrame`` accepts as a column).
      start_date: Lower bound on ``t_dat``, exclusive.
      end_date: Upper bound on ``t_dat``, exclusive.
      top_n: How many of the most frequent articles to recommend.

    Returns:
      A frame with columns ``customer_id`` and ``prediction``. ``prediction``
      is the space-separated list of the ``top_n`` most frequent article ids
      in the window, repeated for every customer. It holds fewer ids (or is
      an empty string) when the window contains fewer distinct articles.
    """
    # Rank the frame handed to the constructor, not whatever global happens
    # to be named `transactions` in the notebook namespace.
    most_popular = (
      self.transactions
      .lazy()
      .filter(
        # Both bounds are strict: rows dated exactly start_date or end_date
        # are not counted.
        (pl.col('t_dat') < end_date)
      & (pl.col('t_dat') > start_date)
      )
      .groupby('article_id')
      .agg([pl.count()])
      .sort('count', reverse=True)
      .head(top_n)
      .collect()
    )

    predictions = " ".join(map(str, most_popular['article_id'].to_list()))

    df = pl.DataFrame({
      'customer_id': customer_ids,
      # Same recommendation string for every customer.
      'prediction': [predictions] * len(customer_ids),
    })

    return df

In [6]:
# Read the sample submission; its 'customer_id' column supplies the customers
# to generate predictions for in the cells below.
sample_submission = pl.read_csv('data/sample_submission.csv')

In [7]:
# Show the latest and earliest transaction dates to pick the ranking window.
transactions.select([
  pl.col('t_dat').max().alias('max_date'),
  pl.col('t_dat').min().alias('min_date'),
])

max_date,min_date
str,str
,


In [8]:
# Rank articles over the window ('2018-09-20', '2020-09-23').
# NOTE(review): get_recommendations filters with strict < and >, so rows dated
# exactly '2018-09-20' are not counted — confirm that is intended.
rec = MostPopularRecommender(transactions)
submission = rec.get_recommendations(sample_submission['customer_id'], '2018-09-20', '2020-09-23')

In [9]:
# Display the frame to check its shape: one row per customer, same prediction string.
submission

customer_id,prediction
str,str
"""00000dbacae5abe5e23885899a1fa44253a17956c6d1c3d25f88aa139fdfc657""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""0000423b00ade91418cceaf3b26c6af3dd342b51fd051eec9c12fb36984420fa""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""000058a12d5b43e67d225668fa1f8d618c13dc232df0cad8ffe7ad4a1091e318""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2c5feb1ca5dff07c43e""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""00006413d8573cd20ed7128e53b7b13819fe5cfc2d801fe7fc0f26dd8d65a85a""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""000064249685c11552da43ef22a5030f35a147f723d5b02ddd9fd22452b1f5a6""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""0000757967448a6cb83efb3ea7a3fb9d418ac7adf2379d8cd0c725276a467a2a""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""00007d2de826758b65a93dd24ce629ed66842531df6699338c5570910a014cc2""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""00007e8d4e54114b5b2a9b51586325a8d0fa74ea23ef77334eaec4ffccd7ebcc""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""
"""00008469a21b50b3d147c97135e25b4201a8c58997f78782a0cc706645e14493""","""0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001"""


In [10]:
# Persist the predictions as CSV (path is relative to the current working directory).
submission.write_csv('most-popular-submission.csv')

In [11]:
# Variant of the baseline restricted to recent transactions: only rows with
# t_dat strictly between '2020-09-14' and '2020-09-23' are counted.
submission2 = rec.get_recommendations(sample_submission['customer_id'], '2020-09-14', '2020-09-23')
submission2.write_csv('most-popular-recent-submission.csv')

In [12]:
# Inspect the article_id column; the output below shows it is a str Series.
transactions['article_id']

shape: (31788324,)
Series: 'article_id' [str]
[
	"0663713001"
	"0541518023"
	"0505221004"
	"0685687003"
	"0685687004"
	"0685687001"
	"0505221001"
	"0688873012"
	"0501323011"
	"0598859003"
	"0688873020"
	"0688873011"
	...
	"0791587021"
	"0910949002"
	"0701472004"
	"0456163087"
	"0832505003"
	"0902288001"
	"0856440002"
	"0929511001"
	"0891322004"
	"0918325001"
	"0833459002"
	"0898573003"
]