In [1]:
from typing import Optional, Union, Tuple, Iterable
import freerec
from freerec.data.preprocessing import AtomicConverter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ---------------------------------------------------------------
# Dataset location
# ---------------------------------------------------------------
# 'root/filename' should be the directory storing the '.inter' file.
filename: Optional[str] = "ml-1m"
root: str = "../RecSets/file_"
# 'dataset' is the name given to the dataset after processing.
dataset: str = "MovieLens1M"

# ---------------------------------------------------------------
# Basic settings (forwarded as keyword arguments to the converter)
# ---------------------------------------------------------------
# Split proportions handed to the ratio-based dataset builders.
ratios: Tuple[int, int, int] = (8, 1, 1)
# Lower bounds used when filtering users / items.
# NOTE(review): presumably minimum interaction counts (k-core) -- confirm.
kcore4item: int = 5
kcore4user: int = 5
# NOTE(review): presumably the rating threshold for a positive interaction -- confirm.
star4pos: int = 0


In [3]:
# Build the converter once; every dataset-building cell below reuses it.
# It is configured with the raw-data location (root, filename) and the
# name of the processed dataset (dataset) from the configuration cell.
converter: AtomicConverter = AtomicConverter(
    dataset=dataset, filename=filename, root=root
)

In [4]:
# General dataset: filtered with the configured star / k-core settings
# and split into train / valid / test according to `ratios`.
converter.make_general_dataset(
    ratios=ratios,
    kcore4item=kcore4item,
    kcore4user=kcore4user,
    star4pos=star4pos,
)

[Converter] >>> Load `ml-1m.inter' ...
[Converter] >>> Load `ml-1m.user' ...
[Converter] >>> Load `ml-1m.item' ...
[Converter] >>> Filter dataframe according to Rating ...
[Converter] >>> Filter dataframe: User in [5, inf]; Item in [5, inf] ...
[Converter] >>> Current datasize: 1000209 ...
[Converter] >>> Current datasize: 999611
[Converter] >>> Current datasize: 999611
[Converter] >>> Map user ID to Token ...
[Converter] >>> Map item ID to Token ...
[Converter] >>> Sort by [User] [Timestamp] ...
[Converter] >>> Reserve fields: ['User', 'Item'] ...
[Converter] >>> Split by ratios: (8, 1, 1) ...
[Converter] >>> Save `train.txt' to ../RecSets/file_\General\MovieLens1M_550811_Chron ...
[Converter] >>> Save `valid.txt' to ../RecSets/file_\General\MovieLens1M_550811_Chron ...
[Converter] >>> Save `test.txt' to ../RecSets/file_\General\MovieLens1M_550811_Chron ...
[Converter] >>> Save `user.txt' to ../RecSets/file_\General\MovieLens1M_550811_Chron ...
[Converter] >>> Save `item.txt' to ../Re

In [5]:
# Sequential dataset, split by leaving the last two interactions out
# (no ratios involved); same star / k-core settings as the other cells.
converter.make_sequential_dataset(
    kcore4item=kcore4item,
    kcore4user=kcore4user,
    star4pos=star4pos,
)

[Converter] >>> Load `ml-1m.inter' ...
[Converter] >>> Load `ml-1m.user' ...
[Converter] >>> Load `ml-1m.item' ...
[Converter] >>> Filter dataframe according to Rating ...
[Converter] >>> Filter dataframe: User in [5, inf]; Item in [5, inf] ...
[Converter] >>> Current datasize: 1000209 ...
[Converter] >>> Current datasize: 999611
[Converter] >>> Current datasize: 999611
[Converter] >>> Map user ID to Token ...
[Converter] >>> Map item ID to Token ...
[Converter] >>> Sort by [User] [Timestamp] ...
[Converter] >>> Reserve fields: ['User', 'Item', 'Timestamp'] ...
[Converter] >>> Split by leaving last two ...
[Converter] >>> Save `train.txt' to ../RecSets/file_\Sequential\MovieLens1M_550_Chron ...
[Converter] >>> Save `valid.txt' to ../RecSets/file_\Sequential\MovieLens1M_550_Chron ...
[Converter] >>> Save `test.txt' to ../RecSets/file_\Sequential\MovieLens1M_550_Chron ...
[Converter] >>> Save `user.txt' to ../RecSets/file_\Sequential\MovieLens1M_550_Chron ...
[Converter] >>> Save `item.t

In [6]:
# Make Sequential dataset by ratios
#
# Pass the configured `ratios` explicitly, mirroring the general-dataset
# cell. Without it the split silently falls back to the method's own
# default, so editing `ratios` in the configuration cell would have no
# effect on this dataset.
# NOTE(review): assumes the method accepts a `ratios` keyword like
# `make_general_dataset` does -- confirm against the FreeRec API.

converter.make_sequential_dataset_by_ratio(
    star4pos=star4pos,
    kcore4user=kcore4user,
    kcore4item=kcore4item,
    ratios=ratios,
)

[Converter] >>> Load `ml-1m.inter' ...
[Converter] >>> Load `ml-1m.user' ...
[Converter] >>> Load `ml-1m.item' ...
[Converter] >>> Filter dataframe according to Rating ...
[Converter] >>> Filter dataframe: User in [5, inf]; Item in [5, inf] ...
[Converter] >>> Current datasize: 1000209 ...
[Converter] >>> Current datasize: 999611
[Converter] >>> Current datasize: 999611
[Converter] >>> Map user ID to Token ...
[Converter] >>> Map item ID to Token ...
[Converter] >>> Sort by [User] [Timestamp] ...
[Converter] >>> Reserve fields: ['User', 'Item', 'Timestamp'] ...
[Converter] >>> Split by ratios: (8, 1, 1) ...
[Converter] >>> Save `train.txt' to ../RecSets/file_\Sequential\MovieLens1M_550811_Chron ...
[Converter] >>> Save `valid.txt' to ../RecSets/file_\Sequential\MovieLens1M_550811_Chron ...
[Converter] >>> Save `test.txt' to ../RecSets/file_\Sequential\MovieLens1M_550811_Chron ...
[Converter] >>> Save `user.txt' to ../RecSets/file_\Sequential\MovieLens1M_550811_Chron ...
[Converter] >>>