In [11]:
from elliot.run import run_experiment
from lib.data_generation import generate_data
import zipfile
import io
import requests
import os
import pandas as pd
import numpy as np
import yaml
import copy
from lenskit import crossfold as xf

### DMF experiment

Generate train-test splits.

In [2]:
# Directory prefix for the generated fold files. It is joined by plain string
# concatenation below, so the trailing slash is required.
location = 'elliot/data/'

In [12]:
# Build five user-partitioned train/test folds from the ml1m events file and
# write each fold to `location` as a pair of CSV files.
data_strategy = 'ml1m'
current_splits = []

# The raw file has no header row and uses "::" as separator; the column at
# index 3 is discarded and the remaining three are named explicitly.
ratings = pd.read_csv(
    'elliot/data/movielens_1m/ml1m_events.dat', header=None, sep="::", engine="python"
).drop(3, axis=1)
ratings.columns = ["user", "item", "rating"]

# Fixed rng_spec values keep the partitioning reproducible between runs.
sample = xf.SampleFrac(0.2, rng_spec=0)
sets = list(enumerate(xf.partition_users(ratings, 5, sample, rng_spec=0)))
for j, tp in sets:
    train_part, test_part = tp[0], tp[1]
    current_splits.append([train_part, test_part])

    # Fold files are numbered from 1, while enumerate counts from 0.
    fold_prefix = location + data_strategy + "_fold_" + str(j + 1)
    train_part.to_csv(fold_prefix + "_train.csv", index=False)
    test_part.to_csv(fold_prefix + "_test.csv", index=False)



Change data format.

In [13]:
# Re-export every fold's CSV splits as tab-separated files with integer-formatted
# values ('%i'); np.savetxt writes the rows only, without the column names.
for i in range(1, 6):
    fold_stem = "elliot/data/ml1m_fold_" + str(i)
    tr = pd.read_csv(fold_stem + "_train.csv")
    te = pd.read_csv(fold_stem + "_test.csv")
    np.savetxt(fold_stem + "_train.tsv", tr, delimiter='\t', fmt='%i')
    np.savetxt(fold_stem + "_test.tsv", te, delimiter='\t', fmt='%i')
    

Experiment.

In [14]:
# Hyperparameter grid for DMF: the experiment loop runs every combination of
# an mlp value with a batch size.
# Layer sizes are kept as strings; the same string is written to both the
# 'user_mlp' and 'item_mlp' entries of the config.
mlp_values = ['(64,32)', '(64,64)']
batch_size_values = [256, 512]

In [15]:
# Grid search for DMF: for each of the five folds, run one experiment per
# (mlp, batch_size) combination using a temporary copy of the fold's config.
temp_config_path = 'elliot/config_files/temp_config.yml'

for i in range(1, 6):
    print('Start for ', i, '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    # Load the base configuration that belongs to this fold.
    with open('elliot/config_files/ml1m'+str(i)+'.yml', 'r') as f:
        base_config = yaml.safe_load(f)

    for mlp in mlp_values:
        for batch_size in batch_size_values:
            print("We re doing the following: ", mlp, batch_size)
            # Work on a deep copy so the base configuration stays untouched
            # for the next hyperparameter combination.
            config = copy.deepcopy(base_config)
            dmf_settings = config['experiment']['models']['DMF']
            # The user and item towers share the same layer specification.
            dmf_settings['user_mlp'] = mlp
            dmf_settings['item_mlp'] = mlp
            dmf_settings['batch_size'] = batch_size

            # Write the configuration to a temporary file.
            with open(temp_config_path, 'w') as f:
                yaml.dump(config, f)

            try:
                # Run the experiment with the current configuration.
                run_experiment(temp_config_path)
            finally:
                # Always remove the temp file, even when the run fails, so a
                # stale config is never left behind.
                os.remove(temp_config_path)

Start for  1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
We re doing the following:  (64,32) 256
2024-04-23 16:48:32.900116: I Start experiment
2024-04-23 16:48:33.181494: I /export/scratch2/home/savvina/new_environment/Elliot/elliot/data/ml1m_fold_1_train.tsv - Loaded
2024-04-23 16:48:33.245994: I Test Fold 0


To do next:

Extract the best iteration from the resulting file so you read the appropriate file.