In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split


In [2]:
# Maximum number of trusted neighbours kept per user; each neighbour
# contributes two feature columns, so feature vectors have 2*K entries.
K = 10

# Input CSV files.
RATING_FILE = 'train_data_movie_rate.csv'
TRUST_FILE = 'train_data_movie_trust.csv'
TEST_FILE = 'test_data.csv'

# Output CSV receiving the predictions.
OUT_FILE = 'predictions.csv'


In [3]:
# Load the three input tables in one pass: ratings, trust edges, and the
# rows to predict. Files are read in the order listed.
ratings, trust, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (RATING_FILE, TRUST_FILE, TEST_FILE)
)

In [4]:
# Order trust edges by trustor, strongest trust first within each trustor.
trust_sorted = trust.sort_values(
    by=['user_id_trustor', 'trust_value'],
    ascending=[True, False],
)

# Map each trustor to the list of (at most) K trustees that come first in
# that ordering.
topk = (
    trust_sorted
    .groupby('user_id_trustor')['user_id_trustee']
    .agg(list)
    .map(lambda trustees: trustees[:K])
    .to_dict()
)


In [5]:
# (user_id, item_id) -> rating label. If a pair occurs more than once the
# last row wins.
rating_map = dict(
    zip(zip(ratings['user_id'], ratings['item_id']), ratings['label'])
)

# (trustor, trustee) -> trust value, with the same last-row-wins behaviour.
trust_map = dict(
    zip(
        zip(trust['user_id_trustor'], trust['user_id_trustee']),
        trust['trust_value'],
    )
)

In [6]:
def make_features(user_id, item_id):
    """
    Build the neighbour-based feature vector for one (user_id, item_id) pair.

    Reads the module-level `topk`, `rating_map`, `trust_map` and `K`.

    Returns a list of exactly 2*K values:
      [r(v1,i), t(u->v1), r(v2,i), t(u->v2), …, r(vK,i), t(u->vK)]
    where v1..vK are the neighbours listed for `user_id` in `topk`,
    r(v,i) is neighbour v's rating of `item_id`, and t(u->v) is the trust
    value from `user_id` to v.

    Missing ratings, missing trust edges, and unused neighbour slots → -1
    """
    # Cap at K so the output width is always 2*K, even if `topk` was built
    # with a larger K (e.g. K changed without re-running the topk cell).
    neigh = topk.get(user_id, [])[:K]

    feats = []
    for v in neigh:
        feats.append(rating_map.get((v, item_id), -1))
        feats.append(trust_map.get((user_id, v), -1))

    # Pad the unused neighbour slots; a zero multiplier adds nothing.
    feats.extend([-1, -1] * (K - len(neigh)))
    return feats

In [7]:
# Training matrix: one make_features row per rating record, stored as float32.
X = np.asarray(
    list(map(make_features, ratings['user_id'], ratings['item_id'])),
    dtype=np.float32,
)

# Regression target: the rating label of each record, in the same row order.
y = ratings['label'].values

In [8]:
# Hold out 20% of the rows for validation; the fixed seed makes the split
# reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Wrap the training split in a LightGBM dataset.
dtrain = lgb.Dataset(data=X_train, label=y_train)

# The validation dataset references the training dataset.
dvalid = lgb.Dataset(data=X_val, label=y_val, reference=dtrain)

In [10]:
# Regression objective, evaluated with RMSE on the validation set.
params = {
    'objective': 'regression',
    'metric':    'rmse',
    'learning_rate': 0.1,
    'num_leaves':    31,
    'verbose':       -1
}

# num_boost_round is only an upper bound: the early-stopping callback halts
# training once validation RMSE has not improved for 50 consecutive rounds,
# so the validation set actually guards against overfitting instead of
# merely being scored. The booster records the best iteration, which
# Booster.predict uses by default.
model = lgb.train(
    params,
    train_set=dtrain,
    num_boost_round=1000,
    valid_sets=[dvalid],
    callbacks=[lgb.early_stopping(stopping_rounds=50)],
)

In [11]:
# Build the test matrix exactly like the training matrix (same feature
# function, same float32 dtype) so the model sees identically preprocessed
# inputs at train and predict time.
test_feats = np.array(
    [make_features(u, i) for u, i in zip(test_df.user_id, test_df.item_id)],
    dtype=np.float32
)
test_df['label'] = model.predict(test_feats)

# Assign 1-based ids following the row order of the test file.
test_df.reset_index(drop=True, inplace=True)
test_df['id'] = test_df.index + 1

# Write only the id and predicted label columns.
out = test_df[['id','label']]
out.to_csv(OUT_FILE, index=False)