In [1]:
import torch
from requests import request
from requests.compat import *
from bs4 import BeautifulSoup
import pandas as pd
import math
import dgl
import pickle

In [2]:
from multisage.builder import PandasGraphBuilder

def graph_maker(df):
    """Build the product/attribute heterogeneous graph from the product table.

    The input frame is copied, missing '성분' cells are replaced by the string
    '[]' and missing '평점' cells by 0.0. ``seperateDF`` is then run on the
    '특징', '성분' and '느낌' columns. The '특징' result supplies the graph
    topology; all three results supply per-product indicator features.

    Parameters
    ----------
    df : pandas.DataFrame
        Product table. The code reads the columns 'ID', '상품명', '특징',
        '성분', '느낌' and '평점'. The caller's frame is left unmodified.

    Returns
    -------
    The graph returned by ``PandasGraphBuilder.build()`` with:

    * node types '특징' (with an 'id' feature) and '상품' (with '특징',
      '성분' and '느낌' indicator features),
    * relations 'define' (keyed '특징_id' -> 'ID') and 'define-by'
      (keyed 'ID' -> '특징_id'), each carrying a 'rating' edge feature.

    A textual summary of the graph is printed as a side effect.
    """
    # Work on a copy so the fills below do not leak into the caller's frame.
    df = df.copy()
    df['성분'] = df['성분'].fillna('[]')
    df['평점'] = df['평점'].fillna(0.0)

    attributes_df, attributes, attributes_merged_df = seperateDF(df, '특징')
    # Only the expanded frames are needed for the other two columns.
    ingredient_df, _, _ = seperateDF(df, '성분')
    feel_df, _, _ = seperateDF(df, '느낌')

    graph_builder = PandasGraphBuilder()
    graph_builder.add_entities(attributes, '특징_id', '특징')
    graph_builder.add_entities(df, 'ID', '상품')
    graph_builder.add_binary_relations(attributes_merged_df, '특징_id', 'ID', 'define')
    graph_builder.add_binary_relations(attributes_merged_df, 'ID', '특징_id', 'define-by')
    g = graph_builder.build()

    g.nodes['특징'].data['id'] = torch.LongTensor(attributes['특징_id'].cat.codes.values)

    # seperateDF appends its indicator columns after the columns of the frame
    # it was given, so they are selected by position. This avoids a hard-coded
    # list of column names and still works when indicator labels collide with
    # existing column names.
    # NOTE(review): rows are in `df` order; this presumes the builder numbers
    # '상품' nodes in the same order - confirm against PandasGraphBuilder.
    base_width = df.shape[1]
    expanded_frames = (
        ('특징', attributes_df),
        ('성분', ingredient_df),
        ('느낌', feel_df),
    )
    for feature_name, expanded_df in expanded_frames:
        g.nodes['상품'].data[feature_name] = _indicator_tensor(expanded_df, base_width)

    # Both relations were built from attributes_merged_df, so the edge feature
    # must have one entry per row of that table, not one per product in `df`.
    # NOTE(review): presumes the builder creates edges in relation-row order;
    # LongTensor truncates fractional ratings - confirm that is intended.
    for relation in ('define', 'define-by'):
        g.edges[relation].data['rating'] = torch.LongTensor(attributes_merged_df['평점'].values)

    # Inspect the graph structure.
    print("Number of nodes per type:")
    print({ntype: g.number_of_nodes(ntype) for ntype in g.ntypes})
    print("Number of edges per type:")
    print({canonical: g.number_of_edges(canonical) for canonical in g.canonical_etypes})
    print("Node types:")
    print(g.ntypes)
    print("Edge types:")
    print(g.etypes)

    # Inspect the node data.
    for ntype in g.ntypes:
        print(f"Node type: {ntype}")
        print(g.nodes[ntype].data)

    # Inspect the edge data.
    for stype, etype, dtype in g.canonical_etypes:
        print(f"Edge type: {etype}")
        print(g.edges[etype].data)

    # Inspect the overall graph.
    print("Graph structure:")
    print(g)

    return g


def _indicator_tensor(expanded_df, base_width):
    """Return the columns after the first ``base_width`` ones as a float tensor."""
    indicators = expanded_df.iloc[:, base_width:].fillna(False).astype('bool')
    return torch.FloatTensor(indicators.values)

In [3]:
def seperateDF(df, name):
    """Expand column ``name`` into an id table, an edge table and indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns 'ID', '상품명', '평점' and
        ``name``. It is not modified.
    name : str
        Column to expand. List-like cells are split by ``DataFrame.explode``;
        a string cell is left intact by ``explode`` and is tested with
        substring containment, because ``in`` is used for the membership test.

    Returns
    -------
    df_copy : pandas.DataFrame
        A new frame holding the columns of ``df`` followed by one boolean
        column per distinct non-missing value of ``name``. Each column is
        labelled with that value and tells whether the row's cell contains it.
    attributes : pandas.DataFrame
        Columns ``[name + '_id', name]``: every distinct exploded value with a
        running integer id stored as a categorical.
    merged_df : pandas.DataFrame
        The exploded 'ID' / '상품명' / ``name`` / '평점' rows joined with
        ``attributes`` on ``name``.
    """
    small_df = df.explode(name)[['ID', '상품명', name, '평점']]

    attributes = pd.DataFrame(small_df[name].unique()).reset_index()
    attributes.columns = [name + '_id', name]
    attributes = attributes.astype({name + '_id': 'category'})
    merged_df = pd.merge(small_df, attributes, on=[name])

    # Read the cells once as a plain list: this is independent of the frame's
    # index labels and avoids per-element pandas lookups inside the loop.
    cells = df[name].tolist()

    # One indicator column per value, keyed by the value itself so the labels
    # are unique and do not shadow the source column. A missing value would
    # only yield an all-False column with a NaN label, so it is skipped.
    indicator_columns = {
        value: [_cell_contains(cell, value) for cell in cells]
        for value in attributes[name]
        if not (pd.api.types.is_scalar(value) and pd.isna(value))
    }

    # Build every indicator column in a single frame aligned on df's index,
    # then concatenate once instead of once per value.
    indicators = pd.DataFrame(indicator_columns, index=df.index)
    df_copy = pd.concat([df, indicators], axis=1)

    return df_copy, attributes, merged_df


def _cell_contains(cell, value):
    """Return ``value in cell``, treating cells that do not support ``in`` as not containing it."""
    try:
        return value in cell
    except TypeError:
        # e.g. a missing (NaN) cell, or a non-string value tested against a string cell.
        return False

In [4]:
# Load the product table and turn it into the heterogeneous graph.
df = pd.read_excel('data.xlsx')
g = graph_maker(df)

# Store the bare graph in DGL's own format.
output_path = 'beauty_graph.dgl'
dgl.save_graphs(output_path, [g])

# Bundle the graph with the node-type and relation names used to build it,
# then pickle the bundle next to the graph file.
dataset = dict([
    ('train-graph', g),
    ('context-type', '특징'),
    ('item-type', '상품'),
    ('context-to-item-type', 'define'),
    ('item-to-context-type', 'define-by'),
])

with open('beauty_data.pickle', 'wb') as pickle_file:
    pickle.dump(dataset, pickle_file)

KeyboardInterrupt: 