In [8]:
import tensorflow as tf
import tensorflow_gnn as tfgnn
import pandas as pd
import numpy as np

def prepare_graph_tensor(user_features, dish_features, interaction_df):
    """
    将用户-菜品数据转换为 GraphTensor
    Args:
        user_features: [num_users, user_feature_dim]
        dish_features: [num_dishes, dish_feature_dim]
        interaction_df: DataFrame with user_id, dish_id, score
    Returns:
        GraphTensor: TF-GNN的图数据对象
    """
    num_users = user_features.shape[0]
    num_dishes = dish_features.shape[0]
    
    # 创建 GraphTensor
    graph = tfgnn.GraphTensor.from_pieces(
        node_sets={
            'user': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([num_users]),
                features={'features': tf.convert_to_tensor(user_features, dtype=tf.float32)}
            ),
            'dish': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([num_dishes]),
                features={'features': tf.convert_to_tensor(dish_features, dtype=tf.float32)}
            )
        },
        edge_sets={
            ('user', 'interacts', 'dish'): tfgnn.EdgeSet.from_fields(
                sizes=tf.constant([len(interaction_df)]),
                adjacency=tfgnn.Adjacency.from_indices(
                    source=('user', tf.convert_to_tensor(interaction_df['user_id'].values, dtype=tf.int32)),
                    target=('dish', tf.convert_to_tensor(interaction_df['dish_id'].values, dtype=tf.int32))
                ),
                features={'score': tf.convert_to_tensor(interaction_df['score'].values, dtype=tf.float32)}
            )
        }
    )
    
    return graph

# 示例数据
num_users, num_dishes = 100, 200
user_feature_dim, dish_feature_dim = 16, 12
user_features = np.random.rand(num_users, user_feature_dim)
dish_features = np.random.rand(num_dishes, dish_feature_dim)
interaction_data = {
    'user_id': np.random.randint(0, num_users, 1000),
    'dish_id': np.random.randint(0, num_dishes, 1000),
    'score': np.random.rand(1000) * 5
}
interaction_df = pd.DataFrame(interaction_data)

graph_tensor = prepare_graph_tensor(user_features, dish_features, interaction_df)

graph_tensor

GraphTensor(
  context=Context(features={}, sizes=[1], shape=(), indices_dtype=tf.int32),
  node_set_names=['user', 'dish'],
  edge_set_names=[('user', 'interacts', 'dish')])

In [9]:
class BipartiteGNN(tf.keras.Model):
    def __init__(self, user_dim, dish_dim, hidden_dim, output_dim):
        super(BipartiteGNN, self).__init__()
        
        # 用户节点更新
        self.user_conv1 = tfgnn.keras.layers.GraphUpdate(
            node_sets={'user': tfgnn.keras.layers.NodeSetUpdate(
                {'interacts': tfgnn.keras.layers.SimpleConv(
                    sender_node_feature='features',
                    message_fn=tf.keras.layers.Dense(hidden_dim, activation='relu')
                )},
                tfgnn.keras.layers.NextStateFromConcat(
                    tf.keras.layers.Dense(hidden_dim, activation='relu')
                )
            )}
        )
        self.user_conv2 = tf.keras.layers.Dense(output_dim)
        
        # 菜品节点更新
        self.dish_conv1 = tfgnn.keras.layers.GraphUpdate(
            node_sets={'dish': tfgnn.keras.layers.NodeSetUpdate(
                {('user', 'interacts', 'dish'): tfgnn.keras.layers.SimpleConv(
                    sender_node_feature='features',
                    message_fn=tf.keras.layers.Dense(hidden_dim, activation='relu')
                )},
                tfgnn.keras.layers.NextStateFromConcat(
                    tf.keras.layers.Dense(hidden_dim, activation='relu')
                )
            )}
        )
        self.dish_conv2 = tf.keras.layers.Dense(output_dim)
    
    def call(self, graph):
        # 用户更新
        graph = self.user_conv1(graph)
        user_emb = self.user_conv2(graph.node_sets['user']['features'])
        
        # 菜品更新
        graph = self.dish_conv1(graph)
        dish_emb = self.dish_conv2(graph.node_sets['dish']['features'])
        
        return user_emb, dish_emb

# 初始化模型
model = BipartiteGNN(user_feature_dim, dish_feature_dim, hidden_dim=32, output_dim=16)

In [11]:
def compute_loss(model, graph):
    user_emb, dish_emb = model(graph)
    edge_indices = graph.edge_sets[('user', 'interacts', 'dish')].adjacency
    user_idx = edge_indices.source
    dish_idx = edge_indices.target
    pred_scores = tf.reduce_sum(
        tf.gather(user_emb, user_idx) * tf.gather(dish_emb, dish_idx),
        axis=1
    )
    true_scores = graph.edge_sets[('user', 'interacts', 'dish')]['score']
    return tf.keras.losses.mean_squared_error(true_scores, pred_scores)

def train_model(model, graph, epochs=100, lr=0.01):
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    for epoch in range(epochs):
        with tf.GradientTape() as tape:
            loss = compute_loss(model, graph)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Loss: {loss.numpy():.4f}')
    return model

# 训练
trained_model = train_model(model, graph_tensor)

KeyError: "Exception encountered when calling layer 'node_set_update_2' (type NodeSetUpdate).\n\nhidden_state\n\nCall arguments received by layer 'node_set_update_2' (type NodeSetUpdate):\n  • graph=GraphTensor(\n  context=Context(features={}, sizes=[1], shape=(), indices_dtype=tf.int32),\n  node_set_names=['user', 'dish'],\n  edge_set_names=[('user', 'interacts', 'dish')])\n  • node_set_name='user'"

In [12]:
import tensorflow as tf
import tensorflow_gnn as tfgnn
import pandas as pd
import numpy as np

# 数据准备
def prepare_graph_tensor(user_features, dish_features, interaction_df):
    graph = tfgnn.GraphTensor.from_pieces(
        node_sets={
            'user': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([user_features.shape[0]]),
                features={'features': tf.convert_to_tensor(user_features, dtype=tf.float32)}
            ),
            'dish': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([dish_features.shape[0]]),
                features={'features': tf.convert_to_tensor(dish_features, dtype=tf.float32)}
            )
        },
        edge_sets={
            ('user', 'interacts', 'dish'): tfgnn.EdgeSet.from_fields(
                sizes=tf.constant([len(interaction_df)]),
                adjacency=tfgnn.Adjacency.from_indices(
                    source=('user', tf.convert_to_tensor(interaction_df['user_id'].values, dtype=tf.int32)),
                    target=('dish', tf.convert_to_tensor(interaction_df['dish_id'].values, dtype=tf.int32))
                ),
                features={'score': tf.convert_to_tensor(interaction_df['score'].values, dtype=tf.float32)}
            ),
            ('dish', 'interacts_reverse', 'user'): tfgnn.EdgeSet.from_fields(
                sizes=tf.constant([len(interaction_df)]),
                adjacency=tfgnn.Adjacency.from_indices(
                    source=('dish', tf.convert_to_tensor(interaction_df['dish_id'].values, dtype=tf.int32)),
                    target=('user', tf.convert_to_tensor(interaction_df['user_id'].values, dtype=tf.int32))
                ),
                features={'score': tf.convert_to_tensor(interaction_df['score'].values, dtype=tf.float32)}
            )
        }
    )
    return graph

# 模型定义
class BipartiteGNN(tf.keras.Model):
    def __init__(self, user_dim, dish_dim, hidden_dim, output_dim):
        super(BipartiteGNN, self).__init__()
        self.user_conv1 = tfgnn.keras.layers.GraphUpdate(
            node_sets={'user': tfgnn.keras.layers.NodeSetUpdate(
                {('dish', 'interacts_reverse', 'user'): tfgnn.keras.layers.SimpleConv(
                    sender_node_feature='features',
                    message_fn=tf.keras.layers.Dense(hidden_dim, activation='relu')
                )},
                tfgnn.keras.layers.NextStateFromConcat(
                    tf.keras.layers.Dense(hidden_dim, activation='relu')
                )
            )}
        )
        self.user_conv2 = tf.keras.layers.Dense(output_dim)
        self.dish_conv1 = tfgnn.keras.layers.GraphUpdate(
            node_sets={'dish': tfgnn.keras.layers.NodeSetUpdate(
                {('user', 'interacts', 'dish'): tfgnn.keras.layers.SimpleConv(
                    sender_node_feature='features',
                    message_fn=tf.keras.layers.Dense(hidden_dim, activation='relu')
                )},
                tfgnn.keras.layers.NextStateFromConcat(
                    tf.keras.layers.Dense(hidden_dim, activation='relu')
                )
            )}
        )
        self.dish_conv2 = tf.keras.layers.Dense(output_dim)
    
    def call(self, graph):
        graph = self.user_conv1(graph)
        user_emb = self.user_conv2(graph.node_sets['user']['features'])
        graph = self.dish_conv1(graph)
        dish_emb = self.dish_conv2(graph.node_sets['dish']['features'])
        return user_emb, dish_emb

# 训练和损失
def compute_loss(model, graph):
    user_emb, dish_emb = model(graph)
    edge_indices = graph.edge_sets[('user', 'interacts', 'dish')].adjacency
    user_idx = edge_indices.source
    dish_idx = edge_indices.target
    pred_scores = tf.reduce_sum(
        tf.gather(user_emb, user_idx) * tf.gather(dish_emb, dish_idx),
        axis=1
    )
    true_scores = graph.edge_sets[('user', 'interacts', 'dish')]['score']
    return tf.keras.losses.mean_squared_error(true_scores, pred_scores)

def train_model(model, graph, epochs=100, lr=0.01):
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    for epoch in range(epochs):
        with tf.GradientTape() as tape:
            loss = compute_loss(model, graph)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Loss: {loss.numpy():.4f}')
    return model

# 推荐
def get_recommendations(model, graph, user_id, top_k=5):
    user_emb, dish_emb = model(graph)
    user_vec = user_emb[user_id]
    scores = tf.matmul(dish_emb, tf.expand_dims(user_vec, 1))
    top_indices = tf.nn.top_k(scores[:, 0], k=top_k).indices
    return top_indices.numpy()

# 主函数
def main():
    num_users, num_dishes = 100, 200
    user_feature_dim, dish_feature_dim = 16, 12
    user_features = np.random.rand(num_users, user_feature_dim)
    dish_features = np.random.rand(num_dishes, dish_feature_dim)
    interaction_data = {
        'user_id': np.random.randint(0, num_users, 1000),
        'dish_id': np.random.randint(0, num_dishes, 1000),
        'score': np.random.rand(1000) * 5
    }
    interaction_df = pd.DataFrame(interaction_data)
    
    graph_tensor = prepare_graph_tensor(user_features, dish_features, interaction_df)
    model = BipartiteGNN(user_feature_dim, dish_feature_dim, 32, 16)
    trained_model = train_model(model, graph_tensor)
    
    user_id = 0
    recommendations = get_recommendations(trained_model, graph_tensor, user_id)
    print(f"Top 5 recommended dishes for user {user_id}: {recommendations}")

if __name__ == "__main__":
    main()

KeyError: "Exception encountered when calling layer 'node_set_update_4' (type NodeSetUpdate).\n\nhidden_state\n\nCall arguments received by layer 'node_set_update_4' (type NodeSetUpdate):\n  • graph=GraphTensor(\n  context=Context(features={}, sizes=[1], shape=(), indices_dtype=tf.int32),\n  node_set_names=['user', 'dish'],\n  edge_set_names=[('user', 'interacts', 'dish'), ('dish', 'interacts_reverse', 'user')])\n  • node_set_name='user'"

In [None]:
import tensorflow as tf
import tensorflow_gnn as tfgnn
import pandas as pd
import numpy as np

# 数据准备
def prepare_graph_tensor(user_features, dish_features, interaction_df):
    graph = tfgnn.GraphTensor.from_pieces(
        node_sets={
            'user': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([user_features.shape[0]]),
                features={'hidden_state': tf.convert_to_tensor(user_features, dtype=tf.float32)}
            ),
            'dish': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([dish_features.shape[0]]),
                features={'hidden_state': tf.convert_to_tensor(dish_features, dtype=tf.float32)}
            )
        },
        edge_sets={
            'interacts': tfgnn.EdgeSet.from_fields(
                sizes=tf.constant([len(interaction_df)]),
                adjacency=tfgnn.Adjacency.from_indices(
                    source=('user', tf.convert_to_tensor(interaction_df['user_id'].values, dtype=tf.int32)),
                    target=('dish', tf.convert_to_tensor(interaction_df['dish_id'].values, dtype=tf.int32))
                ),
                features={'score': tf.convert_to_tensor(interaction_df['score'].values, dtype=tf.float32)}
            ),
            'interacts_reverse': tfgnn.EdgeSet.from_fields(
                sizes=tf.constant([len(interaction_df)]),
                adjacency=tfgnn.Adjacency.from_indices(
                    source=('dish', tf.convert_to_tensor(interaction_df['dish_id'].values, dtype=tf.int32)),
                    target=('user', tf.convert_to_tensor(interaction_df['user_id'].values, dtype=tf.int32))
                ),
                features={'score': tf.convert_to_tensor(interaction_df['score'].values, dtype=tf.float32)}
            )
        }
    )
    return graph

# 模型定义
class BipartiteGNN(tf.keras.Model):
    def __init__(self, hidden_dim, output_dim):
        super(BipartiteGNN, self).__init__()
        self.user_conv = tfgnn.keras.layers.GraphUpdate(
            node_sets={'user': tfgnn.keras.layers.NodeSetUpdate(
                edge_set_inputs={'interacts_reverse': tfgnn.keras.layers.SimpleConv(
                    sender_node_feature='hidden_state',
                    message_fn=tf.keras.layers.Dense(hidden_dim, activation='relu')
                )},
                next_state=tfgnn.keras.layers.NextStateFromConcat(
                    tf.keras.layers.Dense(hidden_dim, activation='relu')
                )
            )}
        )
        self.dish_conv = tfgnn.keras.layers.GraphUpdate(
            node_sets={'dish': tfgnn.keras.layers.NodeSetUpdate(
                edge_set_inputs={'interacts': tfgnn.keras.layers.SimpleConv(
                    sender_node_feature='hidden_state',
                    message_fn=tf.keras.layers.Dense(hidden_dim, activation='relu')
                )},
                next_state=tfgnn.keras.layers.NextStateFromConcat(
                    tf.keras.layers.Dense(hidden_dim, activation='relu')
                )
            )}
        )
        self.user_proj = tf.keras.layers.Dense(output_dim)
        self.dish_proj = tf.keras.layers.Dense(output_dim)
    
    def call(self, graph):
        graph = self.user_conv(graph)
        user_emb = self.user_proj(graph.node_sets['user']['hidden_state'])
        graph = self.dish_conv(graph)
        dish_emb = self.dish_proj(graph.node_sets['dish']['hidden_state'])
        return user_emb, dish_emb

# 计算损失
def compute_loss(model, graph):
    user_emb, dish_emb = model(graph)
    edge_indices = graph.edge_sets['interacts'].adjacency
    user_idx = edge_indices.source
    dish_idx = edge_indices.target
    pred_scores = tf.reduce_sum(
        tf.gather(user_emb, user_idx) * tf.gather(dish_emb, dish_idx),
        axis=1
    )
    true_scores = graph.edge_sets['interacts']['score']
    return tf.reduce_mean(tf.keras.losses.mean_squared_error(true_scores, pred_scores))

# 训练模型
def train_model(model, graph, epochs=100, lr=0.01):
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    for epoch in range(epochs):
        with tf.GradientTape() as tape:
            loss = compute_loss(model, graph)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Loss: {loss.numpy():.4f}')
    return model

# 推荐系统
def get_recommendations(model, graph, user_id, top_k=5):
    user_emb, dish_emb = model(graph)
    user_vec = tf.nn.l2_normalize(user_emb[user_id], axis=0)
    dish_emb = tf.nn.l2_normalize(dish_emb, axis=1)
    scores = tf.matmul(dish_emb, tf.expand_dims(user_vec, 1))[:, 0]
    top_indices = tf.argsort(scores, direction='DESCENDING')[:top_k]
    return top_indices.numpy()

# 主函数
def main():
    num_users, num_dishes = 100, 200
    user_feature_dim, dish_feature_dim = 16, 12
    user_features = np.random.rand(num_users, user_feature_dim)
    dish_features = np.random.rand(num_dishes, dish_feature_dim)
    interaction_data = {
        'user_id': np.random.randint(0, num_users, 1000),
        'dish_id': np.random.randint(0, num_dishes, 1000),
        'score': np.random.rand(1000) * 5
    }
    interaction_df = pd.DataFrame(interaction_data)
    
    graph_tensor = prepare_graph_tensor(user_features, dish_features, interaction_df)
    model = BipartiteGNN(32, 16)
    trained_model = train_model(model, graph_tensor)
    
    user_id = 0
    recommendations = get_recommendations(trained_model, graph_tensor, user_id)
    print(f"Top 5 recommended dishes for user {user_id}: {recommendations}")

if __name__ == "__main__":
    main()


Epoch 0, Loss: 1682.5598
Epoch 10, Loss: 36.0119
Epoch 20, Loss: 12.9151
Epoch 30, Loss: 5.9745
Epoch 40, Loss: 4.2275
Epoch 50, Loss: 3.7774
Epoch 60, Loss: 3.5373
Epoch 70, Loss: 3.3906
Epoch 80, Loss: 3.2759
Epoch 90, Loss: 3.1816
Top 5 recommended dishes for user 0: [139 153 116 145  47]


In [41]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from langchain_community.embeddings import OllamaEmbeddings
# 输入数据
user_data = [
    {'user_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'user_feature': ['爱吃辣，爱吃咸', '辣，甜', '咸', '辣', '甜', '不辣', '苦', '甜', '咸', '辣']}
]

dish_data = [
    {'dish_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'dish_feature': ['辣', '酸,辣', '麻辣', '甜', '咸', '苦', '辣', '甜', '咸', '辣']}
]

interaction_data = [
    {'user_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'dish_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'score': [5, 4, 3, 2, 1, 5, 4, 3, 2, 1]}
]

def get_data(user_data, dish_data, interaction_data):
    """
    处理用户、菜品和交互数据，生成适合 GNN 的输入格式
    Args:
        user_data: 用户数据列表
        dish_data: 菜品数据列表
        interaction_data: 交互数据列表
    Returns:
        user_features: 用户特征矩阵 [num_users, feature_dim]
        dish_features: 菜品特征矩阵 [num_dishes, feature_dim]
        interaction_df: 交互 DataFrame
    """
    # 提取用户特征
    user_features = user_data[1]['user_feature']
    for j in user_features:
        print(j)

    
    # 提取菜品特征
    dish_features = dish_data[1]['dish_feature']
    print(dish_features)
    embeddings = OllamaEmbeddings(model="smartcreation/bge-large-zh-v1.5:latest")

    user_feature = [].append(embeddings.embed_query(j) for j in user_features)
    dish_feature = [].append(embeddings.embed_query(j) for j in dish_features)

    
    # 处理交互数据
    interaction_df = pd.DataFrame({
        'user_id': interaction_data[0]['user_id'],
        'dish_id': interaction_data[1]['dish_id'],
        'score': interaction_data[2]['score']
    })
    
    return user_feature, dish_feature, interaction_df

# 调用函数
user_feature, dish_feature, interaction_df = get_data(user_data, dish_data, interaction_data)


爱吃辣，爱吃咸
辣，甜
咸
辣
甜
不辣
苦
甜
咸
辣
['辣', '酸,辣', '麻辣', '甜', '咸', '苦', '辣', '甜', '咸', '辣']


In [42]:
user_feature

In [29]:
import tensorflow as tf
import tensorflow_gnn as tfgnn
import pandas as pd
import numpy as np

def prepare_graph_tensor(user_features, dish_features, interaction_df):
    """
    将用户-菜品数据转换为 GraphTensor
    Args:
        user_features: [num_users, user_feature_dim]
        dish_features: [num_dishes, dish_feature_dim]
        interaction_df: DataFrame with user_id, dish_id, score
    Returns:
        GraphTensor: TF-GNN的图数据对象
    """
    num_users = user_features.shape[0]
    num_dishes = dish_features.shape[0]
    
    # 创建 GraphTensor
    graph = tfgnn.GraphTensor.from_pieces(
        node_sets={
            'user': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([num_users]),
                features={'features': tf.convert_to_tensor(user_features, dtype=tf.float32)}
            ),
            'dish': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([num_dishes]),
                features={'features': tf.convert_to_tensor(dish_features, dtype=tf.float32)}
            )
        },
        edge_sets={
            ('user', 'interacts', 'dish'): tfgnn.EdgeSet.from_fields(
                sizes=tf.constant([len(interaction_df)]),
                adjacency=tfgnn.Adjacency.from_indices(
                    source=('user', tf.convert_to_tensor(interaction_df['user_id'].values, dtype=tf.int32)),
                    target=('dish', tf.convert_to_tensor(interaction_df['dish_id'].values, dtype=tf.int32))
                ),
                features={'score': tf.convert_to_tensor(interaction_df['score'].values, dtype=tf.float32)}
            )
        }
    )
    
    return graph

# 示例数据
num_users, num_dishes = 100, 200
user_feature_dim, dish_feature_dim = 16, 12
user_features = np.random.rand(num_users, user_feature_dim)
dish_features = np.random.rand(num_dishes, dish_feature_dim)
interaction_data = {
    'user_id': np.random.randint(0, num_users, 1000),
    'dish_id': np.random.randint(0, num_dishes, 1000),
    'score': np.random.rand(1000) * 5
}
interaction_df = pd.DataFrame(interaction_data)

graph_tensor = prepare_graph_tensor(user_features, dish_features, interaction_df)

graph_tensor

GraphTensor(
  context=Context(features={}, sizes=[1], shape=(), indices_dtype=tf.int32),
  node_set_names=['user', 'dish'],
  edge_set_names=[('user', 'interacts', 'dish')])

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from langchain_community.embeddings import OllamaEmbeddings
# 输入数据
user_data = [
    {'user_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'user_feature': ['爱吃辣，爱吃咸', '辣，甜', '咸', '辣', '甜', '不辣', '苦', '甜', '咸', '辣']}
]

dish_data = [
    {'dish_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'dish_feature': ['辣', '酸,辣', '麻辣', '甜', '咸', '苦', '辣', '甜', '咸', '辣']}
]

interaction_data = [
    {'user_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'dish_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'score': [5, 4, 3, 2, 1, 5, 4, 3, 2, 1]}
]

def get_data(user_data, dish_data, interaction_data):

    # 提取用户特征
    user_features = user_data[1]['user_feature']  
    # 提取菜品特征
    dish_features = dish_data[1]['dish_feature']
    embeddings = OllamaEmbeddings(model="smartcreation/bge-large-zh-v1.5:latest")
    user_feature=[], dish_feature=[]
    for j in user_features:
        user_feature = user_features.append(embeddings.embed_query(j))
    for j in dish_features:
        dish_feature = dish_feature.append(embeddings.embed_query(j))

    
    # 处理交互数据
    interaction_df = pd.DataFrame({
        'user_id': interaction_data[0]['user_id'],
        'dish_id': interaction_data[1]['dish_id'],
        'score': interaction_data[2]['score']
    })
    
    return user_feature, dish_feature, interaction_df

# 调用函数
user_feature, dish_feature, interaction_df = get_data(user_data, dish_data, interaction_data)


ValueError: not enough values to unpack (expected 2, got 0)

In [59]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from langchain_community.embeddings import OllamaEmbeddings
# 输入数据
user_features = ['爱吃辣，爱吃咸', '辣，甜', '咸', '辣', '甜', '不辣', '苦', '甜', '咸', '辣']


dish_features = ['辣', '酸,辣', '麻辣', '甜', '咸', '苦', '辣', '甜', '咸', '辣']



embeddings = OllamaEmbeddings(model="smartcreation/bge-large-zh-v1.5:latest")
user_feature, dish_feature = [],[]
for j in user_features:
        user_feature.append(embeddings.embed_query(j))
for j in dish_features:
        dish_feature.append(embeddings.embed_query(j))


interaction_data = [
    {'user_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'dish_id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]},
    {'score': [5, 4, 3, 2, 1, 5, 4, 3, 2, 1]}
]

# 处理交互数据
interaction_df = pd.DataFrame({
        'user_id': interaction_data[0]['user_id'],
        'dish_id': interaction_data[1]['dish_id'],
        'score': interaction_data[2]['score']
})


    

In [65]:
from langchain_community.embeddings import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="smartcreation/bge-large-zh-v1.5:latest")

a=embeddings.embed_query('爱吃川菜')
b=embeddings.embed_query('喜欢吃川菜')

# 计算余弦相似度
def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity = cosine_similarity(a, b)
print(f"余弦相似度: {similarity}")



余弦相似度: 0.9678800388809308


In [None]:
df.shapexx

(7843, 6)

In [3]:
! pip install python-docx
from docx import Document

# 创建 Word 文档
doc = Document()

# 添加标题
doc.add_heading('JSON 数据转换结果', level=1)

# 逐行写入数据
for index, row in df.iterrows():
    doc.add_paragraph(f"记录 {index + 1}:")
    for col_name, value in row.items():
        doc.add_paragraph(f"{col_name}: {value}")
    doc.add_paragraph("---")  # 分隔线

# 保存文档
doc.save('output.docx')

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
Installing collected packages: python-docx
Successfully installed python-docx-1.1.2


In [1]:
import json
import numpy as np
from langchain_community.embeddings import OllamaEmbeddings
def extract_features(json_file_path):
    """
    从 JSON 文件中读取数据，提取 taste 和 categories 作为特征
    
    参数:
    json_file_path (str): JSON 文件路径
    
    返回:
    dict: 包含 taste 和 categories 的特征字典
    """
    try:
        # 打开并读取 JSON 文件
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        # 提取 taste 和 categories
        features=[]
        names=[]
        for i in data:
            features.append(i['taste'])
            features.append(i['categories'])
            names.append(i['title'])
        return features,names
    
    except FileNotFoundError:
        print(f"错误：文件 {json_file_path} 未找到")
        return None
    except json.JSONDecodeError:
        print(f"错误：文件 {json_file_path} 不是有效的 JSON 格式")
        return None
    except Exception as e:
        print(f"发生未知错误：{str(e)}")
        return None

# 示例用法
if __name__ == "__main__":
    # 假设 JSON 数据已保存为 'recipe.json'
    json_file = "all_recipe.json"
    # 调用函数
    dish_features,names= extract_features(json_file)
    embeddings = OllamaEmbeddings(model="smartcreation/bge-large-zh-v1.5:latest")
    dish_feature = []
    for j in dish_features:
        dish_feature.append(embeddings.embed_query(j))
    dish_feature = np.array(dish_feature)
    print(dish_feature.shape)





  embeddings = OllamaEmbeddings(model="smartcreation/bge-large-zh-v1.5:latest")


KeyboardInterrupt: 

In [7]:
import json
import numpy as np
from langchain_community.embeddings import OllamaEmbeddings
def extract_features(json_file_path):
    """
    从 JSON 文件中读取数据，提取 taste 和 categories 作为特征
    
    参数:
    json_file_path (str): JSON 文件路径
    
    返回:
    dict: 包含 taste 和 categories 的特征字典
    """
    try:
        # 打开并读取 JSON 文件
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        # 提取 taste 和 categories
        print(len(data))
        features=[]
        names=[]
        for i in data:
            features.append(i['taste'])
            features.append(i['categories'])
            names.append(i['title'])
        return features,names
    
    except FileNotFoundError:
        print(f"错误：文件 {json_file_path} 未找到")
        return None
    except json.JSONDecodeError:
        print(f"错误：文件 {json_file_path} 不是有效的 JSON 格式")
        return None
    except Exception as e:
        print(f"发生未知错误：{str(e)}")
        return None

a,b=extract_features('all_recipe.json')
    






7843


In [11]:
len(dish_features)

15686

In [12]:
import random

def generate_user_features(num_users=1000):
    taste_options = ['甜', '咸', '酸', '辣', '麻辣', '苦', '清淡']
    cuisine_options = ['川菜', '湘菜', '粤菜', '东北菜', '沪菜', '鲁菜']
    features = cuisine_options + taste_options  # 可选：仅用于调试输出
    print(f"All available features: {features}")
    
    user_features = []
    for _ in range(num_users):
        # 随机选择一个口味和一个菜系组合
        taste = random.choice(taste_options)
        cuisine = random.choice(cuisine_options)
        user_feature = f"{taste}+{cuisine}"  # 例如 "甜+川菜"
        user_features.append(user_feature)
    return user_features


user_features=generate_user_features()
user_features


All available features: ['川菜', '湘菜', '粤菜', '东北菜', '沪菜', '鲁菜', '甜', '咸', '酸', '辣', '麻辣', '苦', '清淡']


['辣+东北菜',
 '清淡+湘菜',
 '麻辣+川菜',
 '酸+湘菜',
 '酸+东北菜',
 '辣+粤菜',
 '清淡+鲁菜',
 '苦+鲁菜',
 '苦+粤菜',
 '咸+鲁菜',
 '咸+东北菜',
 '咸+东北菜',
 '咸+川菜',
 '甜+鲁菜',
 '清淡+东北菜',
 '甜+鲁菜',
 '咸+东北菜',
 '麻辣+川菜',
 '清淡+湘菜',
 '苦+粤菜',
 '清淡+鲁菜',
 '清淡+沪菜',
 '咸+湘菜',
 '咸+湘菜',
 '酸+川菜',
 '清淡+川菜',
 '清淡+湘菜',
 '咸+东北菜',
 '酸+湘菜',
 '咸+湘菜',
 '清淡+东北菜',
 '酸+粤菜',
 '甜+东北菜',
 '麻辣+鲁菜',
 '咸+川菜',
 '苦+湘菜',
 '辣+湘菜',
 '酸+东北菜',
 '辣+川菜',
 '清淡+川菜',
 '麻辣+沪菜',
 '辣+鲁菜',
 '辣+沪菜',
 '甜+沪菜',
 '苦+粤菜',
 '麻辣+东北菜',
 '辣+鲁菜',
 '酸+粤菜',
 '酸+鲁菜',
 '清淡+粤菜',
 '辣+川菜',
 '苦+粤菜',
 '清淡+东北菜',
 '辣+川菜',
 '酸+川菜',
 '咸+沪菜',
 '咸+鲁菜',
 '清淡+东北菜',
 '苦+湘菜',
 '甜+川菜',
 '辣+沪菜',
 '麻辣+粤菜',
 '酸+湘菜',
 '咸+东北菜',
 '苦+鲁菜',
 '苦+湘菜',
 '甜+粤菜',
 '辣+沪菜',
 '酸+沪菜',
 '咸+沪菜',
 '咸+湘菜',
 '甜+鲁菜',
 '咸+粤菜',
 '麻辣+湘菜',
 '麻辣+川菜',
 '清淡+川菜',
 '苦+东北菜',
 '麻辣+沪菜',
 '苦+湘菜',
 '苦+东北菜',
 '清淡+川菜',
 '清淡+粤菜',
 '酸+东北菜',
 '甜+东北菜',
 '麻辣+东北菜',
 '辣+湘菜',
 '辣+沪菜',
 '清淡+川菜',
 '咸+东北菜',
 '辣+沪菜',
 '清淡+东北菜',
 '清淡+粤菜',
 '麻辣+鲁菜',
 '麻辣+沪菜',
 '甜+东北菜',
 '咸+鲁菜',
 '甜+东北菜',
 '酸+粤菜',
 '苦+鲁菜',
 '苦+东北菜',
 '甜+川菜',
 '苦+粤菜',
 '清淡+川菜',
 '苦+川菜',
 '咸+东北菜'

In [19]:
user_feature=[]
for i in user_features:
    user_feature.append(embeddings.embed_query(i))

user_features=np.array(user_feature)
user_features.shape

(1000, 1024)

In [18]:
import pandas as pd 

# 交互数据生成
def generate_interaction_data(num_interactions=10000, num_users=1000, num_dishes=15000):
    interaction_data = {
        'user_id': np.random.randint(0, num_users, size=num_interactions),  # 随机用户ID (0-999)
        'dish_id': np.random.randint(0, num_dishes, size=num_interactions),  # 随机菜品ID (0-9)
        'score': np.random.uniform(0, 5, size=num_interactions)  # 随机评分 (0-5)
    }
    return pd.DataFrame(interaction_data)

a=generate_interaction_data()
a

Unnamed: 0,user_id,dish_id,score
0,36,9007,3.739346
1,963,13817,4.820340
2,849,8314,2.068983
3,373,10844,4.756350
4,545,2244,0.049566
...,...,...,...
9995,249,11777,4.320426
9996,322,10030,3.845429
9997,793,6630,4.469949
9998,810,10087,4.694995


In [None]:
import tensorflow as tf
import tensorflow_gnn as tfgnn
import pandas as pd
import numpy as np
from langchain_community.embeddings import OllamaEmbeddings
# 数据准备
def prepare_graph_tensor(user_features, dish_features, interaction_df):
    graph = tfgnn.GraphTensor.from_pieces(
        node_sets={
            'user': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([user_features.shape[0]]),
                features={'hidden_state': tf.convert_to_tensor(user_features, dtype=tf.float32)}
            ),
            'dish': tfgnn.NodeSet.from_fields(
                sizes=tf.constant([dish_features.shape[0]]),
                features={'hidden_state': tf.convert_to_tensor(dish_features, dtype=tf.float32)}
            )
        },
        edge_sets={
            'interacts': tfgnn.EdgeSet.from_fields(
                sizes=tf.constant([len(interaction_df)]),
                adjacency=tfgnn.Adjacency.from_indices(
                    source=('user', tf.convert_to_tensor(interaction_df['user_id'].values, dtype=tf.int32)),
                    target=('dish', tf.convert_to_tensor(interaction_df['dish_id'].values, dtype=tf.int32))
                ),
                features={'score': tf.convert_to_tensor(interaction_df['score'].values, dtype=tf.float32)}
            ),
            'interacts_reverse': tfgnn.EdgeSet.from_fields(
                sizes=tf.constant([len(interaction_df)]),
                adjacency=tfgnn.Adjacency.from_indices(
                    source=('dish', tf.convert_to_tensor(interaction_df['dish_id'].values, dtype=tf.int32)),
                    target=('user', tf.convert_to_tensor(interaction_df['user_id'].values, dtype=tf.int32))
                ),
                features={'score': tf.convert_to_tensor(interaction_df['score'].values, dtype=tf.float32)}
            )
        }
    )
    return graph

# 模型定义
class BipartiteGNN(tf.keras.Model):
    def __init__(self, hidden_dim, output_dim):
        super(BipartiteGNN, self).__init__()
        self.user_conv = tfgnn.keras.layers.GraphUpdate(
            node_sets={'user': tfgnn.keras.layers.NodeSetUpdate(
                edge_set_inputs={'interacts_reverse': tfgnn.keras.layers.SimpleConv(
                    sender_node_feature='hidden_state',
                    message_fn=tf.keras.layers.Dense(hidden_dim, activation='relu')
                )},
                next_state=tfgnn.keras.layers.NextStateFromConcat(
                    tf.keras.layers.Dense(hidden_dim, activation='relu')
                )
            )}
        )
        self.dish_conv = tfgnn.keras.layers.GraphUpdate(
            node_sets={'dish': tfgnn.keras.layers.NodeSetUpdate(
                edge_set_inputs={'interacts': tfgnn.keras.layers.SimpleConv(
                    sender_node_feature='hidden_state',
                    message_fn=tf.keras.layers.Dense(hidden_dim, activation='relu')
                )},
                next_state=tfgnn.keras.layers.NextStateFromConcat(
                    tf.keras.layers.Dense(hidden_dim, activation='relu')
                )
            )}
        )
        self.user_proj = tf.keras.layers.Dense(output_dim)
        self.dish_proj = tf.keras.layers.Dense(output_dim)
    
    def call(self, graph):
        graph = self.user_conv(graph)
        user_emb = self.user_proj(graph.node_sets['user']['hidden_state'])
        graph = self.dish_conv(graph)
        dish_emb = self.dish_proj(graph.node_sets['dish']['hidden_state'])
        return user_emb, dish_emb

# 计算损失
def compute_loss(model, graph):
    user_emb, dish_emb = model(graph)
    edge_indices = graph.edge_sets['interacts'].adjacency
    user_idx = edge_indices.source
    dish_idx = edge_indices.target
    pred_scores = tf.reduce_sum(
        tf.gather(user_emb, user_idx) * tf.gather(dish_emb, dish_idx),
        axis=1
    )
    true_scores = graph.edge_sets['interacts']['score']
    return tf.reduce_mean(tf.keras.losses.mean_squared_error(true_scores, pred_scores))

# 训练模型
def train_model(model, graph, epochs=10000, lr=0.01):
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    for epoch in range(epochs):
        with tf.GradientTape() as tape:
            loss = compute_loss(model, graph)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Loss: {loss.numpy():.4f}')
    return model

# 推荐系统
def get_recommendations(model, graph, user_id, top_k=5):
    user_emb, dish_emb = model(graph)
    user_vec = tf.nn.l2_normalize(user_emb[user_id], axis=0)
    dish_emb = tf.nn.l2_normalize(dish_emb, axis=1)
    scores = tf.matmul(dish_emb, tf.expand_dims(user_vec, 1))[:, 0]
    top_indices = tf.argsort(scores, direction='DESCENDING')[:top_k]
    return top_indices.numpy()




# 主函数
def main():

    
    graph_tensor = prepare_graph_tensor(user_features, dish_feature, a)
    print(graph_tensor)
    model = BipartiteGNN(512, 128)
    trained_model = train_model(model, graph_tensor)
    
    user_id = 1
    recommendations = get_recommendations(trained_model, graph_tensor, user_id)
    dish=[]
    for i in recommendations:
         dish.append(dish_features[i])

    print(f"Top 5 recommended dishes for user {user_id}: {dish}")

if __name__ == "__main__":
    main()


GraphTensor(
  context=Context(features={}, sizes=[1], shape=(), indices_dtype=tf.int32),
  node_set_names=['user', 'dish'],
  edge_set_names=['interacts', 'interacts_reverse'])

Epoch 0, Loss: 13757.9893
Epoch 10, Loss: 129580318720.0000
Epoch 20, Loss: 3897216512.0000
Epoch 30, Loss: 401284896.0000
Epoch 40, Loss: 100179952.0000
Epoch 50, Loss: 90656368.0000
Epoch 60, Loss: 41658720.0000
Epoch 70, Loss: 5790555.0000
Epoch 80, Loss: 3146791.7500
Epoch 90, Loss: 1802506.6250
Top 5 recommended dishes for user 1: ['甜味', '甜味', '甜味', '甜味', '甜味']
