In [5]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Prepare a tiny toy sentiment dataset (label 1 = positive, 0 = negative).
texts = ["I love this movie", "This movie is great", "I dislike this movie", "This movie is terrible"]
labels = [1, 1, 0, 0]

# Seed for the train/test split so that re-running the cell gives the same split.
SEED = 42

# Text preprocessing: learn the vocabulary and map every text to a list of word ids.
# An explicit OOV token gives words that were never seen during fitting their own
# id; without it texts_to_sequences silently drops them.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
# +1 because word ids start at 1; id 0 is left free and is what pad_sequences
# fills with by default.
vocab_size = len(tokenizer.word_index) + 1
max_seq_length = max(len(seq) for seq in sequences)

# Pad every sequence to the length of the longest one.
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length)

# Split into train and test sets (with four samples this holds out a single one).
x_train, x_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=0.2, random_state=SEED
)

# Build the Bi-LSTM classifier: an embedding layer, two stacked bidirectional
# LSTMs (the first returns the full sequence so the second can consume it) and a
# single sigmoid unit for the binary label.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=100, input_length=max_seq_length),
    Bidirectional(LSTM(units=64, return_sequences=True)),
    Bidirectional(LSTM(units=32)),
    Dense(1, activation='sigmoid'),
])

# Compile with Adam and binary cross-entropy, tracking accuracy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 10 epochs; the held-out split is used as validation data.
train_inputs, train_targets = np.array(x_train), np.array(y_train)
val_inputs, val_targets = np.array(x_test), np.array(y_test)
model.fit(
    train_inputs,
    train_targets,
    batch_size=16,
    epochs=10,
    validation_data=(val_inputs, val_targets),
)

# Run the trained model on a sentence it has not seen: encode it with the fitted
# tokenizer, pad it to the training length, then print the sigmoid output
# (values closer to 1 correspond to label 1).
new_texts = ["This movie is amazing"]
test_sequences = tokenizer.texts_to_sequences(new_texts)
test_data = pad_sequences(test_sequences, maxlen=max_seq_length)
prediction = model.predict(np.array(test_data))
print(prediction)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[0.55445606]]


In [10]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Load the node table (train_90.csv) and the edge table (edge_90.csv).
df = pd.read_csv('train_90.csv')
edges = pd.read_csv('edge_90.csv')

# Start from an empty directed graph.
G = nx.DiGraph()

# Columns copied onto each node: F_1 .. F_35 followed by the two index columns.
feature_columns = [f'F_{j}' for j in range(1, 36)]
node_columns = feature_columns + ['active_index', 'consume_index']

# Register the nodes in order of first appearance in the node table.
G.add_nodes_from(df['geohash_id'].unique())

# When a geohash_id appears on several rows, the values of its last row win,
# which is what updating the node row by row would end up with. Doing it in bulk
# avoids one scalar .loc lookup per cell.
latest_rows = df.drop_duplicates(subset='geohash_id', keep='last').set_index('geohash_id')
nx.set_node_attributes(G, latest_rows[node_columns].to_dict(orient='index'))

# Add the edges; the edge table's F_1 / F_2 columns become the edge attributes
# weight_f1 / weight_f2. Plain tuples are much cheaper than iterrows() on a
# large edge table.
edge_columns = ['geohash6_point1', 'geohash6_point2', 'F_1', 'F_2']
G.add_edges_from(
    (source, target, {'weight_f1': f1, 'weight_f2': f2})
    for source, target, f1, f2 in edges[edge_columns].itertuples(index=False, name=None)
)

# Report the size of the graph.
print("图中的节点数量：", G.number_of_nodes())
print("图中的边数量：", G.number_of_edges())

# Learn a vector representation for every grid cell (graph node) with Word2Vec.
# NOTE(review): Word2Vec is not imported in this cell; presumably
# `from gensim.models import Word2Vec` was executed elsewhere -- confirm.
# NOTE(review): `model` is rebound here, replacing the Keras model created by the
# earlier cell in the same kernel.

# Attribute dict of every node (kept for any later use).
grid_features = [dict(G.nodes[geohash_id]) for geohash_id in G.nodes()]

# Word2Vec expects an iterable of token lists. Feeding it the attribute dicts
# makes the attribute *names* the vocabulary (iterating a dict yields its keys),
# so no geohash_id can be looked up afterwards. Instead, each "sentence" is a
# node followed by its out-neighbours, which guarantees that every node is a
# token. This is one simple choice; random walks over G are a common alternative.
node_sentences = [
    [str(node), *(str(neighbour) for neighbour in G.successors(node))]
    for node in G.nodes()
]

# Train the embedding model. gensim 4 renamed the `size` keyword to
# `vector_size`; try the new name first and fall back for older releases.
w2v_params = dict(window=5, min_count=1, workers=4)
try:
    model = Word2Vec(node_sentences, vector_size=100, **w2v_params)
except TypeError:
    model = Word2Vec(node_sentences, size=100, **w2v_params)

# Attach the learned vector to each node.
for geohash_id in G.nodes():
    G.nodes[geohash_id]['embedding'] = model.wv[str(geohash_id)]
# Draw the directed graph.
# Labels and per-edge arrow patches are only practical for small graphs; with
# hundreds of thousands of edges they make rendering impractically slow (the
# captured run of this cell ended in KeyboardInterrupt). Above the threshold the
# graph is drawn with small unlabeled nodes and thin plain lines instead.
MAX_DETAILED_NODES = 50
detailed = G.number_of_nodes() <= MAX_DETAILED_NODES

fig, ax = plt.subplots()
nx.draw(
    G,
    ax=ax,
    with_labels=detailed,
    arrows=detailed,
    node_size=300 if detailed else 10,
    width=1.0 if detailed else 0.05,
)

# Show the figure.
plt.show()

图中的节点数量： 1155
图中的边数量： 458013


KeyboardInterrupt: 