In [5]:
import os

# Every relative path used further down resolves against this directory.
PROJECT_ROOT = "/content/drive/MyDrive/Notebooks/PAPER_EXPERIMENT"
os.chdir(PROJECT_ROOT)

In [6]:
import pandas as pd
import numpy as np
# Import `to_categorical` from the public `keras.utils` namespace: the
# `keras.utils.np_utils` submodule is not available in recent Keras releases,
# while `keras.utils.to_categorical` exists in both old and new versions.
from keras.utils import to_categorical
from tqdm import tqdm

# Construct Word Index for SA

In [None]:
# Load the word-index matrices of the three splits in one pass.
X_train, X_dev, X_test = map(
    np.load,
    (
        "MultiLabel_Classification/data/X_train.npy",
        "MultiLabel_Classification/data/X_dev.npy",
        "MultiLabel_Classification/data/X_test.npy",
    ),
)

In [None]:
# Read the whitespace-separated vocabulary; a token's position in the list is
# its word index. The encoding is given explicitly so decoding does not depend
# on the platform's locale (the list is later searched for Chinese tokens).
# NOTE(review): assumes the file is UTF-8 encoded -- confirm.
with open("Word2Vec/vocab.txt", "r", encoding="utf-8") as fp:
    vocab = fp.read().split()

In [None]:
def construct_dataset(dataset, vocabulary=None):
    """Turn each review's word ids into ``[word_id, start, end]`` triples.

    ``start`` and ``end`` come from a running offset counter that begins at 1
    for every review and advances by the width of each token:

    * an ordinary token advances it by the length of its vocabulary string;
    * ``"[UNK]"`` advances it by exactly one;
    * ``"[PAD]"`` does not advance it, so its span is empty (``start == end``).

    Args:
        dataset: Iterable of reviews, each an iterable of integer indices
            into the vocabulary.
        vocabulary: Optional sequence mapping a word id to its string.
            Defaults to the notebook-level ``vocab`` list.

    Returns:
        ``np.ndarray`` built from the per-review lists of triples.
    """
    words = vocab if vocabulary is None else vocabulary
    rtn = []
    for review in tqdm(dataset):
        spans = []
        pointer = 1
        for w in review:
            word = words[w]
            if word == "[PAD]":
                width = 0
            elif word == "[UNK]":
                width = 1
            else:
                width = len(word)
            spans.append([w, pointer, pointer + width])
            pointer += width
        rtn.append(spans)
    return np.array(rtn)

In [None]:
# Convert every split with the same routine, in train / dev / test order.
X_train_SA, X_dev_SA, X_test_SA = [
    construct_dataset(split) for split in (X_train, X_dev, X_test)
]

100%|██████████| 36850/36850 [00:13<00:00, 2664.12it/s]
100%|██████████| 4940/4940 [00:00<00:00, 5513.85it/s]
100%|██████████| 4940/4940 [00:01<00:00, 4309.36it/s]


In [None]:
# Create the output directory if needed. `makedirs(..., exist_ok=True)` also
# creates a missing parent folder and does not fail when the directory already
# exists, which the separate exists()/mkdir() pair could not guarantee.
os.makedirs("Aspect_Sentiment_Analysis/data", exist_ok=True)


np.save("Aspect_Sentiment_Analysis/data/X_train_SA", X_train_SA)
np.save("Aspect_Sentiment_Analysis/data/X_dev_SA", X_dev_SA)
np.save("Aspect_Sentiment_Analysis/data/X_test_SA", X_test_SA)

# Construct Sentiment Label Vector for SA

In [None]:
# Read the three review tables in train / dev / test order.
train_reviews, dev_reviews, test_reviews = (
    pd.read_csv(csv_path)
    for csv_path in (
        "ASAP_clean/train.csv",
        "ASAP_clean/dev.csv",
        "ASAP_clean/test.csv",
    )
)

In [None]:
# Label block of each split: every column except the first and the last.
# NOTE(review): presumably these are the per-aspect sentiment columns -- confirm.
train_labels = train_reviews.iloc[:, 1:-1].to_numpy()
dev_labels = dev_reviews.iloc[:, 1:-1].to_numpy()
test_labels = test_reviews.iloc[:, 1:-1].to_numpy()

# Without `num_classes`, `to_categorical` sizes the one-hot axis from the
# largest value of the array it receives, so a split that happens to miss the
# top class would get a narrower last dimension than the others. Use one
# class count derived from all splits so the three arrays always agree.
num_sentiment_classes = int(
    max(train_labels.max(), dev_labels.max(), test_labels.max())
) + 1

y_train = to_categorical(train_labels, num_classes=num_sentiment_classes)
y_dev = to_categorical(dev_labels, num_classes=num_sentiment_classes)
y_test = to_categorical(test_labels, num_classes=num_sentiment_classes)

In [None]:
# Create the output directory if needed. `makedirs(..., exist_ok=True)` also
# creates a missing parent folder and does not fail when the directory already
# exists, which the separate exists()/mkdir() pair could not guarantee.
os.makedirs("Aspect_Sentiment_Analysis/data", exist_ok=True)


np.save("Aspect_Sentiment_Analysis/data/y_train", y_train)
np.save("Aspect_Sentiment_Analysis/data/y_dev", y_dev)
np.save("Aspect_Sentiment_Analysis/data/y_test", y_test)

# Construct Predicted Dimension Label Set

In [None]:
# Load the saved dimension predictions for the three splits.
y_train_pred, y_dev_pred, y_test_pred = map(
    np.load,
    (
        "MultiLabel_Classification/data/y_train_pred.npy",
        "MultiLabel_Classification/data/y_dev_pred.npy",
        "MultiLabel_Classification/data/y_test_pred.npy",
    ),
)

In [None]:
# Wrap each loaded prediction array in a DataFrame (names are rebound in place).
y_train_pred, y_dev_pred, y_test_pred = (
    pd.DataFrame(matrix) for matrix in (y_train_pred, y_dev_pred, y_test_pred)
)

In [None]:
# The label order is the same as in CNN-LSAN.ipynb.
DIMENSION_LABELS = ["价格", "菜品", "位置", "环境", "服务"]

# Give every split the same column names; each frame gets its own list copy.
for predictions in (y_train_pred, y_dev_pred, y_test_pred):
    predictions.columns = list(DIMENSION_LABELS)

In [None]:
# Read the whitespace-separated vocabulary; a token's position in the list is
# its word index. The encoding is given explicitly so decoding does not depend
# on the platform's locale (the list is searched for Chinese label tokens).
# NOTE(review): assumes the file is UTF-8 encoded -- confirm.
with open("Word2Vec/vocab.txt", "r", encoding="utf-8") as fp:
    vocab = fp.read().split()

In [None]:
def label2index(df, vocabulary=None):
    """Convert a 0/1 label frame into per-row vocabulary indices, largest first.

    Every cell equal to 1 is replaced by the vocabulary index of its column
    name; every other cell is replaced by the index of ``"[PAD]"``. Each row
    is then sorted so that its indices appear in descending order.

    The vocabulary is searched once per column (plus once for ``"[PAD]"``)
    rather than once per cell, which keeps the cost independent of the number
    of rows.

    Args:
        df: DataFrame whose column names are vocabulary tokens and whose
            cells are compared against 1. It is not modified.
        vocabulary: Optional list of tokens to look indices up in. Defaults
            to the notebook-level ``vocab`` list.

    Returns:
        Integer ``np.ndarray`` with the same shape as ``df``.

    Raises:
        ValueError: if ``"[PAD]"``, or a column name that has at least one
            cell equal to 1, is missing from the vocabulary.
    """
    words = vocab if vocabulary is None else vocabulary
    pad_index = words.index("[PAD]")

    # Start from an all-padding matrix and overwrite the flagged cells.
    indices = np.full(df.shape, pad_index, dtype=np.int64)
    for position, label in enumerate(df.columns):
        flagged = df.iloc[:, position].to_numpy() == 1
        if flagged.any():
            indices[flagged, position] = words.index(label)

    # Ascending sort followed by a column reversal gives descending rows.
    indices.sort(axis=1)
    return indices[:, ::-1]

In [None]:
# Map the predicted labels of each split to vocabulary indices.
y_train_pred_index, y_dev_pred_index, y_test_pred_index = [
    label2index(frame) for frame in (y_train_pred, y_dev_pred, y_test_pred)
]

In [None]:
# Create the output directory if needed, matching the guard used by the other
# save cells in this notebook, so this cell does not depend on them having run.
os.makedirs("Aspect_Sentiment_Analysis/data", exist_ok=True)

np.save("Aspect_Sentiment_Analysis/data/pred_dim_train", y_train_pred_index)
np.save("Aspect_Sentiment_Analysis/data/pred_dim_dev", y_dev_pred_index)
np.save("Aspect_Sentiment_Analysis/data/pred_dim_test", y_test_pred_index)

# Construct Attention Mask for Predicted Dimension Label Set

In [None]:
# Read the saved label-index matrices back from disk.
y_train_pred_index, y_dev_pred_index, y_test_pred_index = map(
    np.load,
    (
        "Aspect_Sentiment_Analysis/data/pred_dim_train.npy",
        "Aspect_Sentiment_Analysis/data/pred_dim_dev.npy",
        "Aspect_Sentiment_Analysis/data/pred_dim_test.npy",
    ),
)

In [None]:
# Index value that is flagged in the mask.
# NOTE(review): assumes "[PAD]" is vocabulary index 0 -- confirm against vocab.txt.
PAD_INDEX = 0

# The mask is 1 where the label index equals PAD_INDEX and 0 everywhere else.
dim_attention_mask_train, dim_attention_mask_dev, dim_attention_mask_test = (
    np.equal(index_matrix, PAD_INDEX) * 1
    for index_matrix in (y_train_pred_index, y_dev_pred_index, y_test_pred_index)
)

In [None]:
# Write the three masks next to the other Aspect_Sentiment_Analysis inputs.
for out_path, mask in (
    ("Aspect_Sentiment_Analysis/data/dim_attention_mask_train", dim_attention_mask_train),
    ("Aspect_Sentiment_Analysis/data/dim_attention_mask_dev", dim_attention_mask_dev),
    ("Aspect_Sentiment_Analysis/data/dim_attention_mask_test", dim_attention_mask_test),
):
    np.save(out_path, mask)