In [6]:
import pandas as pd

# Reload the argument and chunk tables from their ";"-separated CSV exports.
df_arguments, df_chunks = (
    pd.read_csv(csv_path, sep=";")
    for csv_path in ("./data/df_arguments.csv", "./data/df_chunks.csv")
)

In [7]:
# Preview the first five rows of the arguments table.
df_arguments.head(n=5)

Unnamed: 0,argument,score
0,I always get a half size up in my tennis shoes...,3
1,Put them on and walked 3 hours with no problem...,5
2,excelente,5
3,The shoes fit well in the arch area. They are ...,4
4,Tried them on in a store before buying online ...,5


In [8]:
# Export the chunk table without the "topic_prob" and "embedding" columns.
# The former leading `df_chunks.head()` was dropped: as a non-final expression
# its result was discarded and never displayed.
(
    df_chunks
    .drop(columns=["topic_prob", "embedding"])
    .to_csv("./data/df_chunks_no_ranks.csv", sep=";", index=False)
)

In [9]:
from orangecontrib.argument.miner.processor import ArgumentProcessor

# Wrap the arguments table in an ArgumentProcessor; the following cells call
# compute_readability() on it and export processor.df.
processor = ArgumentProcessor(df_arguments)

In [10]:
# Run the processor's readability computation. Presumably this stores its
# results on processor.df (including a "readable" column, which the export
# cell drops) — TODO confirm against ArgumentProcessor.
processor.compute_readability()

In [11]:
# Write the processed arguments table to a ";"-separated CSV, leaving out the
# "readable" column and the index.
(
    processor.df
    .drop(columns=["readable"])
    .to_csv("./data/df_arguments_readability.csv", sep=";", index=False)
)

In [36]:
# Merge the topics of all chunks of each argument into one list per argument_id
# (the row order of the chunks within an argument is preserved).
#
# The topics are collected with agg(list) directly from the integer "topic"
# column. The earlier approach first overwrote df_chunks["topic"] with
# one-element lists and then summed them; that mutated df_chunks in place, so
# re-running the cell nested the lists one level deeper each time, and summing
# lists repeatedly copies them (quadratic in the number of chunks per argument).
df_chunks_merged_topic = (
    df_chunks
    .groupby(by="argument_id", as_index=False)["topic"]
    .agg(list)
)
df_chunks_merged_topic.head()

Unnamed: 0,argument_id,topic
0,0,"[3, 10]"
1,1,"[24, 24, 2, 8]"
2,2,[-1]
3,3,"[21, 10, 10, 23]"
4,4,"[12, 0, 25, 0, 5]"


In [37]:
# Drop topic id -1 from every argument's topic list and collapse duplicates by
# storing the remaining ids as a set.
# NOTE(review): -1 is presumably the outlier/unassigned topic — confirm against the topic model.
df_chunks_merged_topic["topic"] = df_chunks_merged_topic["topic"].apply(
    lambda topics: {topic_id for topic_id in topics if topic_id != -1}
)
df_chunks_merged_topic.head()

Unnamed: 0,argument_id,topic
0,0,"{10, 3}"
1,1,"{24, 8, 2}"
2,2,{}
3,3,"{10, 21, 23}"
4,4,"{0, 25, 12, 5}"


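# Start from here!

Run the cells in this section (In [1]–[5]) before the cells above: they create the `./data/*.csv` files that the earlier cells load.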

In [1]:
import pandas as pd

# Never truncate displayed frames: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Load the reviews (one JSON record per line) and rename the review text and
# rating columns to the "argument" / "score" names used in the rest of the notebook.
fpath = '../example/data/data_processed_1prod_full.json'
df_arguments = (
    pd.read_json(fpath, lines=True)
    .rename(columns={"reviewText": "argument", "overall": "score"})
)
df_arguments.head()

Unnamed: 0,argument,score
0,I always get a half size up in my tennis shoes...,3
1,Put them on and walked 3 hours with no problem...,5
2,excelente,5
3,The shoes fit well in the arch area. They are ...,4
4,Tried them on in a store before buying online ...,5


In [2]:
from orangecontrib.argument.miner.topic import ArgumentChunker

# Build the chunker from the argument texts; astype(str) casts every entry of
# the "argument" column to a string before it is handed over.
chunker = ArgumentChunker(df_arguments["argument"].astype(str))
# Chunk-level table; per the output below it carries argument_id, chunk, topic,
# topic_prob, embedding and polarity_score, with several rows per argument.
df_chunks = chunker.get_chunk_table()
df_chunks.head()

  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


Unnamed: 0,argument_id,chunk,topic,topic_prob,embedding,polarity_score
0,0,I always get a half size up in my tennis shoes .,2,0.0,"[6.204493999481201, -2.7216644287109375, -0.90...",-0.166667
1,0,For some reason these feel to big in the heel ...,12,0.741349,"[6.8902482986450195, -2.6051783561706543, -0.7...",-0.05
2,1,walked 3 hours with no problem,21,0.0,"[8.555655479431152, -1.1300060749053955, -2.04...",0.0
3,1,Put them on and !,21,1.0,"[7.501819610595703, -0.8035226464271545, -2.37...",0.0
4,1,Love them !,0,1.0,"[4.649739742279053, 1.449140191078186, -0.8750...",0.625


In [3]:
# Fetch the topic table from the chunker's topic model and lower-case the
# "Topic", "Count" and "Name" column labels.
df_topics = chunker.topic_model.get_topic_table().rename(
    columns={"Topic": "topic", "Count": "count", "Name": "name"}
)
df_topics.head()

Unnamed: 0,topic,count,name,keywords,keyword_scores
0,-1,30,-1_excelente_stylish_footbed_hate,"[excelente, stylish, footbed, hate, restrict, ...","[1.005340421809361, 0.9126063268536753, 0.8237..."
1,0,79,0_favorite_liked_turned_absolute,"[favorite, liked, turned, absolute, love, easy...","[0.6667203262336039, 0.6401710492224683, 0.598..."
2,1,90,1_fits_glove_fit_perfect,"[fits, glove, fit, perfect, expected, appropri...","[0.47328045312033684, 0.4555179450340681, 0.43..."
3,2,77,2_half_ordered_large_bigger,"[half, ordered, large, bigger, size, larger, u...","[0.453080772349464, 0.43214954179755577, 0.420..."
4,3,54,3_attractive_sturdy_allow_bad,"[attractive, sturdy, allow, bad, people, issue...","[0.5199144783679485, 0.4826786059371195, 0.467..."


In [4]:
import os

# Output locations for the arguments, chunks and topics tables.
fpath_arguments = "./data/df_arguments.csv"
fpath_chunks = "./data/df_chunks.csv"
fpath_topics = "./data/df_topics.csv"

# to_csv does not create missing directories, so make sure the output directory
# (shared by all three paths) exists; otherwise this cell fails on a checkout
# that does not yet contain ./data.
os.makedirs(os.path.dirname(fpath_arguments), exist_ok=True)

# Persist each table as a ";"-separated CSV without the index column.
df_arguments.to_csv(fpath_arguments, sep=";", index=False)
df_chunks.to_csv(fpath_chunks, sep=";", index=False)
df_topics.to_csv(fpath_topics, sep=";", index=False)

In [5]:
# Read the topic table back from disk to check the export.
# NOTE(review): list-valued columns appear to come back as strings after the CSV
# round trip (the keywords are shown with quotes in the output) — parse them
# before using them as lists.
pd.read_csv(fpath_topics, sep=";").head(n=5)

Unnamed: 0,topic,count,name,keywords,keyword_scores
0,-1,30,-1_excelente_stylish_footbed_hate,"['excelente', 'stylish', 'footbed', 'hate', 'r...","[1.005340421809361, 0.9126063268536753, 0.8237..."
1,0,79,0_favorite_liked_turned_absolute,"['favorite', 'liked', 'turned', 'absolute', 'l...","[0.6667203262336039, 0.6401710492224683, 0.598..."
2,1,90,1_fits_glove_fit_perfect,"['fits', 'glove', 'fit', 'perfect', 'expected'...","[0.47328045312033684, 0.4555179450340681, 0.43..."
3,2,77,2_half_ordered_large_bigger,"['half', 'ordered', 'large', 'bigger', 'size',...","[0.453080772349464, 0.43214954179755577, 0.420..."
4,3,54,3_attractive_sturdy_allow_bad,"['attractive', 'sturdy', 'allow', 'bad', 'peop...","[0.5199144783679485, 0.4826786059371195, 0.467..."
