### Imports

In [None]:
!pip install kaleido

In [None]:
import os
import re
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

import kaleido
import plotly.express as px

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

import networkx as nx

# Silence library warnings so they do not clutter the notebook output.
import warnings
warnings.filterwarnings("ignore")

In [None]:
def import_df(path):
    '''
    import dataframe from stage_2_2_model_pipline

    Reads the pipe-separated ('|') CSV file at `path` and returns it as a
    DataFrame. Malformed rows (e.g. rows with too many fields) are skipped
    instead of aborting the whole import.
    '''
    try:
        # `on_bad_lines` replaces `error_bad_lines`, which was deprecated in
        # pandas 1.3 and removed in pandas 2.0.
        df_orig = pd.read_csv(path, sep='|', on_bad_lines='skip')
    except TypeError:
        # pandas < 1.3 does not know `on_bad_lines`; use the old keyword.
        df_orig = pd.read_csv(path, sep='|', error_bad_lines=False)

    return df_orig

def import_subtopics_list(subtopics_path):
    '''
    import subtopics from stage_2_1_models/topic_model

    Reads the comma-separated file at `subtopics_path` and returns its
    entries as a list of strings. Surrounding whitespace (including the
    newline at the end of the file) is stripped and empty entries are
    dropped.
    '''
    # The context manager closes the file even when reading fails.
    with open(subtopics_path, "r") as subtopics_file:
        raw_entries = subtopics_file.read().split(",")

    # The entries are later used as regex patterns: an empty pattern would
    # match every statement and a trailing newline would match none.
    subtopics = [entry.strip() for entry in raw_entries if entry.strip()]

    return subtopics

def import_subtopics_dict(d_topics_subtopics_path):
    '''
    deprecated placeholder for importing the topics/subtopics mapping
    from stage_2_1_models/topic_model

    The path argument is not read; the constant 0 is returned.
    '''
    placeholder = 0
    return placeholder

In [None]:
# Input CSV with the statements. The commented line is an alternative
# location (pipeline output); the active one points at the test dataset.
#path = '../stage_2_2_model_pipline/output/future_statements.csv'
path = '../../datasets/test_dataset_model_pipeline/future_statements.csv'

# Load the statements; `path` is reused further down to build the export path.
df_orig = import_df(path)

# Comma-separated list of subtopics (alternative location commented out).
#subtopics_path = '../stage_2_1_models/topic_model/subtopics.txt'
subtopics_path = '../topic_model/subtopics.txt'

subtopics = import_subtopics_list(subtopics_path)

# NOTE(review): the commented line below still names `subtopics_path`; it
# looks like a copy-paste leftover from the block above - confirm or remove.
#subtopics_path = '../stage_2_1_models/topic_model/subtopics.txt'
d_topics_subtopics_path = '../topic_model/d_topics_subtopics.txt'

# import_subtopics_dict is a deprecated stub that returns 0, so this value
# is 0 rather than a real topics/subtopics collection.
d_topics_subtopics_merged = import_subtopics_dict(d_topics_subtopics_path)

### Preprocessing

In [None]:
def major_lda_topic_to_topic(row, d_topics_subtopics):
    '''
    translate the numeric lda topic label of a row into a topic name (deprecated)

    row['major_lda_topic'] is compared with each position in the key order
    of d_topics_subtopics; the key at the matching position is returned.
    Returns None when no position matches.
    '''
    topic_names = list(d_topics_subtopics.keys())
    for position, topic_name in enumerate(topic_names):
        if row['major_lda_topic'] == position:
            return topic_name
    return None

def topic_to_subtopic_by_list(row, subtopics):
    '''
    get subtopic for each statement based on topic_eval subtopics-list

    Returns the first entry of `subtopics` that is found in
    row['statement']. Each entry is used as a regular-expression pattern
    and matched case-insensitively. Returns 'undefined' when no entry
    matches or when the statement is not a string (e.g. NaN for an empty
    CSV cell).
    '''
    statement = row['statement']
    # re.search raises TypeError on non-string input such as NaN.
    if not isinstance(statement, str):
        return 'undefined'

    for candidate in subtopics:
        if re.search(candidate, statement, re.IGNORECASE):
            return candidate
    return 'undefined'

def topic_to_subtopic_by_dict(row, d_topics_subtopics):
    '''
    get subtopic for each statement based on topic_eval topics-subtopics-dict

    row['major_lda_topic'] selects, by position, one value of
    d_topics_subtopics (an iterable of subtopics). The first of those
    subtopics found in row['statement'] is returned; each one is used as a
    case-insensitive regular-expression pattern.

    Returns 'undefined' when nothing matches, when the topic position does
    not exist in the dict, or when the statement is not a string (e.g. NaN).
    '''
    statement = row['statement']
    topic_position = row['major_lda_topic']
    # re.search raises TypeError on non-string input such as NaN.
    if not isinstance(statement, str):
        return 'undefined'

    for position, candidates in enumerate(d_topics_subtopics.values()):
        if topic_position != position:
            continue
        for candidate in candidates:
            if re.search(candidate, statement, re.IGNORECASE):
                return candidate
        break

    # Reached for "no subtopic matched" and for "unknown topic position", so
    # every row gets a string label instead of an implicit None.
    return 'undefined'

def subtopic_to_network_by_list(row, subtopics):
    '''
    create network attribute for each statement based on topic_eval subtopics_list

    Returns the list of all entries of `subtopics` that are found in
    row['statement'], in list order. Each entry is used as a
    case-insensitive regular-expression pattern. Entries that differ only
    in letter case are reported once (first spelling wins). A non-string
    statement (e.g. NaN) yields an empty list.
    '''
    network = []
    statement = row['statement']
    # re.search raises TypeError on non-string input such as NaN.
    if not isinstance(statement, str):
        return network

    # Casefolded spellings already added. The stored items keep their
    # original case, so the duplicate check needs this separate set.
    seen = set()
    for item in subtopics:
        key = item.casefold()
        if key in seen:
            continue
        if re.search(item, statement, re.IGNORECASE):
            seen.add(key)
            network.append(item)
    return network

def subtopic_to_network_by_dict(row, d_topics_subtopics_merged):
    '''
    create network attribute for each statement based on the merged
    subtopics of the topic_eval topics-subtopics-dict

    Returns the list of all items of `d_topics_subtopics_merged` that are
    found in row['statement'], in iteration order. Each item is used as a
    case-insensitive regular-expression pattern. Items that differ only in
    letter case are reported once (first spelling wins). A non-string
    statement (e.g. NaN) yields an empty list.
    '''
    network = []
    statement = row['statement']
    # re.search raises TypeError on non-string input such as NaN.
    if not isinstance(statement, str):
        return network

    # Casefolded spellings already added. The stored items keep their
    # original case, so the duplicate check needs this separate set.
    seen = set()
    for item in d_topics_subtopics_merged:
        key = item.casefold()
        if key in seen:
            continue
        if re.search(item, statement, re.IGNORECASE):
            seen.add(key)
            network.append(item)
    return network

In [None]:
# d_topics_subtopics_merged = []
# for i in range(0,len(d_topics_subtopics)):
#     d_topics_subtopics_merged += list(d_topics_subtopics.values())[i]

In [None]:
# Derive the 'subtopic' and 'network' columns row by row from the flat
# subtopics list; the commented lines are the dict-based alternatives.
#df_orig['topic'] = df_orig.apply(major_lda_topic_to_topic, axis=1, args=(d_topics_subtopics,))
df_orig['subtopic'] = df_orig.apply(topic_to_subtopic_by_list, axis=1, args=(subtopics,))
#df_orig['subtopic'] = df_orig.apply(topic_to_subtopic_by_dict, axis=1, args=(d_topics_subtopics,))
df_orig['network'] = df_orig.apply(subtopic_to_network_by_list, axis=1, args=(subtopics,))
#df_orig['network'] = df_orig.apply(subtopic_to_network_by_dict, axis=1, args=(d_topics_subtopics_merged,))

In [None]:
def rchop(s, suffix):
    '''
    return s without the trailing suffix; s is returned unchanged when the
    suffix is empty or s does not end with it
    '''
    has_suffix = bool(suffix) and s.endswith(suffix)
    if not has_suffix:
        return s
    cut = -len(suffix)
    return s[:cut]

In [None]:
# export dataframe to .csv
# The enriched frame is written next to the input as '<name>_vis.csv' and
# read back so that the following cells work on the exported data.
path = rchop(path, '.csv')
path_for_vis = path + '_vis' + '.csv'

df_orig.to_csv(path_for_vis, sep='|')
try:
    # `on_bad_lines` replaces `error_bad_lines`, which was deprecated in
    # pandas 1.3 and removed in pandas 2.0.
    df = pd.read_csv(path_for_vis, sep='|', on_bad_lines='skip')
except TypeError:
    # pandas < 1.3 does not know `on_bad_lines`; use the old keyword.
    df = pd.read_csv(path_for_vis, sep='|', error_bad_lines=False)

### Sentiments

In [None]:
# Numeric code for each sentiment label; the single source of truth for
# sentiment_label below.
d_sentiment = {'NEG':-1,'NEU':0,'POS':1}

def sentiment_label(row):
    '''
    map the sentiment label of a row to its numeric code

    Returns -1 for 'NEG', 0 for 'NEU' and 1 for 'POS'. Any other value in
    row['sentiment'] (including NaN) yields None.
    '''
    # Look the label up in d_sentiment so the mapping exists in one place
    # only, instead of being repeated in an if/elif chain.
    return d_sentiment.get(row['sentiment'])

In [None]:
# Add the numeric sentiment column by passing every row to sentiment_label.
df['n_sentiment'] = df.apply(sentiment_label, axis=1)

### Visualization

In [None]:
# Number of rows per topic.
dfg_t = df.groupby(['topic']).size().reset_index(name='count')

# Number of rows per sentiment label.
dfg_s = df.groupby(['sentiment']).size().reset_index(name='count')

# Number of rows per (topic, numeric sentiment) pair.
dfg_ts = df.groupby(['topic', 'n_sentiment']).size().reset_index(name='count')

# Row count and mean numeric sentiment per topic.
dfg_tm = (
    df.groupby(['topic'])
    .agg(count=('sentiment', 'size'), mean_sen=('n_sentiment', 'mean'))
    .reset_index()
)

# Row count and mean numeric sentiment per (topic, subtopic) pair.
dfg_tsm = (
    df.groupby(['topic', 'subtopic'])
    .agg(count=('sentiment', 'size'), mean_sen=('n_sentiment', 'mean'))
    .reset_index()
)

### Seaborn Init

In [None]:
# Fixed colour-scale midpoint: 0 is the numeric code of the neutral sentiment.
mid_sen = 0
# Data-driven colour-scale midpoint: mean numeric sentiment over all rows of df.
mean_sen = df['n_sentiment'].mean()

In [None]:
# Dark-grid seaborn theme with a 20x10 default figure size, set in one call.
sns.set(style="darkgrid", rc={'figure.figsize': (20, 10)})

In [None]:
# Reversed "coolwarm" palette, requested as a colormap object (as_cmap=True).
palette_1 = sns.color_palette("coolwarm_r", as_cmap=True)
# Discrete variant: three colours of the reversed "coolwarm" palette.
palette_2 = sns.color_palette("coolwarm_r", 3)
# First three colours of the "pastel" palette.
colors_topics_pastel = sns.color_palette('pastel')[0:3]
# Empty placeholder; nothing in the visible code fills or reads it.
palette_c = {}

### Seaborn Viz

#### Barplots

In [None]:
def barplots_topic(df, palette):
    '''
    draw a grouped bar chart of counts per topic, split by numeric
    sentiment, and save it as plots/bar_topics.png

    df      -- frame with the columns 'topic', 'n_sentiment' and 'count'
    palette -- seaborn palette used for the sentiment hue
    '''
    # Create the output folder if needed; savefig does not create it.
    os.makedirs('plots', exist_ok=True)

    fig, ax = plt.subplots(figsize=(20, 7))

    # topic + sentiment (bar)
    # Plot the frame that was passed in, not the module-level dfg_ts.
    sns.barplot(x = 'topic'
                , y = 'count'
                , data = df
                , hue='n_sentiment'
                , palette= palette
                #, dodge=False
                , ax=ax
                )
    fig.savefig('plots/bar_topics.png')

barplots_topic(df=dfg_ts, palette=palette_2)

#### Pieplots

In [None]:
def _show_and_save_sunburst(fig, size, filename):
    '''
    apply the square size x size layout, display the figure and write it
    to plots/<filename>
    '''
    fig.update_layout(
        autosize=False,
        width=size,
        height=size)
    fig.show()
    fig.write_image("plots/" + filename)


def pieplots_topic(pieplots_input, size):
    '''
    draw, show and save five sunburst charts of topics and subtopics

    pieplots_input -- frame with the columns 'topic', 'subtopic', 'count'
                      and 'mean_sen'
    size           -- width and height of every chart

    The images are written to the 'plots' folder, which is created when it
    does not exist. The module-level names dfg_tsm, mid_sen and mean_sen
    are read as well.
    '''
    os.makedirs('plots', exist_ok=True)

    #sunburst chart of topics
    # NOTE(review): this chart reads the module-level dfg_tsm instead of
    # pieplots_input - presumably to show all topics unfiltered; confirm.
    fig = px.sunburst(dfg_tsm
        ,path=['topic']
        ,values='count'
        ,branchvalues='total'
        ,title="Topics by Occurence"
        )
    _show_and_save_sunburst(fig, size, "pie_topics_by_occ.png")

    #sunburst chart of subtopics
    fig = px.sunburst(pieplots_input
        ,path=['subtopic']
        ,values='count'
        ,branchvalues='total'
        ,title="Subtopics by Occurence"
        )
    _show_and_save_sunburst(fig, size, "pie_subtopics_by_occ.png")

    #sunburst chart of topics + subtopics
    fig = px.sunburst(pieplots_input
        ,path=['topic', 'subtopic']
        ,values='count'
        #,branchvalues='total'
        ,title="Topics & Subtopics by Occurence"
        )
    _show_and_save_sunburst(fig, size, "pie_topics_subtopics_by_occ.png")

    #topics + subtopics coloured by mean sentiment, colour scale centred on neutral (0)
    fig = px.sunburst(pieplots_input #dfg_tsm[dfg_tsm['subtopic']!='undefined']
        ,path=['topic', 'subtopic']
        ,values='count'
        ,color='mean_sen'
        ,color_continuous_scale='RdBu'
        ,color_continuous_midpoint=mid_sen
        ,title="Topics & Subtopics (sentiment: NEUTRAL=0)"
        )
    _show_and_save_sunburst(fig, size, "pie_topics_subtopics_by_occ_sent_neu.png")

    #topics + subtopics coloured by mean sentiment, colour scale centred on the overall mean
    fig = px.sunburst(pieplots_input #dfg_tsm[dfg_tsm['subtopic']!='undefined']
        ,path=['topic', 'subtopic']
        ,values='count'
        ,color='mean_sen'
        ,color_continuous_scale='RdBu'
        ,color_continuous_midpoint=mean_sen
        ,title="Topics & Subtopics (sentiment: MEAN=%s)"%round(mean_sen,4)
        )
    _show_and_save_sunburst(fig, size, "pie_topics_subtopics_by_occ_sent_mean.png")

In [None]:
# Input frame for the pie plots. Each assignment overwrites the previous one
# and every filter starts again from dfg_tsm, so only the last filter
# (count >= mean count) is in effect.
# NOTE(review): presumably the first two lines are alternatives kept for
# toggling by hand - confirm, or chain the filters if they should combine.
pieplots_input = dfg_tsm
pieplots_input = dfg_tsm[dfg_tsm['subtopic']!='undefined']
pieplots_input = dfg_tsm[dfg_tsm['count']>=dfg_tsm['count'].mean()]

# Width and height passed to every chart.
size = 600

pieplots_topic(pieplots_input, size)

In [None]:
# Sunburst chart of the topics, sized by their counts.
fig = px.sunburst(
    dfg_tsm,
    path=['topic'],
    values='count',
    branchvalues='total',
    title="Topics by Occurence",
)
fig.update_layout(autosize=False, width=600, height=600)
fig.show()

In [None]:
# Sunburst chart of the subtopics, leaving out the 'undefined' rows.
fig = px.sunburst(
    dfg_tsm[dfg_tsm['subtopic']!='undefined'],
    path=['subtopic'],
    values='count',
    branchvalues='total',
    title="Subtopics by Occurence",
)
fig.update_layout(autosize=False, width=600, height=600)
fig.show()

In [None]:
# Two-level sunburst chart: topics on the inner ring, subtopics on the outer.
# Reference: https://plotly.com/python/sunburst-charts/
# Alternative inputs: dfg_tsm[dfg_tsm['subtopic']!='undefined']
#                     dfg_tsm[dfg_tsm['count']>=dfg_tsm['count'].mean()]
fig = px.sunburst(
    dfg_tsm,
    path=['topic', 'subtopic'],
    values='count',
    #branchvalues='total',
    title="Topics & Subtopics by Occurence",
)
fig.update_layout(autosize=False, width=600, height=600)
fig.show()

In [None]:
# Topics and subtopics coloured by mean sentiment; the colour scale is
# centred on mid_sen.
# Alternative input: dfg_tsm[dfg_tsm['subtopic']!='undefined']
fig = px.sunburst(
    dfg_tsm,
    path=['topic', 'subtopic'],
    values='count',
    color='mean_sen',
    color_continuous_scale='RdBu',
    color_continuous_midpoint=mid_sen,
    title="Topics & Subtopics (sentiment: NEUTRAL=0)",
)
fig.update_layout(autosize=False, width=600, height=600)
fig.show()

In [None]:
# Topics and subtopics coloured by mean sentiment; the colour scale is
# centred on the overall mean sentiment (mean_sen).
# Alternative input: dfg_tsm[dfg_tsm['subtopic']!='undefined']
fig = px.sunburst(
    dfg_tsm,
    path=['topic', 'subtopic'],
    values='count',
    color='mean_sen',
    color_continuous_scale='RdBu',
    color_continuous_midpoint=mean_sen,
    title="Topics & Subtopics (sentiment: MEAN=%s)"%round(mean_sen,4),
)
fig.update_layout(autosize=False, width=600, height=600)
fig.show()

### Network Graph

In [None]:
# Empty undirected graph; nodes and edges are added in the following cells.
G = nx.Graph()
# NOTE(review): G has no nodes at this point, so this layout is computed for
# an empty graph. Recompute it after the nodes are added before using pos.
pos = nx.spring_layout(G)  # positions for all nodes

In [None]:
# Register the three example nodes; no size attribute is set on them yet.
G.add_nodes_from(['money', 'forex', 'crypto'])

In [None]:
# Connect the example nodes; the third tuple element is stored as 'weight'.
G.add_weighted_edges_from([
    ('money', 'forex', 5),
    ('forex', 'crypto', 1),
    ('money', 'crypto', 4),
])

In [None]:
# Print every node of the graph (iterating a Graph yields its nodes).
for node in G:
    print(node)

In [None]:
# One colour per node, in node order: red when the node's optional 'size'
# attribute is negative, blue otherwise. Nodes without the attribute count
# as 0. The attribute is read from the graph's node data; the previous
# `for node.size in G` tried to assign to an attribute of a string.
color_map = ['red' if G.nodes[n].get('size', 0) < 0 else 'blue' for n in G]

In [None]:
# Draw the graph with a spring layout, using the per-node colours from
# color_map (the keyword argument previously had no value).
nx.draw_spring(G, node_color=color_map)

In [None]:
# Compute the layout now that G has its nodes; the earlier `pos` was
# computed while the graph was still empty and holds no positions.
pos = nx.spring_layout(G)
nx.draw_networkx_nodes(G, pos, node_color='tab:red')