# Visualising Topic Similarities

### Imports

In [None]:
import os
import sys

# Make the notebook's parent directory importable so the project packages
# used in later cells can be resolved. The membership check keeps re-runs of
# this cell from appending the same entry twice.
module_path = os.path.abspath(os.pardir)
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

In [None]:
import pandas as pd
import numpy as np
import topicmodelling.utilities.plotting as plotting
import topicmodelling.classes
import networking.classes
import gensim

In [None]:
import apiIntegrations.ga

## Load Data

In [None]:
# Location of the pre-processed summaries on the author's machine.
# NOTE(review): the file name ends in .csv but it is loaded with read_pickle,
# so it is presumably a pickled DataFrame with a misleading extension - confirm.
# NOTE(review): absolute, user-specific path; the notebook will not run on
# another machine without editing it.
data_path = r'C:\Users\Tobias Fechner\Documents\1_Uni\fyp\git_repo_fyp\data\processed\all_summaries_cutYr_cutLen.csv'

# Unpickling executes arbitrary code, so only load files from a trusted source.
df = pd.read_pickle(data_path)

## Train Model

In [None]:
# Run parameters shared by the cells below.
year = 2015        # publication year used to filter the data frame
iterations = 2000  # total number of training iterations passed to hdp.train

In [None]:
# Corpus restricted to the documents published in the selected year.
c = topicmodelling.classes.Corpus('test')

published_in_year = df["yearPublished"] == year
c.data = df.loc[published_in_year]

# Gensim dictionary built from the token lists of the filtered documents.
c.dictionary = gensim.corpora.dictionary.Dictionary(c.data.tokensProcessed)

In [None]:
# Starting number of topics for the HDP model: one third of the number of
# documents in the corpus, rounded down.
n_documents = len(c.data)
initial_k = n_documents // 3
print(initial_k)

In [None]:
# Create the HDP model wrapper, initialise it on the corpus and train it.
hdp = topicmodelling.classes.TomotopyHDP('HDP')
hdp.instantiateModel(
    c,
    gamma=1,
    alpha=0.1,
    initial_k=initial_k,
)

# The returned results are indexed by 'Number of Topics' in the plotting cell.
trainingResults = hdp.train(
    c,
    iterations=iterations,
    chunkSize=100,
    evaluate=False,
    printDuring=False,
)

In [None]:
# x-axis: 100, 200, ..., iterations. The step of 100 mirrors the chunkSize=100
# passed to hdp.train; presumably one topic count is recorded per chunk, so
# the two values must be changed together - TODO confirm.
iteration_ticks = np.arange(100, iterations+100, 100)
topic_counts = trainingResults['Number of Topics']

fig = plotting.plotLine(
    iteration_ticks,
    topic_counts,
    xTitle="Iteration",
    yTitle="Number of Topics",
)
fig.update_layout(title_text=f"Model: HDP - Iterations: {iterations} - Year: {year}")

## Prepare Data for Network

In [None]:
# Derive the per-topic table from the trained model, then compute the
# networking data between topics (presumably this is what fills c.topics,
# including the degree column plotted in the next cell - TODO confirm).
max_topic_distance = 0.46  # threshold handed to getTopicNetworkingData as maxDistance

c.getTopicsTable(hdp)
c.getTopicNetworkingData(maxDistance=max_topic_distance)

In [None]:
# Bar chart of each topic's degree against its topic ID.
# Uses the `plotting` alias imported at the top of the notebook, matching the
# plotLine call above, instead of spelling out the full dotted module path.
plotting.plotBar(
    c.topics.index,
    c.topics.degree,
    xTitle="Topic ID",
    yTitle="No. Connecting Topics",
)

In [None]:
# Only networking.classes is imported at the top of the notebook; importing a
# package's submodule does not make its sibling submodules available, so
# networking.utilities is imported explicitly here rather than relying on it
# being loaded as a side effect elsewhere.
import networking.utilities

# Node and edge data for the graph, restricted to connected topics.
nodes, edges = networking.utilities.getNodesEdges(c, onlyConnectedTopics=True)

### Create Graph Object with Graph Class

In [None]:
# Wrap the node/edge data in the project's Graph class and draw it.
netty = networking.classes.Graph(nodes, edges)
netty.printGraph(
    layout='spring',
    nodeSizeCorrection=0.3,
    weightCorrectionFactor=1,
    weightCorrectionExponent=1,
)