<a href="https://colab.research.google.com/github/1262PCS/B7_FINAL-YEAR-PROJECT/blob/main/visualization/visualization_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and installations

In [None]:
!pip install torch-geometric

Collecting torch-geometric
  Downloading torch_geometric-2.5.1-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.5.1


In [None]:
import tarfile
import gzip
import io
import pandas as pd
import re
from torch_geometric.datasets import Planetoid

#for hovering visualization
import networkx as nx
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
cora_url = 'https://people.cs.umass.edu/~mccallum/data/cora-ie.tar.gz'
!wget {cora_url}
cora_tarball_path = 'cora-ie.tar.gz'

--2024-03-13 06:55:36--  https://people.cs.umass.edu/~mccallum/data/cora-ie.tar.gz
Resolving people.cs.umass.edu (people.cs.umass.edu)... 128.119.240.99
Connecting to people.cs.umass.edu (people.cs.umass.edu)|128.119.240.99|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 465405 (454K) [application/x-gzip]
Saving to: ‘cora-ie.tar.gz’


2024-03-13 06:55:37 (1.15 MB/s) - ‘cora-ie.tar.gz’ saved [465405/465405]



# Extracting paper details

In [None]:
# Read the gzipped tagged-headers member out of the downloaded tarball.
# The member is kept in memory as raw (still gzip-compressed) bytes; nothing is written to disk.
chosen_file = 'cora-ie/tagged_headers.txt.gz'
with tarfile.open(cora_tarball_path, 'r:gz') as tar:
    member_stream = tar.extractfile(chosen_file)
    gzipped_data = member_stream.read()

In [None]:
# Decompress the gzip payload, then interpret the resulting bytes as UTF-8 text.
header_bytes = gzip.decompress(gzipped_data)
decoded_data = header_bytes.decode('utf-8')

In [None]:
# Split the decoded text into one chunk per paper header.
# A record starts right after a <NEW_HEADER> marker and runs up to the next marker or the
# end of the text. The terminating marker is matched with a lookahead so it is NOT
# consumed: re.findall returns non-overlapping matches, so a pattern that consumes the
# closing marker ('<NEW_HEADER>(.*?)<NEW_HEADER>') uses it up and therefore skips every
# second record and always drops the last one.
pattern = re.compile(r'<NEW_HEADER>(.*?)(?=<NEW_HEADER>|\Z)', re.DOTALL)
# Discard whitespace-only chunks (e.g. produced by a trailing marker) — they carry no fields.
matches = [chunk for chunk in pattern.findall(decoded_data) if chunk.strip()]

In [None]:
# Turn every header chunk into a dict: a running paper_id plus one entry per tag.
# For each tag only the first <tag>...</tag> occurrence is kept (whitespace-stripped);
# a tag that does not occur in the chunk is stored as None.
header_tags = ['title', 'author', 'pubnum', 'date', 'abstract', 'affiliation', 'address', 'page']
# Compile each tag's pattern once up front instead of once per paper.
tag_patterns = {tag: re.compile(fr'<{tag}>(.*?)<\/{tag}>', re.DOTALL) for tag in header_tags}

data = []
for paper_id, header_text in enumerate(matches):
    # Remove the '+L+' markers and blank-line gaps before searching for tags.
    cleaned_match = header_text.replace('+L+\n\n', '').replace('+L+', '').replace('\n\n','')

    paper_info = {'paper_id': paper_id}
    for tag, tag_pattern in tag_patterns.items():
        found = tag_pattern.search(cleaned_match)
        paper_info[tag] = found.group(1).strip() if found else None
    data.append(paper_info)

In [None]:
# Tabulate the parsed headers: one row per paper, one column per extracted field.
cora_df = pd.DataFrame(data)
# Leave the frame as the cell's last expression so the notebook renders it as a rich
# table; print() falls back to plain text that wraps and truncates the wide columns.
cora_df.head()

   paper_id                                              title  \
0         0  A Model-Based Approach to Analogical Reasoning...   
1         1  A Trip-based Multicasting Model in Wormhole-ro...   
2         2               Space Deformation using Ray Deectors   
3         3      Efficient Rasterization of Implicit Functions   
4         4  Alleviating Consumption Channel Bottleneck in ...   

                                              author        pubnum  \
0                                Sambasiva R. Bhatta  GIT-CC-92/60   
1  Yu-Chee Tseng, Dhabaleswar K. Panda, Member, I...          None   
2                        Yair Kurzion and Roni Yagel          None   
3                       Torsten Mller and Roni Yagel          None   
4            Debashis Basak and Dhabaleswar K. Panda          None   

            date                                           abstract  \
0  November 1992                                               None   
1           None  Abstract| This paper f

In [None]:
# Spot-check a single record: print the affiliation value(s) stored for paper_id 260.
is_paper_260 = cora_df['paper_id'] == 260
print(cora_df.loc[is_paper_260, "affiliation"].values)

['1 Federal Aviation Administration, Technical Center,']


# Aligning datasets

In [None]:
# Load the Planetoid 'Cora' dataset rooted at the current directory.
dataset = Planetoid(name='Cora', root='.')
# Take the dataset's first element as the graph object used below.
# NOTE: this rebinds `data`, which until now held the list of parsed header dicts; the
# DataFrame cell above cannot be re-run after this one without re-running the extraction loop.
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [None]:
# Build paper_id -> attribute lookup dicts used for the hover text.
# NOTE(review): paper_id is just the enumeration index assigned while parsing the header
# file, while the lookups below are later queried with graph node ids. Nothing visible here
# establishes that the two numberings refer to the same papers — confirm the alignment.
papers_by_id = cora_df.set_index("paper_id")
aligned_titles = papers_by_id["title"].to_dict()
aligned_authors = papers_by_id["author"].to_dict()
aligned_affiliations = papers_by_id["affiliation"].to_dict()
aligned_dates = papers_by_id["date"].to_dict()

# Visualization

In [None]:
# Build an undirected NetworkX graph with one node per index in range(data.num_nodes).
node_ids = range(data.num_nodes)
# Transposed edge_index converted to a nested list; presumably (2, E) -> E [source, target]
# pairs — confirm against the dataset's edge_index layout.
edge_pairs = data.edge_index.t().tolist()

G = nx.Graph()
G.add_nodes_from(node_ids)
G.add_edges_from(edge_pairs)

In [None]:
# Single-cell Plotly subplot grid whose only cell is declared as a 3D scatter subplot.
scene_spec = {'type': 'scatter3d'}
fig = make_subplots(rows=1, cols=1, specs=[[scene_spec]])

In [None]:
# Compute 3D node positions with the spring layout.
# spring_layout starts from random positions, so without a fixed seed every run yields a
# different picture; seeding makes the figure reproducible under Restart & Run All.
pos = nx.spring_layout(G, dim=3, seed=42)

In [None]:
# Node markers with per-node hover text (id, title, author, affiliation, date).
def _hover_field(lookup, node_id):
    """Return the value stored for node_id, or 'N/A' when it is missing, None or empty.

    A plain ``lookup.get(node_id, 'N/A')`` only covers missing keys; fields that were not
    found during parsing are stored as None and would otherwise be shown as 'None'.
    """
    return lookup.get(node_id) or 'N/A'


node_trace = go.Scatter3d(
    x=[pos[x][0] for x in G.nodes()],
    y=[pos[x][1] for x in G.nodes()],
    z=[pos[x][2] for x in G.nodes()],
    text=[
        f"Node ID: {x}"
        f"<br>Title: {_hover_field(aligned_titles, x)}"
        f"<br>Author: {_hover_field(aligned_authors, x)}"
        f"<br>Affiliations: {_hover_field(aligned_affiliations, x)}"
        f"<br>Date: {_hover_field(aligned_dates, x)}"
        for x in G.nodes()
    ],
    mode='markers',
    hoverinfo='text',  # show only the custom text on hover
    marker=dict(
        color='blue',
        size=10,
        line=dict(color='black', width=0.5)
    )
)

In [None]:
# Edge line segments.
# Each edge contributes its two endpoint coordinates followed by None; the None entry
# breaks the line so consecutive edges are not joined to each other. Plotting only the
# source endpoint of every edge would instead draw one polyline through unrelated nodes.
edge_x, edge_y, edge_z = [], [], []
for source, target in G.edges():
    source_xyz, target_xyz = pos[source], pos[target]
    edge_x += [source_xyz[0], target_xyz[0], None]
    edge_y += [source_xyz[1], target_xyz[1], None]
    edge_z += [source_xyz[2], target_xyz[2], None]

edge_trace = go.Scatter3d(
    x=edge_x,
    y=edge_y,
    z=edge_z,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',  # edges carry no hover text
    mode='lines'
)


In [None]:
# Attach the node and edge traces (nodes first, same order as before), style, and render.
for trace in (node_trace, edge_trace):
    fig.add_trace(trace)

# Shared settings applied to all three scene axes: no grid, no zero line, no tick labels.
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

fig.update_layout(
    hovermode='closest',
    hoverlabel=dict(bgcolor='white', bordercolor='gray'),
    showlegend=False,
    # NOTE(review): plot_bgcolor may not apply to a 3D scene — confirm it has the intended effect.
    plot_bgcolor='white',
    scene=dict(
        xaxis=dict(hidden_axis),
        yaxis=dict(hidden_axis),
        zaxis=dict(hidden_axis),
    ),
)

fig.show()

In [None]:
# Export the interactive figure to an HTML file in the working directory.
output_html_path = 'network_visualization.html'
fig.write_html(output_html_path)
