# Setup Environment

In [2]:
import os
import sys

# Locate the "src" folder that sits next to the kernel's working directory.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
path_to_add = os.path.join(parent_dir, "src")

# Prepend so modules under that folder are found before anything else on sys.path.
sys.path[:0] = [path_to_add]

# Get Data

## Free text comments

In [3]:
import requests
import pandas as pd
import janitor

# City of Austin open-data endpoint that serves the free-text comments as JSON.
url = "https://data.austintexas.gov/resource/jeyv-db9u.json"

# Fetch the data from the API. The explicit timeout keeps a stalled connection
# from blocking the kernel indefinitely.
# NOTE(review): no paging parameters are sent, so only the API's default page
# of rows comes back — confirm that is the intended sample.
response = requests.get(url, timeout=30)

# Stop here on anything other than HTTP 200. Merely printing would leave `df`
# undefined (or stale from an earlier run) and push the failure into later cells.
if response.status_code != 200:
    raise RuntimeError(f"Failed to fetch data. Status code: {response.status_code}")

# Convert the JSON data to a pandas DataFrame; clean_names() comes from the
# janitor import and normalises the column labels.
df = pd.DataFrame(response.json()).clean_names()

## Questions data (Likert)

In [18]:
# City of Austin open-data endpoint that serves the Likert question answers as JSON.
url_likert = "https://data.austintexas.gov/resource/s2py-ceb7.json"

# The explicit timeout keeps a stalled connection from blocking the kernel.
# NOTE(review): no paging parameters are sent, so only the API's default page
# of rows comes back — confirm that is the intended sample.
response = requests.get(url_likert, timeout=30)

# Stop here on anything other than HTTP 200. Merely printing would leave `dfq`
# undefined (or stale from an earlier run) and push the failure into later cells.
if response.status_code != 200:
    raise RuntimeError(f"Failed to fetch data. Status code: {response.status_code}")

# Convert the JSON data to a pandas DataFrame (column labels are kept as delivered).
dfq = pd.DataFrame(response.json())

In [19]:
# Display the column labels of the Likert questions frame.
dfq.columns

Index(['id', 'year', 'method', 'the_city_of_austin_as_a_place',
       'the_city_of_austin_as_a_place_1', 'the_city_of_austin_as_a_place_2',
       'the_city_of_austin_as_a_place_3', 'overall_quality_of_life_in',
       'how_well_the_city_of_austin', 'access_to_affordable_quality',
       ...
       'date_as_of_date', 'which_of_the_following_best_7',
       'which_of_the_following_best_8', 'which_of_the_following_best_6',
       'which_of_the_following_best_3', 'which_of_the_following_best_2',
       'safety_in_city_parks_and', 'overall_quality_of_planning_1',
       'bicycle_accessibility_the', 'overall_maintenance_of_city_1'],
      dtype='object', length=159)

In [16]:
import numpy as np
# Distinct answer labels that occur anywhere in four of the question columns.
{
    answer
    for answer in np.hstack(
        dfq[
            [
                'the_city_of_austin_as_a_place_1',
                'the_city_of_austin_as_a_place_2',
                'the_city_of_austin_as_a_place_3',
                'overall_quality_of_life_in',
            ]
        ].values
    )
}

{'Dissatisfied',
 "Don't Know",
 'Neutral',
 'Satisfied',
 'Very Dissatisfied',
 'Very Satisfied'}

In [17]:
# Save the Likert frame as an Excel workbook in the working directory
# (pandas writes the index as the first column by default).
dfq.to_excel('austin_likert.xlsx')

In [4]:
%load_ext autoreload

In [5]:
%autoreload 2
from pandas_survey_toolkit import analytics, nlp, vis
from pandas_survey_toolkit.vis import create_keyword_graph, visualize_keyword_graph,visualize_keyword_graph_force, create_keyword_sentiment_df_simple, create_sentiment_color_mapping

  from .autonotebook import tqdm as notebook_tqdm
  torch.utils._pytree._register_pytree_node(


In [6]:
# Run comment clustering on the "response" column, then keyword extraction on
# the same column. Both methods are presumably registered on DataFrame by the
# pandas_survey_toolkit import — see that package for the exact meaning of
# top_n / ngram_range / min_df.
df2 = (
    df
    .cluster_comments("response")
    .extract_keywords(
        "response",
        top_n=4,
        ngram_range=(1, 1),
        min_df=4,
    )
)

  torch.utils._pytree._register_pytree_node(


In [8]:
# Save the clustered/keyword-annotated comments as an Excel workbook in the
# working directory (pandas writes the index as the first column by default).
df2.to_excel("austin_comments.xlsx")

In [124]:

# Build the frame that feeds the graph: keep the three columns of interest,
# expose the old index as an "index" column, derive string node ids for
# comments and clusters, then keep only clusters 0 through 7.
dfx = (
    df2[['response', 'cluster', 'keywords']]
    .reset_index()
    .assign(
        comment_id=lambda frame: frame["index"]
        .astype(str)
        .apply(lambda idx: "comment_" + idx),
        cluster_id=lambda frame: frame["cluster"]
        .astype(int)
        .astype(str)
        .apply(lambda label: "category_" + label),
    )
    .query("cluster >= 0 & cluster < 8")
)

In [125]:
import networkx as nx
import json
import networkx as nx
%autoreload 2
from pandas_survey_toolkit.vis import plot_pyvis_network, create_keyword_graph, create_comment_keyword_edges


In [126]:

# Directed graph holding category nodes, comment nodes and category -> comment edges.
G = nx.DiGraph()

# One category node per distinct cluster value. Iterating the distinct values
# (in order of first appearance) avoids re-adding the same node for every row.
for category in dfx["cluster"].unique():
    category = int(category)  # plain int so the "index" attribute stays JSON-serialisable
    if category == -1:  # presumably the "no cluster" label — never gets a node
        continue
    G.add_node("category_" + str(category), group="category", index=category)

# One comment node per row, carrying the comment text. Built straight from the
# two columns rather than through a row-wise DataFrame.apply.
nodes_with_attrs = [
    (comment_id, {'text': text, 'group': 'comment'})
    for comment_id, text in zip(dfx['comment_id'], dfx['response'])
]
G.add_nodes_from(nodes_with_attrs)

# Link every comment to the category it was assigned to.
G.add_edges_from(dfx[["cluster_id", "comment_id"]].values)

# Keyword graph built by the toolkit from the "keywords" column.
G_kw = create_keyword_graph(dfx, keyword_column="keywords")
# Edges between comments and their keywords, as produced by the toolkit helper.
edge_list_comment_keywords = create_comment_keyword_edges(dfx)

# Merge both graphs, add the comment/keyword edges and render the result.
G2 = nx.compose(G, G_kw)
G2.add_edges_from(edge_list_comment_keywords)
plot_pyvis_network(G2)

Network plot saved to network.html


In [127]:
import networkx as nx
import json

def export_graph(G):
    """Serialise a graph into a node-link JSON string.

    Parameters
    ----------
    G : graph-like
        Object exposing ``nodes(data=True)`` and ``edges(data=True)``, such as
        a NetworkX graph. Every node must carry a ``"group"`` attribute, and
        nodes whose group is ``"category"`` must also carry ``"index"``.

    Returns
    -------
    str
        JSON text (2-space indent) with two keys: ``"nodes"``, a list of
        ``{"id", "group", "size"[, "index"]}`` records, and ``"links"``, a
        list of ``{"source", "target", "value"}`` records.

    Raises
    ------
    KeyError
        If a node has no ``"group"`` attribute, or a category node has no
        ``"index"`` attribute.
    """

    def _node_record(node_id, attrs):
        # "size" falls back to 1 when the node has no "node_count" attribute.
        record = {
            "id": node_id,
            "group": attrs["group"],
            "size": attrs.get("node_count", 1),
        }
        # Only category nodes expose their index (used for circular positioning).
        if attrs["group"] == "category":
            record["index"] = attrs["index"]
        return record

    nodes = [_node_record(node_id, attrs) for node_id, attrs in G.nodes(data=True)]

    # "value" falls back to 1 when the edge has no "edge_count" attribute.
    links = [
        {"source": src, "target": dst, "value": attrs.get("edge_count", 1)}
        for src, dst, attrs in G.edges(data=True)
    ]

    return json.dumps({"nodes": nodes, "links": links}, indent=2)

# Serialise the combined graph (G2) and save the JSON text as graph_data.json
# in the working directory.
graph_json = export_graph(G2)
with open("graph_data.json", "w") as json_file:
    json_file.write(graph_json)

In [72]:
# Keyword sentiment table from the toolkit; the word "not" is excluded before
# the colour mapping is derived from it.
sentiment_df = create_keyword_sentiment_df_simple(df2)
sentiment_df = sentiment_df.query("word != 'not'")
color_mapping = create_sentiment_color_mapping(sentiment_df)

# NOTE(review): this rebinds `G`, which the category/comment graph cell also
# assigns — the cell that ran last decides what `G` refers to.
G = create_keyword_graph(df2, 'keywords', node_color_mapping=color_mapping)

# Assumes create_keyword_graph returns a NetworkX graph (its result is passed to
# nx.compose elsewhere in this notebook). remove_node raises for an unknown
# node, so drop "not" only when it is actually present.
if "not" in G:
    G.remove_node("not")

visualize_keyword_graph_force(G, output_file='keyword_sentiment_graph.html', colormap='RdBu', min_edge_count=2, min_node_count=1)

Graph saved to keyword_sentiment_graph.html
