# Setup Environment

In [1]:
%load_ext autoreload
import os
import sys

current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
path_to_add = os.path.join(parent_dir, "src")
sys.path.insert(0, path_to_add)

# Get Data

## Free text comments

In [2]:
import requests
import pandas as pd
import janitor

# URL of the endpoint
url = "https://data.austintexas.gov/resource/jeyv-db9u.json"

# Fetch the data from the API
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Convert the JSON data to a pandas DataFrame
    df = pd.DataFrame(response.json()).clean_names()
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")

## Questions data (likert)

In [46]:
url_likert = "https://data.austintexas.gov/resource/s2py-ceb7.json"
data_dictionary = "https://data.austintexas.gov/api/views/s2py-ceb7/columns.json"

response = requests.get(url_likert)
if response.status_code == 200:
    # Convert the JSON data to a pandas DataFrame
    dfq = pd.DataFrame(response.json())
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")

In [47]:
likert_columns = ['the_city_of_austin_as_a_place_1', 'the_city_of_austin_as_a_place_2',
       'the_city_of_austin_as_a_place_3', 'overall_quality_of_life_in',]

column_rename = {'the_city_of_austin_as_a_place_1' : 'place to work',
                 'the_city_of_austin_as_a_place_2' : 'place to raise children',
                 'the_city_of_austin_as_a_place_3' : 'place to retire',
                 'overall_quality_of_life_in' : 'overall quality of life in the city'}

dfq = dfq.rename_columns(column_rename)

likert_encoded = ["likert_encoded_" + col for col in column_rename.values()]

In [40]:
dfq.columns

Index(['id', 'year', 'method', 'the_city_of_austin_as_a_place',
       'place to work', 'place to raise children', 'place to retire',
       'overall quality of life in the city', 'how_well_the_city_of_austin',
       'access_to_affordable_quality',
       ...
       'date_as_of_date', 'which_of_the_following_best_7',
       'which_of_the_following_best_8', 'which_of_the_following_best_6',
       'which_of_the_following_best_3', 'which_of_the_following_best_2',
       'safety_in_city_parks_and', 'overall_quality_of_planning_1',
       'bicycle_accessibility_the', 'overall_maintenance_of_city_1'],
      dtype='object', length=159)

In [54]:
%autoreload 2
from pandas_survey_toolkit import analytics, nlp, vis
from pandas_survey_toolkit.vis import cluster_heatmap_plot


  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method
  @pf.register_dataframe_method


In [55]:
dfq2 = dfq.cluster_questions(column_rename.values())

Using default mapping:
-1: Phrases containing 'disagree', 'do not agree', etc.
 0: Phrases containing 'neutral', 'neither', 'unsure', etc.
+1: Phrases containing 'agree' (but not 'disagree' or 'not agree')
NaN: NaN values are preserved
  Neutral -> 0: 754 times
  Very Satisfied -> 1: 764 times
  Satisfied -> 1: 1477 times
  Don't Know -> 0: 416 times
  Very Dissatisfied -> -1: 201 times
  Dissatisfied -> -1: 388 times


In [49]:
cluster_heatmap_plot(dfq2, x="question_cluster_id", y = likert_encoded)

In [9]:
dfq.cluster_questions()

Index(['id', 'year', 'method', 'the_city_of_austin_as_a_place',
       'the_city_of_austin_as_a_place_1', 'the_city_of_austin_as_a_place_2',
       'the_city_of_austin_as_a_place_3', 'overall_quality_of_life_in',
       'how_well_the_city_of_austin', 'access_to_affordable_quality',
       ...
       'which_of_the_following_best_3', 'which_of_the_following_best_2',
       'safety_in_city_parks_and', 'overall_quality_of_planning_1',
       'bicycle_accessibility_the', 'overall_maintenance_of_city_1',
       'likert_encoded_the_city_of_austin_as_a_place_1',
       'likert_encoded_the_city_of_austin_as_a_place_2',
       'likert_encoded_the_city_of_austin_as_a_place_3',
       'likert_encoded_overall_quality_of_life_in'],
      dtype='object', length=163)

In [17]:
dfq.to_excel('austin_likert.xlsx')

In [68]:
%autoreload 2
from pandas_survey_toolkit import analytics, nlp, vis
from pandas_survey_toolkit.vis import create_keyword_graph, visualize_keyword_graph,visualize_keyword_graph_force, create_keyword_sentiment_df_simple, create_sentiment_color_mapping

In [70]:
df2 = df.extract_keywords("response", top_n=4, ngram_range=(1,1), min_df=4).extract_sentiment("response")

In [71]:
df2.keywords

0                     [level, manage]
1                     [like, theater]
2                                  []
3      [need, expansion, accommodate]
4                            [update]
                    ...              
857        [staff, helpful, friendly]
858                                []
859                            [hope]
860      [feature, artist, welcoming]
861            [amazing, opportunity]
Name: keywords, Length: 862, dtype: object

In [72]:
sentiment_df = create_keyword_sentiment_df_simple(df2)
sentiment_df = sentiment_df.query("word != 'not'")
color_mapping = create_sentiment_color_mapping(sentiment_df)

G = create_keyword_graph(df2, 'keywords', node_color_mapping=color_mapping)
G.remove_node("not")
visualize_keyword_graph_force(G, output_file='keyword_sentiment_graph.html', colormap='RdBu', min_edge_count=2, min_node_count=1)

Graph saved to keyword_sentiment_graph.html
