In [12]:
import json
import os
from datetime import datetime
from itertools import cycle

import pandas as pd
import requests
import yaml

# CouchDB base URL. The query helpers append
# "<database>/_design/<design>/_view/<view>" directly to it, so it must end
# with exactly one '/'.
# SECURITY: the fallback below embeds a username and password in the notebook.
# Set COUCHDB_SERVER_ADDRESS in the environment instead, then delete the
# fallback and rotate the exposed credential.
SERVER_ADDRESS = os.environ.get(
    'COUCHDB_SERVER_ADDRESS',
    'http://admindb:Adm1nC04chDB@172.26.134.73:5984/',
).rstrip('/') + '/'
# NOTE(review): not passed to any request in the cells visible here.
headers = {'Content-type': 'application/json; charset=UTF-8'}

# GCC codes; the gcc-based views take such a code as the first key element.
SYND_GCC = "1gsyd"
MELB_GCC = "2gmel"

In [2]:
def parseDate(date_str):
    """Split a ``%Y-%m-%d`` date string into a ``(year, month, day)`` tuple of ints.

    Raises:
        ValueError: if ``date_str`` does not match the ``%Y-%m-%d`` format.
    """
    # The first three fields of a struct_time are year, month and day.
    return datetime.strptime(date_str, "%Y-%m-%d").timetuple()[:3]

In [3]:
def getTopicRanks(start_date,end_date):
    """Fetch the topic -> weight map from the ``dateTopic`` view for a date range.

    Sends a GET to ``processed_twitter/_design/dateTopic/_view/dateTopic`` with
    ``startkey``/``endkey`` set to ``[year,month,day]`` and reads the ``value``
    of the first returned row, which is treated as a sequence of
    ``[topic, weight]`` entries.

    Args:
        start_date: Start of the range as a ``%Y-%m-%d`` string.
        end_date: End of the range as a ``%Y-%m-%d`` string.

    Returns:
        dict mapping each entry's first element to its second element. Empty
        when the view returns no rows for the range (previously an IndexError).

    Raises:
        ValueError: if either date does not match ``%Y-%m-%d``.
        requests.HTTPError: if the server answers with an error status
            (previously surfaced as an unrelated KeyError on ``'rows'``).
    """
    database_name = "processed_twitter/"
    design_name = "dateTopic/"
    view_name = "dateTopic"
    params = {
        'startkey': "[%d,%d,%d]" % parseDate(start_date),
        'endkey': "[%d,%d,%d]" % parseDate(end_date),
    }
    url = SERVER_ADDRESS + database_name + "_design/" + design_name + "_view/" + view_name
    response = requests.get(url, params=params)
    response.raise_for_status()
    rows = response.json().get('rows', [])
    if not rows:
        # Nothing matched the key range: return an empty map, like querySenByGcc.
        return {}
    return {entry[0]: entry[1] for entry in rows[0]['value']}

In [4]:
def queryTopicByGcc(gcc,start_date,end_date):
    """Fetch the topic -> weight map from the ``gccDate`` view for one GCC code.

    Sends a GET to ``processed_twitter/_design/gccDate/_view/gccDate`` with
    ``startkey``/``endkey`` set to ``[gcc,year,month,day]`` and reads the
    ``value`` of the first returned row, which is treated as a sequence of
    ``[topic, weight]`` entries.

    Args:
        gcc: GCC code used as the first key element (e.g. ``"1gsyd"``).
        start_date: Start of the range as a ``%Y-%m-%d`` string.
        end_date: End of the range as a ``%Y-%m-%d`` string.

    Returns:
        dict mapping each entry's first element to its second element. Empty
        when the view returns no rows for the range (previously an IndexError).

    Raises:
        ValueError: if either date does not match ``%Y-%m-%d``.
        requests.HTTPError: if the server answers with an error status
            (previously surfaced as an unrelated KeyError on ``'rows'``).
    """
    database_name = "processed_twitter/"
    design_name = "gccDate/"
    view_name = "gccDate"
    # json.dumps escapes quotes/backslashes in gcc; compact separators keep the
    # key text identical to the old hand-built "[\"gcc\",y,m,d]" for plain codes.
    params = {
        'startkey': json.dumps([gcc, *parseDate(start_date)], separators=(',', ':')),
        'endkey': json.dumps([gcc, *parseDate(end_date)], separators=(',', ':')),
    }
    url = SERVER_ADDRESS + database_name + "_design/" + design_name + "_view/" + view_name
    response = requests.get(url, params=params)
    response.raise_for_status()
    rows = response.json().get('rows', [])
    if not rows:
        # Nothing matched the key range: return an empty map, like querySenByGcc.
        return {}
    return {entry[0]: entry[1] for entry in rows[0]['value']}

In [8]:
def querySenByGcc(gcc,sen,start_date,end_date):
    """Fetch the topic -> weight map from the ``compose`` view for a GCC code and sentiment.

    Sends a GET to ``processed_twitter/_design/compose/_view/compose`` with
    ``startkey``/``endkey`` set to ``[gcc,sen,year,month,day]`` and reads the
    ``value`` of the first returned row, which is treated as a sequence of
    ``[topic, weight]`` entries.

    This helper is best effort: when the response has no ``rows``, no first
    row, or a malformed ``value``, whatever was collected so far (usually an
    empty dict) is returned instead of raising.

    Args:
        gcc: GCC code used as the first key element (e.g. ``"1gsyd"``).
        sen: Sentiment label used as the second key element
            (e.g. ``"POSITIVE"`` or ``"NEGATIVE"``).
        start_date: Start of the range as a ``%Y-%m-%d`` string.
        end_date: End of the range as a ``%Y-%m-%d`` string.

    Returns:
        dict mapping each entry's first element to its second element.

    Raises:
        ValueError: if either date does not match ``%Y-%m-%d``, or the
            response body is not valid JSON.
    """
    database_name = "processed_twitter/"
    design_name = "compose/"
    view_name = "compose"
    # json.dumps escapes quotes/backslashes in gcc and sen; compact separators
    # keep the key text identical to the old hand-built form for plain values.
    params = {
        'startkey': json.dumps([gcc, sen, *parseDate(start_date)], separators=(',', ':')),
        'endkey': json.dumps([gcc, sen, *parseDate(end_date)], separators=(',', ':')),
    }
    url = SERVER_ADDRESS + database_name + "_design/" + design_name + "_view/" + view_name
    response = requests.get(url, params=params)
    data = json.loads(response.content)
    weightMap = {}
    try:
        for entry in data['rows'][0]['value']:
            weightMap[entry[0]] = entry[1]
    except (KeyError, IndexError, TypeError):
        # Only missing/malformed result data is tolerated here; anything else
        # (e.g. KeyboardInterrupt) now propagates instead of being swallowed.
        pass
    return weightMap

# Sydney


In [66]:
# Sydney ("1gsyd"), 2021-5-1 to 2022-3-1: one single-column frame per
# sentiment, indexed by topic, holding the values returned by querySenByGcc.
pos_S = pd.DataFrame.from_dict(
    querySenByGcc("1gsyd", "POSITIVE", "2021-5-1", "2022-3-1"),
    orient='index',
    columns=['POSITIVE'],
)
neg_S = pd.DataFrame.from_dict(
    querySenByGcc("1gsyd", "NEGATIVE", "2021-5-1", "2022-3-1"),
    orient='index',
    columns=['NEGATIVE'],
)

# Long format: one row per (sentiment, topic) pair.
pos_topics_S = (
    pos_S.unstack()
    .reset_index()
    .set_axis(['Sentiment', 'Topic', 'Interactions'], axis=1)
)
neg_topics_S = (
    neg_S.unstack()
    .reset_index()
    .set_axis(['Sentiment', 'Topic', 'Interactions'], axis=1)
)

# Each sentiment is sorted ascending on its own, then positives are stacked
# on top of negatives.
topics_S = pd.concat([
    pos_topics_S.sort_values('Interactions'),
    neg_topics_S.sort_values('Interactions'),
])
topics_S.head()

Unnamed: 0,Sentiment,Topic,Interactions
18,POSITIVE,other_hobbies,12
14,POSITIVE,fashion_&_style,13
17,POSITIVE,gaming,20
10,POSITIVE,family,21
12,POSITIVE,arts_&_culture,29


In [67]:
import plotly.express as px
# Sydney funnel: one bar per topic, bar length = Interactions, coloured by sentiment.
fig = px.funnel(
    data_frame=topics_S,
    y='Topic',
    x='Interactions',
    color='Sentiment',
)
fig.show()

In [68]:
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Wide table for Sydney: one row per topic with its POSITIVE and NEGATIVE
# values, ordered by NEGATIVE descending.
table_data = (
    pd.concat([pos_S, neg_S], axis=1)
    .reset_index(names='TOPICS')
    .sort_values('NEGATIVE', ascending=False)
)

# Start from a plotly table figure; the bar chart is added to the same figure.
fig = ff.create_table(table_data, height_constant=60)

x_labels = table_data['TOPICS']

# The bars are bound to a second axis pair (x2/y2) so they do not share the
# table's own axes.
trace1 = go.Bar(x=x_labels, y=table_data['POSITIVE'], xaxis='x2', yaxis='y2',
                marker=dict(color='#0099ff'),
                name='POSITIVE')
trace2 = go.Bar(x=x_labels, y=table_data['NEGATIVE'], xaxis='x2', yaxis='y2',
                marker=dict(color='#404040'),
                name='NEGATIVE')
fig.add_traces([trace1, trace2])

# Initialize xaxis2 and yaxis2 before configuring them.
fig['layout']['xaxis2'] = {}
fig['layout']['yaxis2'] = {}

# Vertical split: table on y (lower 45%), bar chart on y2 (upper 40%).
# yaxis2 MUST be anchored to xaxis2 and vice versa.
fig.layout.yaxis.update({'domain': [0, .45]})
fig.layout.yaxis2.update({'domain': [.6, 1], 'anchor': 'x2', 'title': 'Total'})
fig.layout.xaxis2.update({'anchor': 'y2'})

# Margins leave room for the title and the bar chart's x-labels.
fig.layout.margin.update({'t':75, 'l':50})
# The title used to read 'Income stats' (copied from the SUDO chart); this
# figure shows topic sentiment values. The height is set explicitly because
# stacking a chart on the table changes the height computed for the table.
fig.layout.update({'title': 'Sydney: topic interactions by sentiment', 'height': 800})

fig.show()

# MELBOURNE

In [74]:
# Melbourne ("2gmel"), 2021-5-1 to 2022-3-1: one single-column frame per
# sentiment, indexed by topic, holding the values returned by querySenByGcc.
pos_M = pd.DataFrame.from_dict(
    querySenByGcc("2gmel", "POSITIVE", "2021-5-1", "2022-3-1"),
    orient='index',
    columns=['POSITIVE'],
)
neg_M = pd.DataFrame.from_dict(
    querySenByGcc("2gmel", "NEGATIVE", "2021-5-1", "2022-3-1"),
    orient='index',
    columns=['NEGATIVE'],
)

# Long format: one row per (sentiment, topic) pair.
pos_topics_M = (
    pos_M.unstack()
    .reset_index()
    .set_axis(['Sentiment', 'Topic', 'Interactions'], axis=1)
)
neg_topics_M = (
    neg_M.unstack()
    .reset_index()
    .set_axis(['Sentiment', 'Topic', 'Interactions'], axis=1)
)

# Each sentiment is sorted ascending on its own, then positives are stacked
# on top of negatives.
topics_M = pd.concat([
    pos_topics_M.sort_values('Interactions'),
    neg_topics_M.sort_values('Interactions'),
])
topics_M.head()

Unnamed: 0,Sentiment,Topic,Interactions
18,POSITIVE,other_hobbies,1
6,POSITIVE,family,3
13,POSITIVE,fashion_&_style,4
12,POSITIVE,gaming,4
4,POSITIVE,science_&_technology,5


In [75]:
import plotly.express as px
# Melbourne funnel: one bar per topic, bar length = Interactions, coloured by sentiment.
fig = px.funnel(
    data_frame=topics_M,
    y='Topic',
    x='Interactions',
    color='Sentiment',
)
fig.show()

In [70]:
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Wide table for Melbourne: one row per topic with its POSITIVE and NEGATIVE
# values, ordered by NEGATIVE descending.
table_data = (
    pd.concat([pos_M, neg_M], axis=1)
    .reset_index(names='TOPICS')
    .sort_values('NEGATIVE', ascending=False)
)

# Start from a plotly table figure; the bar chart is added to the same figure.
fig = ff.create_table(table_data, height_constant=60)

x_labels = table_data['TOPICS']

# The bars are bound to a second axis pair (x2/y2) so they do not share the
# table's own axes.
trace1 = go.Bar(x=x_labels, y=table_data['POSITIVE'], xaxis='x2', yaxis='y2',
                marker=dict(color='#0099ff'),
                name='POSITIVE')
trace2 = go.Bar(x=x_labels, y=table_data['NEGATIVE'], xaxis='x2', yaxis='y2',
                marker=dict(color='#404040'),
                name='NEGATIVE')
fig.add_traces([trace1, trace2])

# Initialize xaxis2 and yaxis2 before configuring them.
fig['layout']['xaxis2'] = {}
fig['layout']['yaxis2'] = {}

# Vertical split: table on y (lower 45%), bar chart on y2 (upper 40%).
# yaxis2 MUST be anchored to xaxis2 and vice versa.
fig.layout.yaxis.update({'domain': [0, .45]})
fig.layout.yaxis2.update({'domain': [.6, 1], 'anchor': 'x2', 'title': 'Total'})
fig.layout.xaxis2.update({'anchor': 'y2'})

# Margins leave room for the title and the bar chart's x-labels.
fig.layout.margin.update({'t':75, 'l':50})
# The title used to read 'Income stats' (copied from the SUDO chart); this
# figure shows topic sentiment values. The height is set explicitly because
# stacking a chart on the table changes the height computed for the table.
fig.layout.update({'title': 'Melbourne: topic interactions by sentiment', 'height': 800})

fig.show()

# MELBOURNE VS SYDNEY

In [142]:
# Tag each long-format frame with its city so the two can be told apart after stacking.
topics_M['CITY'] = 'MELBOURNE'
topics_S['CITY'] = 'SYDNEY'

# Pivot to one row per (Sentiment, Topic) with one column per city.
mixed = (
    pd.concat([topics_M, topics_S])
    .set_index(['Sentiment', 'Topic', 'CITY'])
    .unstack()                  # CITY (innermost index level) moves to the columns
    .droplevel(0, axis=1)       # drop the 'Interactions' level, keep city names
    .reset_index()
)
ff.create_table(mixed, height_constant=60)

# SUDO (Spatial Urban Data Observatory)

In [82]:
import json
# Adjust the relative path depending on where the dependencies are loaded from.
with open('../Output/SUDO_mel.json', 'r') as rf:
    df = pd.DataFrame.from_dict(json.load(rf))

# Aggregate per GCC_CODE21: the area column is summed, every other statistic
# is averaged. unstack() + reset_index() then yields one row per
# (statistic, GCC code) pair.
melb = (
    df.drop(['sa2_code', 'SA2_NAME21'], axis=1)
    .groupby('GCC_CODE21')
    .agg({
        'AREASQKM21': 'sum',
        **dict.fromkeys(
            [
                'median_tot_prsnl_inc_weekly',
                'median_tot_hhd_inc_weekly',
                'median_tot_fam_inc_weekly',
                'median_rent_weekly',
                'median_mortgage_repay_monthly',
                'median_age_persons',
                'average_num_psns_per_bedroom',
                'average_household_size',
            ],
            'mean',
        ),
    })
    .unstack()
    .reset_index()
)

# Name the columns, replace the GCC code with a readable city name and order
# the rows from largest to smallest value.
melb = (
    melb.set_axis(['stats', 'city', 'value'], axis=1)
    .assign(city='Melbourne')
    .sort_values('value', ascending=False)
)
melb

Unnamed: 0,stats,city,value
0,AREASQKM21,Melbourne,8003.0937
3,median_tot_fam_inc_weekly,Melbourne,2319.30321
5,median_mortgage_repay_monthly,Melbourne,2068.035392
2,median_tot_hhd_inc_weekly,Melbourne,1949.993371
1,median_tot_prsnl_inc_weekly,Melbourne,848.571106
4,median_rent_weekly,Melbourne,411.901535
6,median_age_persons,Melbourne,39.646026
8,average_household_size,Melbourne,2.588646
7,average_num_psns_per_bedroom,Melbourne,0.817888


In [83]:
# Adjust the relative path depending on where the dependencies are loaded from.
with open('../Output/SUDO_syd.json', 'r') as rf:
    df_syd = pd.DataFrame.from_dict(json.load(rf))

# Aggregate per GCC_CODE21: the area column is summed, every other statistic
# is averaged. unstack() + reset_index() then yields one row per
# (statistic, GCC code) pair.
syd = (
    df_syd.drop(['sa2_code', 'SA2_NAME21'], axis=1)
    .groupby('GCC_CODE21')
    .agg({
        'AREASQKM21': 'sum',
        **dict.fromkeys(
            [
                'median_tot_prsnl_inc_weekly',
                'median_tot_hhd_inc_weekly',
                'median_tot_fam_inc_weekly',
                'median_rent_weekly',
                'median_mortgage_repay_monthly',
                'median_age_persons',
                'average_num_psns_per_bedroom',
                'average_household_size',
            ],
            'mean',
        ),
    })
    .unstack()
    .reset_index()
)

# Name the columns, replace the GCC code with a readable city name and order
# the rows from largest to smallest value.
syd = (
    syd.set_axis(['stats', 'city', 'value'], axis=1)
    .assign(city='Sydney')
    .sort_values('value', ascending=False)
)
# Display everything except the first (largest-value) row.
syd.iloc[1:, :]

Unnamed: 0,stats,city,value
3,median_tot_fam_inc_weekly,Sydney,2439.692308
5,median_mortgage_repay_monthly,Sydney,2411.511538
2,median_tot_hhd_inc_weekly,Sydney,2064.4
1,median_tot_prsnl_inc_weekly,Sydney,894.703846
4,median_rent_weekly,Sydney,473.896154
6,median_age_persons,Sydney,39.196154
8,average_household_size,Sydney,2.717692
7,average_num_psns_per_bedroom,Sydney,0.904231


In [84]:
# Stack both cities and leave out the summed-area statistic. The rows are
# selected by name: the previous `.iloc[1:]` dropped "whatever sorted first",
# which is the area row only as long as the area is the largest value.
df_cities = (
    pd.concat([syd, melb], axis=0)
    .query("stats != 'AREASQKM21'")
    .assign(value=lambda frame: frame['value'].round(3))
)
df_cities

Unnamed: 0,stats,city,value
3,median_tot_fam_inc_weekly,Sydney,2439.692
5,median_mortgage_repay_monthly,Sydney,2411.512
2,median_tot_hhd_inc_weekly,Sydney,2064.4
1,median_tot_prsnl_inc_weekly,Sydney,894.704
4,median_rent_weekly,Sydney,473.896
6,median_age_persons,Sydney,39.196
8,average_household_size,Sydney,2.718
7,average_num_psns_per_bedroom,Sydney,0.904
3,median_tot_fam_inc_weekly,Melbourne,2319.303
5,median_mortgage_repay_monthly,Melbourne,2068.035


In [85]:
# Funnel of the SUDO statistics: one bar per statistic, coloured by city.
fig = px.funnel(
    data_frame=df_cities,
    y='stats',
    x='value',
    color='city',
)
fig.show()

In [87]:
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Wide table: one row per statistic with a Melbourne and a Sydney column,
# largest Melbourne value first (Sydney breaks ties).
table_data = (
    df_cities.set_index(['stats', 'city'])
    .unstack()
    .droplevel(0, axis=1)
    .reset_index()
    .sort_values(['Melbourne', 'Sydney'], ascending=[False, False])
)

# Display name and short code per raw statistic name. Labels are looked up by
# name rather than assigned by row position, so a change in sort order can no
# longer attach a label to the wrong row.
stat_labels = {
    'median_tot_fam_inc_weekly': ('median weekly family income', 'WI'),
    'median_mortgage_repay_monthly': ('median monthly mortgage repayment', 'MR'),
    'median_tot_hhd_inc_weekly': ('median weekly household income', 'HI'),
    'median_tot_prsnl_inc_weekly': ('median weekly personal income', 'PI'),
    'median_rent_weekly': ('median weekly rent', 'WR'),
    'median_age_persons': ('median age', 'A'),
    'average_household_size': ('average household size', 'HS'),
    'average_num_psns_per_bedroom': ('average number of persons per bedroom', 'PPB'),
}
raw_stats = list(table_data['stats'])

# Short codes for the bar chart's x axis, in table row order; an unknown
# statistic keeps its raw name.
x_labels = ['(%s)' % stat_labels[stat][1] if stat in stat_labels else stat
            for stat in raw_stats]
table_data['stats'] = ['%s (%s)' % stat_labels[stat] if stat in stat_labels else stat
                       for stat in raw_stats]

# Start from a plotly table figure; the bar chart is added to the same figure.
fig = ff.create_table(table_data, height_constant=60)

# The bars are bound to a second axis pair (x2/y2) so they do not share the
# table's own axes.
trace1 = go.Bar(x=x_labels, y=table_data['Melbourne'], xaxis='x2', yaxis='y2',
                marker=dict(color='#0099ff'),
                name='Melbourne')
trace2 = go.Bar(x=x_labels, y=table_data['Sydney'], xaxis='x2', yaxis='y2',
                marker=dict(color='#404040'),
                name='Sydney')
fig.add_traces([trace1, trace2])

# Initialize xaxis2 and yaxis2 before configuring them.
fig['layout']['xaxis2'] = {}
fig['layout']['yaxis2'] = {}

# Vertical split: table on y (lower 45%), bar chart on y2 (upper 40%).
# yaxis2 MUST be anchored to xaxis2 and vice versa.
fig.layout.yaxis.update({'domain': [0, .45]})
fig.layout.yaxis2.update({'domain': [.6, 1], 'anchor': 'x2', 'title': 'Total'})
fig.layout.xaxis2.update({'anchor': 'y2'})

# Margins leave room for the title and the bar chart's x-labels. The height is
# set explicitly because stacking a chart on the table changes the height
# computed for the table.
fig.layout.margin.update({'t':75, 'l':50})
fig.layout.update({'title': 'Income stats', 'height': 800})

fig.show()

In [88]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Use ONE row order (ascending by Melbourne) for both panels and take the
# labels from the same rows. Previously each city was sorted independently
# against a fixed label list, which mislabels Sydney's bars whenever the two
# cities rank the statistics differently.
ordered = table_data.sort_values('Melbourne')
y_mel = ordered['Melbourne']
y_syd = ordered['Sydney']

# Short codes such as "(WI)" are read from the end of each 'stats' label; a
# label without a trailing "(...)" is used unchanged.
x = (
    ordered['stats']
    .str.extract(r'(\([^()]*\))\s*$', expand=False)
    .fillna(ordered['stats'])
    .tolist()
)

# Two side-by-side panels: Melbourne on the left, Sydney on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                    shared_yaxes=False, vertical_spacing=0.001)

fig.add_trace(go.Bar(
    x=y_mel,
    y=x,
    marker=dict(
        color='rgba(50, 171, 96, 0.6)',
        line=dict(
            color='rgba(50, 171, 96, 1.0)',
            width=1),
    ),
    name='Melbourne stats',
    orientation='h',
), row=1, col=1)

fig.add_trace(go.Bar(
    x=y_syd,
    y=x,
    marker=dict(
        color='rgba(66, 135, 245, 0.6)',
        line=dict(
            color='rgba(66, 135, 245, 1.0)',
            width=1),
    ),
    name='Sydney stats',
    orientation='h',
), row=1, col=2)

fig.update_layout(
    title='Spatial Urban Data Observatory - Selected Medians and Averages',
    yaxis=dict(
        showgrid=False,
        showline=False,
        showticklabels=True,
        domain=[0, 0.85],
    ),
    yaxis2=dict(
        showgrid=False,
        showline=False,
        showticklabels=True,
        linecolor='rgba(102, 102, 102, 0.8)',
        linewidth=2,
        domain=[0, 0.85],
    ),
    xaxis=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0, 0.42],
    ),
    xaxis2=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0.47, 1],
    ),
    legend=dict(x=0.029, y=1.038, font_size=10),
    margin=dict(l=100, r=20, t=70, b=70),
    paper_bgcolor='rgb(248, 248, 255)',
    plot_bgcolor='rgb(248, 248, 255)',
)

annotations = []

y_m = np.round(y_mel, decimals=2)
y_s = np.round(y_syd, decimals=2)

# Value labels, placed at 95% of each bar's length.
for ym, ys, xd in zip(y_m, y_s, x):
    # Melbourne panel (left axes)
    annotations.append(dict(xref='x1', yref='y1',
                            y=xd, x=ym*0.95,
                            text=str(ym),
                            font=dict(family='Arial', size=12,
                                      color='rgb(0, 0, 0)'),
                            showarrow=False))
    # Sydney panel (right axes)
    annotations.append(dict(xref='x2', yref='y2',
                            y=xd, x=ys*0.95,
                            text=str(ys),
                            font=dict(family='Arial', size=12,
                                      color='rgb(0, 0, 0)'),
                            showarrow=False))
# Data-source credit below the plot area.
annotations.append(dict(xref='paper', yref='paper',
                        x=-0.07, y=-0.2,
                        text='https://sudo.eresearch.unimelb.edu.au/',
                        font=dict(family='Arial', size=10, color='rgb(150,150,150)'),
                        showarrow=False))

fig.update_layout(annotations=annotations)

fig.show()

# Show the labelled statistics table on its own. `table_data` is the frame the
# bar charts earlier in this cell already read from; it used to be rebuilt
# here from df_cities with a verbatim copy of the same code, giving the same
# rows, so the existing frame is reused instead.
fig = ff.create_table(table_data, height_constant=60)
fig