# Pre Analysis of Climate Sensor Data

##### Imports

In [31]:
# Imports
# Standard
import pandas as pd
import numpy as np
import os

# Plots
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns

sns.set_theme()

sns.set_palette(sns.color_palette("colorblind"))
sns.set(rc={"figure.facecolor": "white"})

#Plotly
import chart_studio.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import json
import re
pio.renderers.default = "notebook"


##### Load Data

In [24]:
with open('../../data/raw/VIC_Suburbs.json') as fp:
    val = fp.read()
    data = json.loads(val)

In [26]:
data['features'][1]['properties']['vic_lga__2']

{'lg_ply_pid': '465',
 'dt_create': '2017-09-08Z',
 'dt_retire': None,
 'lga_pid': 'VIC246',
 'vic_lga_sh': '2017-09-08Z',
 'vic_lga__1': None,
 'vic_lga__2': 'MOUNT BULLER ALPINE RESORT (UNINCORPORATED)',
 'vic_lga__3': 'MOUNT BULLER ALPINE RESORT (UNINC)',
 'vic_lga__4': None,
 'vic_lga__5': '2'}

In [27]:
# Get Local Government
LGA_Names = [
'Banyule',
'Bayside',
'Boroondara',
'Brimbank',
'Cardinia',
'Casey',
'Darebin',
'Frankston',
'Glen Eira',
'Greater Dandenong',
'Hobsons Bay',
'Hume',
'Kingston',
'Knox',
'Macedon Ranges',
'Manningham',
'Maribyrnong',
'Maroondah',
'Melbourne',
'Melton',
'Mitchell',
'Monash',
'Moonee Valley',
'Moreland',
'Mornington Peninsula',
'Murrindindi',
'Nillumbik',
'Port Phillip',
'Stonnington',
'Whitehorse',
'Whittlesea',
'Wyndham',
'Yarra',
'Yarra Ranges']


In [30]:
def normalize_text(string):
    """ Text normalization from
    https://github.com/yoonkim/CNN_sentence/blob/23e0e1f735570/process_data.py
    as specified in Yao's paper.
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " \( ", string)
    string = re.sub(r"\)", " \) ", string)
    string = re.sub(r"\?", " \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower() 

In [50]:
names = []
for i in range(len(data['features'])):
    names.append(data['features'][i]['properties']['vic_lga__2'])

names = [normalize_text(name) for name in names]
LGA_Names = [normalize_text(name) for name in LGA_Names]

names = [name.replace(' city','') for name in names]
names = [name.replace(' shire','') for name in names]
names = [name.replace(' \\( unincorporated \\)','') for name in names]
names = [name.replace(' \\( uninc \\)','') for name in names]
names = [name.replace(' alpine resort','') for name in names]
names = [name.replace(' borough','') for name in names]
names = [name.replace(' \\( vic \\)','') for name in names]

names

['falls creek',
 'mount buller',
 'corangamite',
 'kingston',
 'frankston',
 'golden plains',
 'moira',
 'indigo',
 'wodonga',
 'darebin',
 'maroondah',
 'west wimmera',
 'maribyrnong',
 'hobsons bay',
 'manningham',
 'whitehorse',
 'boroondara',
 'knox',
 'monash',
 'yarra',
 'stonnington',
 'murrindindi',
 'hindmarsh',
 'bass coast',
 'greater shepparton',
 'bass coast',
 'moorabool',
 'ballarat',
 'bass coast',
 'hepburn',
 'mount alexander',
 'central goldfields',
 'buloke',
 'gannawarra',
 'mildura rural',
 'strathbogie',
 'alpine',
 'surf coast',
 'greater geelong',
 'benalla rural',
 'moreland',
 'mount hotham',
 'moonee valley',
 'wangaratta rural',
 'wyndham',
 'latrobe',
 'mansfield',
 'glenelg',
 'southern grampians',
 'moyne',
 'northern grampians',
 'south gippsland',
 'yarriambiack',
 'ararat rural',
 'pyrenees',
 'horsham rural',
 'macedon ranges',
 'baw baw',
 'melton',
 'hume',
 'whittlesea',
 'brimbank',
 'east gippsland',
 'loddon',
 'campaspe',
 'yarra ranges',
 'we

In [51]:
np.array(LGA_Names)[np.array([name in names for name in LGA_Names])==False]

array(['bayside \\( vic \\)', 'kingston \\( vic \\)'], dtype='<U20')

In [52]:
print(sum(name in names for name in LGA_Names))
print(len(LGA_Names))

32
34


In [13]:
import plotly.express as px

fig = px.choropleth_mapbox(None, geojson=data, locations='DISTRICT', color='randdata',
                           color_continuous_scale="Viridis",
                           range_color=(0, 1),
                           mapbox_style="carto-positron",
                           zoom=11, center = {"lat": 141.77, "lon": -35.026},
                           opacity=0.5,
                           labels={'randdata':'Random data for district'}
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()