# Find the distribution of each sport played


In [1]:
import json

# Load the GeoJSON feature collection. JSON text is UTF-8, so the encoding is
# given explicitly instead of relying on the platform's locale default.
with open("vic_sport_and_recreation_2015-1231352463903835354.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Map each sportsplayed value to the set of objectids recorded for it
sportsplayed_objectids = {}

for feature in data["features"]:
    properties = feature["properties"]
    sportsplayed = properties["sportsplayed"]
    objectid = properties["objectid"]

    # Features with no sport recorded are left out of the distribution
    if sportsplayed is not None:
        sportsplayed_objectids.setdefault(sportsplayed, set()).add(objectid)

# Sort sportsplayed values by the count of unique objectids in descending order
sorted_sportsplayed_objectids = sorted(sportsplayed_objectids.items(), key=lambda x: len(x[1]), reverse=True)

# Print the count of unique objectids for the first 11 sports in that ranking
print("Unique objectid count for each sportsplayed (sorted in descending order):")
for sport, objectids in sorted_sportsplayed_objectids[:11]:
    print(f"{sport}: {len(objectids)}")

Unique objectid count for each sportsplayed (sorted in descending order):
Cricket: 1566
Tennis (Outdoor): 1208
Australian Rules Football: 1000
Fitness / Gymnasium Workouts: 676
Netball: 546
Lawn Bowls: 528
Soccer: 411
Golf: 372
Swimming: 335
Basketball: 253
Equestrian: 212


# Find the number of sports played in each suburb

In [3]:
# Load the GeoJSON feature collection. JSON text is UTF-8, so the encoding is
# given explicitly instead of relying on the platform's locale default.
with open("vic_sport_and_recreation_2015-1231352463903835354.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# For each suburbtown, count how many features list each sportsplayed value
suburbtown_sportsplayed = {}

for feature in data["features"]:
    properties = feature["properties"]
    sportsplayed = properties["sportsplayed"]
    suburbtown = properties["suburbtown"]

    if suburbtown is not None:
        # A suburbtown gets an entry even when the feature has no sport, so it
        # still appears in the totals below (with a count of 0 if none ever do)
        sport_counts = suburbtown_sportsplayed.setdefault(suburbtown, {})

        if sportsplayed is not None:
            sport_counts[sportsplayed] = sport_counts.get(sportsplayed, 0) + 1

# Calculate total sportsplayed count for each suburbtown
suburbtown_total_sportsplayed = {
    suburb: sum(sportsplayed_count.values())
    for suburb, sportsplayed_count in suburbtown_sportsplayed.items()
}

# Sort suburbtowns by total sportsplayed count in descending order
sorted_suburbtown_total_sportsplayed = sorted(suburbtown_total_sportsplayed.items(), key=lambda x: x[1], reverse=True)

# Print the total sportsplayed count for the first 21 suburbtowns in that ranking
print("Total sportsplayed count for each suburbtown (sorted in descending order):")
for suburb, total_count in sorted_suburbtown_total_sportsplayed[:21]:
    print(f"{suburb}: {total_count}")

Total sportsplayed count for each suburbtown (sorted in descending order):
WARRNAMBOOL: 82
HORSHAM: 61
SHEPPARTON: 60
FRANKSTON: 58
BALLARAT: 57
BENDIGO: 55
MELTON: 53
WANGARATTA: 53
WERRIBEE: 51
SALE: 51
RICHMOND: 49
WARRAGUL: 45
CROYDON: 43
SUNBURY: 43
PORTLAND: 42
BUNDOORA: 41
TORQUAY: 41
WODONGA: 40
TRARALGON: 40
OAKLEIGH: 39
MORNINGTON: 38


# Extract only the fields needed for the next step (objectid, sportsplayed, suburbtown)

In [32]:
# Load the GeoJSON feature collection. JSON text is UTF-8, so the encoding is
# given explicitly instead of relying on the platform's locale default.
with open("vic_sport_and_recreation_2015-1231352463903835354.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Keep only objectid, sportsplayed and suburbtown, and drop every feature
# where either sportsplayed or suburbtown is missing
simplified_features = [
    {
        "objectid": properties["objectid"],
        "sportsplayed": properties["sportsplayed"],
        "suburbtown": properties["suburbtown"],
    }
    for properties in (feature["properties"] for feature in data["features"])
    if properties["sportsplayed"] is not None and properties["suburbtown"] is not None
]

# Save simplified features as a new JSON file (explicit UTF-8 for the same
# portability reason as the read above)
with open("simplified_vic_sport_and_recreation_2015.json", "w", encoding="utf-8") as file:
    json.dump(simplified_features, file, indent=2)

# Check the consistency with the previous data



In [4]:
from collections import defaultdict

# Read back the simplified records from the JSON file
with open("simplified_vic_sport_and_recreation_2015.json", "r") as simplified_file:
    simplified_data = json.load(simplified_file)

# Tally how many simplified records belong to each suburbtown
suburbtown_sportsplayed_count = defaultdict(int)
for record in simplified_data:
    suburbtown_sportsplayed_count[record["suburbtown"]] += 1

# Rank suburbtowns from the most records to the fewest
sorted_suburbtown_sportsplayed_count = list(suburbtown_sportsplayed_count.items())
sorted_suburbtown_sportsplayed_count.sort(key=lambda pair: pair[1], reverse=True)

# Print the first 11 ranked suburbtowns with their record counts
for name, total in sorted_suburbtown_sportsplayed_count[:11]:
    print(f"{name}: {total}")

WARRNAMBOOL: 82
HORSHAM: 61
SHEPPARTON: 60
FRANKSTON: 58
BALLARAT: 57
BENDIGO: 55
MELTON: 53
WANGARATTA: 53
WERRIBEE: 51
SALE: 51
RICHMOND: 49
