## Importing required packages

In [31]:
#importing required packages
import requests
import pandas as pd
import numpy as np
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import plotly.graph_objects as go
import plotly.colors as colors
from sklearn.linear_model import LinearRegression
from export_functionality import add_export_button

## Fetching and Consolidating Food Security Data from Melbourne Open Data API

In [2]:
#base URL of the Explore API v2.1 JSON export endpoint ({dataset_id} is filled in per year)
url_use = "https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/{dataset_id}/exports/json"

#list of dataset IDs for each year
DATASET_IDS = [
    "social-indicators-for-city-of-melbourne-residents-2023",
    "social-indicators-for-city-of-melbourne-residents-2022",
    "social-indicators-for-city-of-melbourne-residents-2021",
    "social-indicators-for-city-of-melbourne-residents-2020",
    "social-indicators-for-city-of-melbourne-residents-2019",
    "social-indicators-for-city-of-melbourne-residents-2018"
]

#seconds to wait on the API before giving up on a dataset, so that a stalled
#connection cannot hang the notebook indefinitely
REQUEST_TIMEOUT_SECONDS = 30

#accumulates the food-security records of every year
all_years_data = []

#fetching data through each dataset ID
for dataset_id in DATASET_IDS:
    print(f"Fetching data for {dataset_id}...")

    #constructing the API request URL
    dataset_url = url_use.format(dataset_id=dataset_id)

    #api call; a network failure (timeout, DNS, refused connection) is reported and
    #the dataset skipped, exactly like a non-200 response, instead of aborting the loop
    try:
        response = requests.get(dataset_url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as error:
        print(f"Error: Unable to fetch data for {dataset_id}. {error}")
        continue

    if response.status_code != 200:
        print(f"Error: Unable to fetch data for {dataset_id}. HTTP {response.status_code}")
        continue

    #parsing json response
    data = response.json()

    #checking if response empty
    if not data:
        print(f"No data found for dataset {dataset_id}.")
        continue

    #converting json into dataframe
    df = pd.DataFrame(data)

    #keeping only the "Food security" topic, the only records used in this notebook;
    #a dataset without a "topic" column is kept unfiltered
    if "topic" in df.columns:
        df = df[df["topic"].str.contains("Food security", na=False)]

    #appending the filtered data to the list
    all_years_data.extend(df.to_dict("records"))

    print(f"Fetched {len(df)} records for {dataset_id}.")

Fetching data for social-indicators-for-city-of-melbourne-residents-2023...
Fetched 90 records for social-indicators-for-city-of-melbourne-residents-2023.
Fetching data for social-indicators-for-city-of-melbourne-residents-2022...
Fetched 90 records for social-indicators-for-city-of-melbourne-residents-2022.
Fetching data for social-indicators-for-city-of-melbourne-residents-2021...
Fetched 72 records for social-indicators-for-city-of-melbourne-residents-2021.
Fetching data for social-indicators-for-city-of-melbourne-residents-2020...
Fetched 72 records for social-indicators-for-city-of-melbourne-residents-2020.
Fetching data for social-indicators-for-city-of-melbourne-residents-2019...
Fetched 72 records for social-indicators-for-city-of-melbourne-residents-2019.
Fetching data for social-indicators-for-city-of-melbourne-residents-2018...
Fetched 72 records for social-indicators-for-city-of-melbourne-residents-2018.


In [3]:
#converting the combined data to a DataFrame
all_df = pd.DataFrame(all_years_data)

#saving the combined dataframe to a csv file
all_df.to_csv("food_security_all_years.csv", index=False)
#the sentence-ending period goes before the line break; it used to be printed
#on a line of its own
print("All data saved to 'food_security_all_years.csv'.\n")

#display
print("Few rows from the combined DataFrame (all_df):")
print(all_df.head())

All data saved to 'food_security_all_years.csv'
.
Few rows from the combined DataFrame (all_df):
  indicator                    type          topic  \
0         6  Council Plan Indicator  Food security   
1         6  Council Plan Indicator  Food security   
2         6  Council Plan Indicator  Food security   
3        6a                   Other  Food security   
4        6a                   Other  Food security   

                                         description  \
0  Experienced food insecurity (worried food woul...   
1  Experienced food insecurity (worried food woul...   
2  Experienced food insecurity (worried food woul...   
3                         Worried food would run out   
4                         Worried food would run out   

                     response respondent_group  year  sample_size  result  \
0  Yes, in the last 12 months      55-64 years  2023          115    13.2   
1  Yes, in the last 12 months        65+ years  2023          203     5.0   
2  Yes, in

In [4]:
#rich preview of the first five combined records
all_df.head(n=5)

Unnamed: 0,indicator,type,topic,description,response,respondent_group,year,sample_size,result,format,respondent_group0
0,6,Council Plan Indicator,Food security,Experienced food insecurity (worried food woul...,"Yes, in the last 12 months",55-64 years,2023,115,13.2,Per cent,
1,6,Council Plan Indicator,Food security,Experienced food insecurity (worried food woul...,"Yes, in the last 12 months",65+ years,2023,203,5.0,Per cent,
2,6,Council Plan Indicator,Food security,Experienced food insecurity (worried food woul...,"Yes, in the last 12 months",Docklands 3008,2023,113,25.7,Per cent,
3,6a,Other,Food security,Worried food would run out,"Yes, in the last 12 months",35-44 years,2023,228,19.3,Per cent,
4,6a,Other,Food security,Worried food would run out,"Yes, in the last 12 months",55-64 years,2023,115,6.7,Per cent,


## Data Cleaning and Handling Missing Values

In [5]:
#number of missing entries in each column
missing_values = all_df.isna().sum()
#dtype pandas inferred for each column
data_types = all_df.dtypes

print("Missing values:\n", missing_values)
print("Data Types:\n", data_types)

Missing values:
 indicator              0
type                   0
topic                  0
description            0
response               0
respondent_group      72
year                   0
sample_size            0
result                 0
format                 0
respondent_group0    396
dtype: int64
Data Types:
 indicator             object
type                  object
topic                 object
description           object
response              object
respondent_group      object
year                  object
sample_size            int64
result               float64
format                object
respondent_group0     object
dtype: object


In [6]:
#(rows, columns) of the combined frame, to compare against the missing-value counts above
all_df.shape

(468, 11)

* respondent_group    72
* respondent_group0    396 empty
* total record = 468
* difference = 72
* checked the fetched dataset after merging it into a csv
* next, print the records where exactly one of `respondent_group0` and `respondent_group` is empty.
* probably, for those 72 records missing `respondent_group`, the relevant data is in `respondent_group0`

In [7]:
#rows whose group label is present only in respondent_group0
only_group0_filled = all_df['respondent_group'].isna() & all_df['respondent_group0'].notna()
respondent_group_null = all_df[only_group0_filled]
print(f"Number of records where respondent_group is null but respondent_group0 is not: {len(respondent_group_null)}")
if len(respondent_group_null):
    print("Sample records where respondent_group is null but respondent_group0 is not:")
    print(respondent_group_null.head())

print("\n************************************\n")

#rows whose group label is present only in respondent_group
only_group_filled = all_df['respondent_group0'].isna() & all_df['respondent_group'].notna()
respondent_group0_null = all_df[only_group_filled]
print(f"Number of records where respondent_group0 is null but respondent_group is not: {len(respondent_group0_null)}")
if len(respondent_group0_null):
    print("Sample records where respondent_group0 is null but respondent_group is not:")
    print(respondent_group0_null.head())

Number of records where respondent_group is null but respondent_group0 is not: 72
Sample records where respondent_group is null but respondent_group0 is not:
    indicator                    type          topic  \
180         6  Council Plan Indicator  Food security   
181         6  Council Plan Indicator  Food security   
182         6  Council Plan Indicator  Food security   
183         6  Council Plan Indicator  Food security   
184         6  Council Plan Indicator  Food security   

                                           description  \
180  Experienced food insecurity (worried food woul...   
181  Experienced food insecurity (worried food woul...   
182  Experienced food insecurity (worried food woul...   
183  Experienced food insecurity (worried food woul...   
184  Experienced food insecurity (worried food woul...   

                       response respondent_group  year  sample_size  result  \
180  Yes, in the last 12 months              NaN  2021          371    49.5  

* From the result above, and after cross-checking the dataset in `.csv` format, I can see that for the `year 2021` dataset the `respondent_group` column is empty (for 72 records).
* Instead, the data for those 72 records of `year 2021` is in the `respondent_group0` column. Hence the remaining 396 (468-72) records are empty in `respondent_group0`, as their values are stored in the `respondent_group` column.
* So now I will combine these two columns into one unified column, `respondent_group`.

In [8]:
#combining respondent_group and respondent_group0 into a unified column.
#guarded so the cell can be re-run: once respondent_group0 has been merged and
#dropped, running the merge again would fail with a KeyError
if 'respondent_group0' in all_df.columns:
    #keep respondent_group where present, otherwise fall back to respondent_group0
    all_df['respondent_group'] = all_df['respondent_group'].combine_first(all_df['respondent_group0'])

    #dropping the respondent_group0 column as it is not needed after the merge
    all_df = all_df.drop(columns=['respondent_group0'])

#verifying
missing_values = all_df.isnull().sum()
print("Missing values:\n", missing_values)

#saving for comparing and verifying with the previously fetched data
all_df.to_csv("food_security_corrected_all_years.csv", index=False)
print("\nAll data saved to 'food_security_corrected_all_years.csv'.")

# Preview the updated DataFrame
print(all_df.head())

Missing values:
 indicator           0
type                0
topic               0
description         0
response            0
respondent_group    0
year                0
sample_size         0
result              0
format              0
dtype: int64

All data saved to 'food_security_corrected_all_years.csv'.
  indicator                    type          topic  \
0         6  Council Plan Indicator  Food security   
1         6  Council Plan Indicator  Food security   
2         6  Council Plan Indicator  Food security   
3        6a                   Other  Food security   
4        6a                   Other  Food security   

                                         description  \
0  Experienced food insecurity (worried food woul...   
1  Experienced food insecurity (worried food woul...   
2  Experienced food insecurity (worried food woul...   
3                         Worried food would run out   
4                         Worried food would run out   

                     respons

In [9]:
#first five records after merging the two respondent-group columns
all_df.head(n=5)

Unnamed: 0,indicator,type,topic,description,response,respondent_group,year,sample_size,result,format
0,6,Council Plan Indicator,Food security,Experienced food insecurity (worried food woul...,"Yes, in the last 12 months",55-64 years,2023,115,13.2,Per cent
1,6,Council Plan Indicator,Food security,Experienced food insecurity (worried food woul...,"Yes, in the last 12 months",65+ years,2023,203,5.0,Per cent
2,6,Council Plan Indicator,Food security,Experienced food insecurity (worried food woul...,"Yes, in the last 12 months",Docklands 3008,2023,113,25.7,Per cent
3,6a,Other,Food security,Worried food would run out,"Yes, in the last 12 months",35-44 years,2023,228,19.3,Per cent
4,6a,Other,Food security,Worried food would run out,"Yes, in the last 12 months",55-64 years,2023,115,6.7,Per cent


In [10]:
#dropping unnecessary columns; errors='ignore' keeps the cell re-runnable, because
#on a second run these columns are already gone and drop() would raise a KeyError
all_df = all_df.drop(
    columns=['indicator', 'type', 'topic', 'response', 'format'],
    errors='ignore',
)

print("Filtered Data Sample:\n", all_df.head())

Filtered Data Sample:
                                          description respondent_group  year  \
0  Experienced food insecurity (worried food woul...      55-64 years  2023   
1  Experienced food insecurity (worried food woul...        65+ years  2023   
2  Experienced food insecurity (worried food woul...   Docklands 3008  2023   
3                         Worried food would run out      35-44 years  2023   
4                         Worried food would run out      55-64 years  2023   

   sample_size  result  
0          115    13.2  
1          203     5.0  
2          113    25.7  
3          228    19.3  
4          115     6.7  


In [11]:
#how many distinct values each column holds (a missing value counts as one)
unique_counts = {}
for column_name in all_df.columns:
    unique_counts[column_name] = all_df[column_name].nunique(dropna=False)
print("\nCounts of unique values in each column:\n", unique_counts)


Counts of unique values in each column:
 {'description': 6, 'respondent_group': 21, 'year': 6, 'sample_size': 222, 'result': 266}


In [12]:
#distinct values of the three categorical columns, to spot inconsistent labels
unique_descriptions = all_df['description'].unique()
unique_years = all_df['year'].unique()
unique_respondent_groups = all_df['respondent_group'].unique()

for heading, distinct_values in (
    ("Unique values in DESCRIPTION:\n", unique_descriptions),
    ("\nUnique values in YEAR:\n", unique_years),
    ("\nUnique values in RESPONDENT GROUP:\n", unique_respondent_groups),
):
    print(heading, distinct_values)

Unique values in DESCRIPTION:
 ['Experienced food insecurity (worried food would run out and/or skipped meals and/or ran out of food and/or accessed emergency food relief services)'
 'Worried food would run out' 'Skipped meals' 'Ran out of food'
 'Accessed emergency food relief services'
 'Experienced food insecurity (worried food would run out and/or skipped meals and/or ran out of food)']

Unique values in YEAR:
 ['2023' '2022' '2021' '2020' '2019' '2018']

Unique values in RESPONDENT GROUP:
 ['55-64 years' '65+ years' 'Docklands 3008' '35-44 years' 'Female' 'Male'
 'South Wharf / Southbank 3006' '45-54 years' 'Carlton 3053'
 'City of Melbourne' '25-34 years'
 'North Melbourne 3051 / West Melbourne 3003' 'East Melbourne 3002'
 'Kensington / Flemington 3031' 'Melbourne 3000' 'Parkville 3052'
 '18-24 years' 'South Yarra 3141 / Melbourne/St Kilda Road 3004'
 'Kensington/ Flemingon 3031' 'Southbank/ South Wharf 3006'
 'South Yarra 3141 / Melbourne (St Kilda Road) 3004']


In [13]:
#three suburb groups appear under two spellings each; map the variant spelling
#onto the one used by the rest of the data:
#  Kensington/ Flemingon 3031 -> Kensington / Flemington 3031
#  Southbank/ South Wharf 3006 -> South Wharf / Southbank 3006
#  South Yarra 3141 / Melbourne (St Kilda Road) 3004 -> South Yarra 3141 / Melbourne/St Kilda Road 3004
respondent_group_corrections = {
    'Kensington/ Flemingon 3031': 'Kensington / Flemington 3031',
    'Southbank/ South Wharf 3006': 'South Wharf / Southbank 3006',
    'South Yarra 3141 / Melbourne (St Kilda Road) 3004': 'South Yarra 3141 / Melbourne/St Kilda Road 3004',
}
all_df['respondent_group'] = all_df['respondent_group'].replace(respondent_group_corrections)

#verify that only the canonical spellings remain
corrected_groups = all_df['respondent_group'].unique()
print("Unique values in RESPONDENT GROUP after correction:\n", corrected_groups)
print("\nCount of unique values: ", len(corrected_groups))

Unique values in RESPONDENT GROUP after correction:
 ['55-64 years' '65+ years' 'Docklands 3008' '35-44 years' 'Female' 'Male'
 'South Wharf / Southbank 3006' '45-54 years' 'Carlton 3053'
 'City of Melbourne' '25-34 years'
 'North Melbourne 3051 / West Melbourne 3003' 'East Melbourne 3002'
 'Kensington / Flemington 3031' 'Melbourne 3000' 'Parkville 3052'
 '18-24 years' 'South Yarra 3141 / Melbourne/St Kilda Road 3004']

Count of unique values:  18


In [14]:
#first five records of the cleaned frame
all_df.head(n=5)

Unnamed: 0,description,respondent_group,year,sample_size,result
0,Experienced food insecurity (worried food woul...,55-64 years,2023,115,13.2
1,Experienced food insecurity (worried food woul...,65+ years,2023,203,5.0
2,Experienced food insecurity (worried food woul...,Docklands 3008,2023,113,25.7
3,Worried food would run out,35-44 years,2023,228,19.3
4,Worried food would run out,55-64 years,2023,115,6.7


## Visualization of all the food insecurity types by showing their relative proportions

In [15]:
#short legend labels, keyed by the full survey description text
short_labels = {
    "Ran out of food": "Ran out of food",
    "Skipped meals": "Skipped meals",
    "Worried food would run out": "Worried food would run out",
    (
        "Experienced food insecurity (worried food would run out and/or skipped meals "
        "and/or ran out of food)"
    ): "Insecurity (multiple concerns)",
    (
        "Experienced food insecurity (worried food would run out and/or skipped meals "
        "and/or ran out of food and/or accessed emergency food relief services)"
    ): "Insecurity (multiple concerns + relief)",
    "Accessed emergency food relief services": "Accessed food relief services",
}

#new column holding the short label of every record
all_df['food_insecurity'] = all_df['description'].map(short_labels)

#number of records per short label, as a two-column frame for plotly
description_counts = (
    all_df['food_insecurity']
    .value_counts()
    .rename_axis('food_insecurity')
    .reset_index(name='Count')
)

#pie chart of the record counts
fig = px.pie(
    description_counts,
    names='food_insecurity',
    values='Count',
    title="Distribution of Food Insecurity:",
)

#percentages inside the slices, label and count on hover
fig.update_traces(
    textposition='inside',
    textinfo='percent',
    hovertemplate='<b>%{label}</b><br>Count: %{value}<extra></extra>',
)

#square figure, centred title, horizontal legend slightly below the chart
fig.update_layout(
    width=600,
    height=600,
    title_x=0.5,
    legend={
        'orientation': "h",
        'yanchor': "bottom",
        'y': -0.2,
        'xanchor': "center",
        'x': 0.5,
    },
    margin={'l': 0, 'r': 0, 't': 50, 'b': 0},
    template="plotly_white",
)

fig.show()

#export button for the figure
add_export_button(fig, file_name_prefix="food_insecurity_pie_chart")

Button(button_style='success', description='Export', icon='download', style=ButtonStyle())

HTML(value='')

In [16]:
#first five records, now including the food_insecurity short label
all_df.head(n=5)

Unnamed: 0,description,respondent_group,year,sample_size,result,food_insecurity
0,Experienced food insecurity (worried food woul...,55-64 years,2023,115,13.2,Insecurity (multiple concerns + relief)
1,Experienced food insecurity (worried food woul...,65+ years,2023,203,5.0,Insecurity (multiple concerns + relief)
2,Experienced food insecurity (worried food woul...,Docklands 3008,2023,113,25.7,Insecurity (multiple concerns + relief)
3,Worried food would run out,35-44 years,2023,228,19.3,Worried food would run out
4,Worried food would run out,55-64 years,2023,115,6.7,Worried food would run out


## Analyzing Demographic Distributions Across Years

* Creating new columns (`Age`, `Suburb`, `Gender`) based on the values in `respondent_group`. These columns classify and separate the demographic information into meaningful groups.

In [17]:
#functions to classify entries as Age, Suburb, or Gender with basic standardization
def classify_age(value):
    """Return the standardized age-band label, or None when value is not an age band.

    The label is stripped and title-cased (e.g. "55-64 years" -> "55-64 Years");
    it counts as an age band when the standardized text contains "Years".
    Non-string input (None, NaN) returns None instead of raising, so the
    function is safe to use with ``Series.apply`` on a column with gaps.
    """
    if not isinstance(value, str):
        #missing entries arrive as None/NaN and have no .strip()
        return None
    label = value.strip().title()  #standardize spacing and capitalization
    return label if 'Years' in label else None

def classify_suburb(value):
    """Return the standardized label when value is neither an age band nor a gender.

    The label is stripped and title-cased; anything that does not contain
    "Years" and is not "Male"/"Female" is treated as a suburb, otherwise None
    is returned. Non-string input (None, NaN) returns None instead of raising.
    """
    if not isinstance(value, str):
        #missing entries arrive as None/NaN and have no .strip()
        return None
    label = value.strip().title()
    #NOTE(review): every remaining label lands here, including "City of Melbourne"
    #(returned as "City Of Melbourne") - confirm that it should count as a suburb
    if 'Years' in label or label in ('Male', 'Female'):
        return None
    return label

def classify_gender(value):
    """Return "Male" or "Female" when value is a gender label, otherwise None.

    The label is stripped and title-cased before the comparison, so " male "
    is recognised as "Male". Non-string input (None, NaN) returns None instead
    of raising.
    """
    if not isinstance(value, str):
        #missing entries arrive as None/NaN and have no .strip()
        return None
    label = value.strip().title()
    return label if label in ('Male', 'Female') else None

#one derived column per demographic dimension, each filled by its classifier
demographic_classifiers = (
    ('Age', classify_age),
    ('Suburb', classify_suburb),
    ('Gender', classify_gender),
)
for column_name, classifier in demographic_classifiers:
    all_df[column_name] = all_df['respondent_group'].apply(classifier)

#verifying
print("Our dataframe with new columns:\n", all_df[['respondent_group', 'Age', 'Suburb', 'Gender']].head())

Our dataframe with new columns:
   respondent_group          Age          Suburb Gender
0      55-64 years  55-64 Years            None   None
1        65+ years    65+ Years            None   None
2   Docklands 3008         None  Docklands 3008   None
3      35-44 years  35-44 Years            None   None
4      55-64 years  55-64 Years            None   None


In [18]:
#first ten records, now including the Age / Suburb / Gender columns
all_df.head(n=10)

Unnamed: 0,description,respondent_group,year,sample_size,result,food_insecurity,Age,Suburb,Gender
0,Experienced food insecurity (worried food woul...,55-64 years,2023,115,13.2,Insecurity (multiple concerns + relief),55-64 Years,,
1,Experienced food insecurity (worried food woul...,65+ years,2023,203,5.0,Insecurity (multiple concerns + relief),65+ Years,,
2,Experienced food insecurity (worried food woul...,Docklands 3008,2023,113,25.7,Insecurity (multiple concerns + relief),,Docklands 3008,
3,Worried food would run out,35-44 years,2023,228,19.3,Worried food would run out,35-44 Years,,
4,Worried food would run out,55-64 years,2023,115,6.7,Worried food would run out,55-64 Years,,
5,Worried food would run out,Female,2023,758,24.6,Worried food would run out,,,Female
6,Worried food would run out,Male,2023,572,19.4,Worried food would run out,,,Male
7,Worried food would run out,Docklands 3008,2023,113,14.5,Worried food would run out,,Docklands 3008,
8,Worried food would run out,South Wharf / Southbank 3006,2023,147,16.0,Worried food would run out,,South Wharf / Southbank 3006,
9,Skipped meals,35-44 years,2023,228,16.3,Skipped meals,35-44 Years,,


In [None]:
#choices for the two dropdowns; the first entry of each list is the placeholder
#that stands for "nothing selected yet"
years = ['Select the Year', *sorted(all_df['year'].unique().tolist()), 'All Years']
category_options = ['Select the Category', 'Age', 'Gender', 'Suburb']

#year picker, opening on its placeholder
year_dropdown = widgets.Dropdown(options=years, value='Select the Year', description='Year:')

#category picker, opening on its placeholder
category_dropdown = widgets.Dropdown(options=category_options, value='Select the Category', description='Category:')

#defining a function to update the plot based on selections
def update_plot(change=None, year_widget=year_dropdown, category_widget=category_dropdown):
    """Redraw the demographic distribution bar chart for the current selection.

    Parameters
    ----------
    change : dict, optional
        Change event passed by ``observe``. It is not inspected; the current
        widget values are read instead.
    year_widget, category_widget : widgets.Dropdown, optional
        Dropdowns to read. They default to the widgets that exist when this
        function is defined, so the callback keeps working after a later cell
        rebinds the global name ``category_dropdown`` to a different widget
        (one whose placeholder text differs, which used to slip past the
        guard below and end in an unbound ``fig``).
    """
    clear_output(wait=True)
    display(year_widget, category_widget)

    #getting selected values
    selected_year = year_widget.value
    selected_category = category_widget.value

    #checking for valid selections
    if selected_year == 'Select the Year' or selected_category == 'Select the Category':
        display(HTML("<b>Please select both a year and a category to view the distribution.</b>"))
        return

    #filtering data based on year selection
    if selected_year == 'All Years':
        filtered_df = all_df
    else:
        #compare numerically: the dropdown may hold year strings while a later
        #cell converts all_df['year'] to numbers (or the other way round), and a
        #str-vs-int comparison would silently match nothing
        year_numbers = pd.to_numeric(all_df['year'], errors='coerce')
        filtered_df = all_df[year_numbers == int(selected_year)]

    #the selected category is also the name of the column to count; one code
    #path serves Age, Gender and Suburb
    counts = filtered_df[selected_category].value_counts().reset_index()
    counts.columns = [selected_category, 'Count']
    fig = px.bar(
        counts,
        x=selected_category,
        y='Count',
        title=f"{selected_category} Distribution ({selected_year})",
    )

    #plot layout
    fig.update_layout(
        template="plotly_white",
        xaxis_title=selected_category,
        yaxis_title="Count",
        title_x=0.5  # Center the title
    )
    fig.show()

    #export
    add_export_button(fig, file_name_prefix=f"{selected_category}_distribution_{selected_year}")

#any change of either dropdown's value redraws the chart
for dropdown in (year_dropdown, category_dropdown):
    dropdown.observe(update_plot, names='value')

#initial view: both dropdowns plus a hint, no chart yet
display(year_dropdown, category_dropdown)
display(HTML("<b>Please select both a year and a category to view the corresponding distribution.</b>"))

Dropdown(description='Year:', index=4, options=('Select the Year', 2018, 2019, 2020, 2021, 2022, 2023, 'All Ye…

Dropdown(description='Category:', index=2, layout=Layout(width='400px'), options=('Select Category', 'Age', 'G…

Button(button_style='success', description='Export', icon='download', style=ButtonStyle())

HTML(value='')

## Visualizing Food Insecurity Across Demographics

In [20]:
#long format: one row per record and demographic dimension (Age / Gender / Suburb)
melted_df = all_df.melt(
    id_vars=['food_insecurity'],
    value_vars=['Age', 'Gender', 'Suburb'],
    var_name='Category',
    value_name='Category Value',
)
#rows whose dimension holds no value for that record are discarded
melted_df = melted_df.dropna(subset=['Category Value'])

#number of records per food insecurity type, dimension and subgroup
grouping_columns = ['food_insecurity', 'Category', 'Category Value']
description_counts = melted_df.groupby(grouping_columns).size().reset_index(name='Count')

#bar chart with one facet row per demographic dimension
fig = px.bar(
    description_counts,
    x='Category Value',
    y='Count',
    color='food_insecurity',
    facet_row='Category',
    title="Distribution of Food Insecurity Across Demographics:",
    labels={'Count': 'Number of Responses', 'Category Value': 'Category Subgroup'},
    category_orders={"Category": ["Age", "Gender", "Suburb"]},
)

#layout for better visualization
fig.update_layout(
    height=1000,
    template="plotly_white",
    yaxis_title="Number of Responses",
    xaxis_title="Category Subgroup",
    legend_title="Food Insecurity Type",
)


def _keep_facet_value(annotation):
    """Shorten a facet annotation such as "Category=Age" to just "Age"."""
    annotation.update(text=annotation.text.split("=")[-1])


#facet row titles show only the dimension name
fig.for_each_annotation(_keep_facet_value)

fig.show()

#export
add_export_button(fig, file_name_prefix="food_insecurity_across_demographics")

Button(button_style='success', description='Export', icon='download', style=ButtonStyle())

HTML(value='')

In [21]:
# print("Melted DataFrame:")
# print(melted_df.head())
# melted_df.to_csv("melt.csv", index=False)

In [22]:
#first ten records of the frame used by the trend analysis below
all_df.head(n=10)

Unnamed: 0,description,respondent_group,year,sample_size,result,food_insecurity,Age,Suburb,Gender
0,Experienced food insecurity (worried food woul...,55-64 years,2023,115,13.2,Insecurity (multiple concerns + relief),55-64 Years,,
1,Experienced food insecurity (worried food woul...,65+ years,2023,203,5.0,Insecurity (multiple concerns + relief),65+ Years,,
2,Experienced food insecurity (worried food woul...,Docklands 3008,2023,113,25.7,Insecurity (multiple concerns + relief),,Docklands 3008,
3,Worried food would run out,35-44 years,2023,228,19.3,Worried food would run out,35-44 Years,,
4,Worried food would run out,55-64 years,2023,115,6.7,Worried food would run out,55-64 Years,,
5,Worried food would run out,Female,2023,758,24.6,Worried food would run out,,,Female
6,Worried food would run out,Male,2023,572,19.4,Worried food would run out,,,Male
7,Worried food would run out,Docklands 3008,2023,113,14.5,Worried food would run out,,Docklands 3008,
8,Worried food would run out,South Wharf / Southbank 3006,2023,147,16.0,Worried food would run out,,South Wharf / Southbank 3006,
9,Skipped meals,35-44 years,2023,228,16.3,Skipped meals,35-44 Years,,


## Food Security Trend Analysis and prediction by Demographics

In [None]:
#the regression below needs numeric years; values that cannot be parsed become NaN
all_df['year'] = pd.to_numeric(all_df['year'], errors='coerce')

#demographic dimension picker, opening on its placeholder entry
category_options = ['Age', 'Gender', 'Suburb']
category_dropdown = widgets.Dropdown(
    options=['Select Category', *category_options],
    value='Select Category',
    description='Category:',
)

#subgroup picker; it starts with the placeholder as its only option
value_dropdown = widgets.Dropdown(options=['Select Value'], value='Select Value', description='Value:')

#method to update the value dropdown
def update_value_dropdown(change=None, category_widget=category_dropdown, value_widget=value_dropdown):
    """Refill the value dropdown with the subgroups of the selected category.

    Parameters
    ----------
    change : dict, optional
        Change event passed by ``observe``; not inspected.
    category_widget, value_widget : widgets.Dropdown, optional
        Dropdowns to read and refill. They default to the widgets that exist
        when this function is defined, so the callback is unaffected when a
        later cell rebinds the global name ``category_dropdown`` to another
        widget.
    """
    selected_category = category_widget.value
    #the placeholder always comes first and is the only entry when no category is chosen
    choices = ['Select Value']
    if selected_category != 'Select Category':
        choices += all_df[selected_category].dropna().unique().tolist()
    value_widget.options = choices

#method to update the plot
def update_plot(change=None, category_widget=category_dropdown, value_widget=value_dropdown):
    """Plot the yearly mean result for the selected subgroup plus a linear forecast.

    The records of the selected subgroup are averaged per year, a linear
    regression is fitted on (year, mean result), and its predictions for 2024
    and 2025 are drawn next to the historical line.

    Parameters
    ----------
    change : dict, optional
        Change event passed by ``observe``; not inspected.
    category_widget, value_widget : widgets.Dropdown, optional
        Dropdowns to read. They default to the widgets that exist when this
        function is defined; reading the global ``category_dropdown`` at call
        time made this chart stop responding as soon as a later cell rebound
        that name to a new widget.
    """
    clear_output(wait=True)
    display(category_widget, value_widget)

    selected_category = category_widget.value
    selected_value = value_widget.value

    if selected_category == 'Select Category' or selected_value == 'Select Value':
        display(HTML("<b>Please select both a category and a value to proceed.</b>"))
        return

    #filtering data
    filtered_df = all_df[all_df[selected_category] == selected_value]

    if filtered_df.empty:
        display(HTML("<b>No data available for the selected combination.</b>"))
        return

    #aggregating data: mean result per year over all records of the subgroup
    aggregated_df = filtered_df.groupby('year', as_index=False).agg({'result': 'mean'})

    #data for the model
    X = aggregated_df[['year']].values
    y = aggregated_df['result'].values

    #train Linear Regression
    model = LinearRegression()
    model.fit(X, y)

    #prediction for future years
    future_years = np.array([[2024], [2025]])
    future_predictions = model.predict(future_years)

    #plotly figure
    fig = go.Figure()

    #historical data trace
    fig.add_trace(go.Scatter(
        x=aggregated_df['year'],
        y=aggregated_df['result'],
        mode='lines+markers',
        name='Historical Data',
        line=dict(color='blue')
    ))

    #predictions trace
    fig.add_trace(go.Scatter(
        x=future_years.flatten(),
        y=future_predictions,
        mode='markers+text',
        name='Predictions',
        marker=dict(color='red', size=10),
        text=[f"{pred:.2f}%" for pred in future_predictions],
        textposition="top center"
    ))

    #layout
    fig.update_layout(
        title=f"Food Security Trend for {selected_value} ({selected_category})",
        xaxis_title="Year",
        yaxis_title="Percentage",
        template="plotly_white",
        legend=dict(x=0.5, y=-0.2, xanchor='center', orientation='h'),
        height=600
    )

    fig.show()

    #export; several suburb labels contain "/" (e.g. "Kensington / Flemington 3031"),
    #which would act as a path separator in a file name
    #NOTE(review): assumes add_export_button builds a file name from the prefix - confirm
    safe_value = str(selected_value).replace('/', '-').replace('\\', '-')
    add_export_button(fig, file_name_prefix=f"Food_Security_Trend_{selected_category}_{safe_value}")

#a new category refills the value list; a new value redraws the chart
for dropdown, callback in (
    (category_dropdown, update_value_dropdown),
    (value_dropdown, update_plot),
):
    dropdown.observe(callback, names='value')

#initial view: both dropdowns plus a hint, no chart yet
display(category_dropdown, value_dropdown)
display(HTML("<b>Please select both a category and a value to proceed.</b>"))

Dropdown(description='Category:', index=1, options=('Select the Category', 'Age', 'Gender', 'Suburb'), value='…

Dropdown(description='Value:', index=4, options=('Select Value', '55-64 Years', '65+ Years', '35-44 Years', '4…

Button(button_style='success', description='Export', icon='download', style=ButtonStyle())

HTML(value='')

## Combined Trend Analysis by Demographic Category

In [30]:
#category picker for the combined trend chart, opening on its placeholder entry
category_options = ['Age', 'Gender', 'Suburb']
category_dropdown = widgets.Dropdown(
    options=['Select Category', *category_options],
    value='Select Category',
    description='Category:',
)

#function to generate the combined plot for all trends in a category
def update_plot(change=None):
    """Redraw the combined trend chart for the category chosen in ``category_dropdown``.

    For every distinct non-null value of the selected category column in
    ``all_df``, the yearly mean of ``result`` is drawn as a historical line and
    a ``LinearRegression`` fitted on those yearly means supplies predicted
    points for 2024 and 2025. The finished figure is shown and handed to
    ``add_export_button``.

    Parameters
    ----------
    change : optional
        Change notification passed in by the widget observer. It is not read;
        the current selection is taken from the dropdown itself.

    Returns
    -------
    None
        All output is produced through ``display`` and ``fig.show()``.
    """
    clear_output(wait=True)
    display(category_dropdown)

    selected_category = category_dropdown.value

    #placeholder still selected: show the prompt instead of a chart
    if selected_category == 'Select Category':
        display(HTML("<b>Please select a category to view combined trends.</b>"))
        return

    #getting unique values for the selected category
    unique_values = all_df[selected_category].dropna().unique()

    if len(unique_values) == 0:
        display(HTML("<b>No data available for the selected category.</b>"))
        return

    #plotly figure
    fig = go.Figure()

    #separate color palettes for historical and predicted traces; they are
    #indexed with wrap-around below because a category may have more values
    #than a palette has colors, which would otherwise raise IndexError
    historical_colors = colors.qualitative.Plotly
    predicted_colors = colors.qualitative.Pastel

    #years to predict are the same for every value, so build them once
    future_years = np.array([[2024], [2025]])

    #looping through each unique value in the category and adding it to the plot
    for idx, value in enumerate(unique_values):
        filtered_df = all_df[all_df[selected_category] == value]
        if filtered_df.empty:
            continue

        #aggregating data by year; years whose mean is NaN are dropped because
        #the regression cannot be fitted on missing targets
        aggregated_df = (
            filtered_df.groupby('year', as_index=False)
            .agg({'result': 'mean'})
            .dropna(subset=['result'])
        )
        if aggregated_df.empty:
            continue

        #the data for the linear regression model
        X = aggregated_df[['year']].values
        y = aggregated_df['result'].values

        #training
        model = LinearRegression()
        model.fit(X, y)

        #prediction
        future_predictions = model.predict(future_years)

        #historical
        fig.add_trace(go.Scatter(
            x=aggregated_df['year'],
            y=aggregated_df['result'],
            mode='lines+markers',
            name=f'{value} (Historical)',
            legendgroup='Historical',
            line=dict(color=historical_colors[idx % len(historical_colors)], dash='solid'),
            marker=dict(size=8),
            hovertemplate="<b>%{fullData.name}</b><br>Percentage: %{y:.2f}%<extra></extra>"
        ))

        #predictions
        fig.add_trace(go.Scatter(
            x=future_years.flatten(),
            y=[round(pred, 2) for pred in future_predictions],
            mode='lines+markers',
            name=f'{value} (Predicted)',
            legendgroup='Predicted',
            line=dict(color=predicted_colors[idx % len(predicted_colors)], dash='dot'),
            marker=dict(size=8),
            hovertemplate="<b>%{fullData.name}</b><br>Predicted: %{y:.2f}%<extra></extra>"
        ))

    #layout: the legend sits to the right of the plot, so the right margin is widened
    fig.update_layout(
        title=f"Combined Trends by {selected_category}",
        xaxis_title="Year",
        yaxis_title="Percentage",
        template="plotly_white",
        height=650,
        width=1300,
        legend=dict(
            x=1.05,
            y=1,
            orientation='v',
            title_text='',
            font=dict(size=10),
        ),
        margin=dict(l=40, r=250, t=50, b=50),
        hovermode="x unified"
    )

    fig.show()

    #export
    add_export_button(fig, file_name_prefix=f"Combined_Trends_{selected_category}")

#re-running update_plot whenever the dropdown's 'value' trait changes
category_dropdown.observe(update_plot, 'value')

#initial view: the dropdown followed by the prompt message
display(category_dropdown, HTML("<b>Please select a category to view combined trends.</b>"))

Dropdown(description='Category:', index=3, options=('Select Category', 'Age', 'Gender', 'Suburb'), value='Subu…

Button(button_style='success', description='Export', icon='download', style=ButtonStyle())

HTML(value='')

## Trends by Food Insecurity Type and Demographic Category

In [None]:
#category selector: the placeholder entry is listed first and selected by default
category_options = ['Age', 'Gender', 'Suburb']
category_dropdown = widgets.Dropdown(
    options=['Select Category', *category_options],
    value='Select Category',
    description='Category:',
    style=dict(description_width='initial'),
    layout=widgets.Layout(width='400px')
)

#food insecurity selector: placeholder plus the distinct non-null types found in all_df
food_insecurity_options = ['Select the type', *all_df['food_insecurity'].dropna().unique().tolist()]
food_insecurity_dropdown = widgets.Dropdown(
    options=food_insecurity_options,
    value='Select the type',
    description='Food Insecurity:',
    style=dict(description_width='initial'),
    layout=widgets.Layout(width='400px')
)

#function to update the plot based on the selected filters
def update_plot(change=None):
    """Redraw the per-category trend chart for the selected food insecurity type.

    Rows of ``all_df`` matching the chosen ``food_insecurity`` type and having
    a non-null value in the chosen category column are grouped by category
    value. For each value the yearly mean of ``result`` is drawn as a
    historical line and a ``LinearRegression`` fitted on those yearly means
    supplies predicted points for 2024 and 2025. The finished figure is shown
    and handed to ``add_export_button``.

    Parameters
    ----------
    change : optional
        Change notification passed in by the widget observer. It is not read;
        the current selections are taken from the dropdowns themselves.

    Returns
    -------
    None
        All output is produced through ``display`` and ``fig.show()``.
    """
    clear_output(wait=True)
    display(food_insecurity_dropdown, category_dropdown)

    selected_food_insecurity = food_insecurity_dropdown.value
    selected_category = category_dropdown.value

    #checking if both filters are selected
    if selected_food_insecurity == 'Select the type' or selected_category == 'Select Category':
        display(HTML("<b>Please select both Food Insecurity type and a Category to view trends.</b>"))
        return

    #filtering the data based on the selected filters
    filtered_df = all_df[
        (all_df['food_insecurity'] == selected_food_insecurity) &
        (all_df[selected_category].notnull())
    ]

    if filtered_df.empty:
        display(HTML("<b>No data available for the selected combination of filters.</b>"))
        return

    #getting unique values for the selected category; filtered_df is non-empty
    #and already free of nulls in this column, so at least one value exists
    unique_values = filtered_df[selected_category].unique()

    #figure
    fig = go.Figure()

    #years to predict are the same for every value, so build them once
    future_years = np.array([[2024], [2025]])

    #looping through each unique value in the category and adding it to the plot
    for value in unique_values:
        category_filtered_df = filtered_df[filtered_df[selected_category] == value]

        #aggregating data by year; years whose mean is NaN are dropped because
        #the regression cannot be fitted on missing targets
        aggregated_df = (
            category_filtered_df.groupby('year', as_index=False)
            .agg({'result': 'mean'})
            .dropna(subset=['result'])
        )
        if aggregated_df.empty:
            continue

        #input data for the linear regression model
        X = aggregated_df[['year']].values
        y = aggregated_df['result'].values

        #training
        model = LinearRegression()
        model.fit(X, y)

        #prediction
        future_predictions = model.predict(future_years)
        future_predictions_rounded = [round(pred, 2) for pred in future_predictions]

        #historical
        fig.add_trace(go.Scatter(
            x=aggregated_df['year'],
            y=aggregated_df['result'],
            mode='lines+markers',
            name=f'{value} (Historical)',
            line=dict(dash='solid')
        ))

        #predictions
        fig.add_trace(go.Scatter(
            x=future_years.flatten(),
            y=future_predictions_rounded,
            mode='lines+markers',
            name=f'{value} (Predicted)',
            line=dict(dash='dot'),
            marker=dict(size=8),
        ))

    #layout: horizontal legend centered below the plot
    fig.update_layout(
        title=f"Trends by {selected_category} for '{selected_food_insecurity}'",
        xaxis_title="Year",
        yaxis_title="Percentage",
        template="plotly_white",
        height=600,
        legend=dict(x=0.5, y=-0.2, xanchor='center', orientation='h'),
        hovermode="x unified"
    )

    fig.show()

    #export
    add_export_button(fig, file_name_prefix=f"Trends_{selected_food_insecurity}_{selected_category}")

#a change to the 'value' trait of either dropdown re-runs update_plot
category_dropdown.observe(update_plot, names='value')
food_insecurity_dropdown.observe(update_plot, names='value')

#initial view: both dropdowns followed by the prompt message
display(food_insecurity_dropdown)
display(category_dropdown)
display(HTML("<b>Please select both Food Insecurity type and a Category to view trends.</b>"))

Dropdown(description='Food Insecurity:', index=2, layout=Layout(width='400px'), options=('Select the type', 'I…

Dropdown(description='Category:', index=2, layout=Layout(width='400px'), options=('Select Category', 'Age', 'G…

Button(button_style='success', description='Export', icon='download', style=ButtonStyle())

HTML(value='')

In [26]:
#previewing the first five rows of all_df
all_df.head(5)

Unnamed: 0,description,respondent_group,year,sample_size,result,food_insecurity,Age,Suburb,Gender
0,Experienced food insecurity (worried food woul...,55-64 years,2023,115,13.2,Insecurity (multiple concerns + relief),55-64 Years,,
1,Experienced food insecurity (worried food woul...,65+ years,2023,203,5.0,Insecurity (multiple concerns + relief),65+ Years,,
2,Experienced food insecurity (worried food woul...,Docklands 3008,2023,113,25.7,Insecurity (multiple concerns + relief),,Docklands 3008,
3,Worried food would run out,35-44 years,2023,228,19.3,Worried food would run out,35-44 Years,,
4,Worried food would run out,55-64 years,2023,115,6.7,Worried food would run out,55-64 Years,,


## Next steps:
* Create a dashboard for the visualizations
* Try Flask Injection
* Make a short walkthrough video of the code
* Explore the API documentation (done)