In [4]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import ipywidgets as widgets
import matplotlib.pyplot as plt

# Load the survey data; the code below relies on its 'Category' and
# 'Subcategory' columns.
df = pd.read_csv('safety_at_night.csv')

# Distinct subcategory values available under each category of interest.
genders = df.loc[df['Category'] == 'Gender', 'Subcategory'].unique()
age_ranges = df.loc[df['Category'] == 'AgeRange', 'Subcategory'].unique()
suburbs = df.loc[df['Category'] == 'Suburb', 'Subcategory'].unique()

# One dropdown per category. The first option of each is a placeholder that
# stands for "nothing selected"; update_prediction compares against these
# exact strings.
gender_dropdown = widgets.Dropdown(
    description='Gender:',
    options=['Select a Gender', *genders],
    disabled=False,
)

age_range_dropdown = widgets.Dropdown(
    description='Age Range:',
    options=['Select an Age Range', *age_ranges],
    disabled=False,
)

suburb_dropdown = widgets.Dropdown(
    description='Suburb:',
    options=['Select a Suburb', *suburbs],
    disabled=False,
)

# function to label points on graph
def plot_labels(x, y, labels, color, offset=0.1):
    """Write a percentage label next to each plotted point.

    Args:
        x: X coordinates of the points.
        y: Y coordinates of the points.
        labels: Numeric values to display, formatted to two decimals with a
            trailing '%'.
        color: Text colour passed through to matplotlib.
        offset: Horizontal shift applied to each label so that it sits to
            the right of its point.
    """
    # Styling shared by every label on the current axes.
    text_style = dict(fontsize=9, ha='left', va='center', color=color)
    for x_pos, y_pos, value in zip(x, y, labels):
        plt.text(x_pos + offset, y_pos, f'{value:.2f}%', **text_style)

def _describe_profile(gender, age_range, suburb):
    """Describe the selected profile using only the options actually chosen."""
    # The dropdown placeholders mean "no selection" and must never leak into
    # the printed sentences or the plot title.
    parts = [f"{gender}s" if gender != 'Select a Gender' else "people"]
    if age_range != 'Select an Age Range':
        parts.append(f"aged {age_range}")
    if suburb != 'Select a Suburb':
        parts.append(f"in {suburb}")
    return " ".join(parts)


def update_prediction(gender, age_range, suburb):
    """Fit a linear trend to the selected groups and forecast 2024 and 2025.

    Filters the module-level ``df`` for every option that is not left on its
    dropdown placeholder, averages the matching 'Percentage' values per
    'Year', fits a ``LinearRegression`` of percentage on year, then prints
    the two forecasts and plots them next to the historical series.

    The parameter list is kept exactly as is because ``widgets.interactive``
    is called with ``gender``, ``age_range`` and ``suburb`` keyword controls.

    Args:
        gender: Selected gender, or 'Select a Gender' when unset.
        age_range: Selected age range, or 'Select an Age Range' when unset.
        suburb: Selected suburb, or 'Select a Suburb' when unset.

    Returns:
        None. Results are printed and plotted.
    """
    # (category in the CSV, chosen value, placeholder meaning "not chosen")
    selections = (
        ('Gender', gender, 'Select a Gender'),
        ('AgeRange', age_range, 'Select an Age Range'),
        ('Suburb', suburb, 'Select a Suburb'),
    )
    selected_data = [
        df[(df['Category'] == category) & (df['Subcategory'] == value)]
        for category, value, placeholder in selections
        if value != placeholder
    ]

    if not selected_data:
        print("Select at least 1 option")
        return

    # NOTE: each selection contributes its own rows, so the per-year figure
    # is the mean of the separately reported percentages for the chosen
    # groups, not a measurement of their intersection.
    combo_data = pd.concat(selected_data, ignore_index=True)
    combo_data = combo_data.groupby(['Year']).agg({'Percentage': 'mean'}).reset_index()

    # Years without a usable percentage cannot be fitted; bail out with a
    # message rather than letting model.fit raise inside the widget callback.
    combo_data = combo_data.dropna(subset=['Percentage'])
    if combo_data.empty:
        print("No data available for the selected options")
        return

    # prepare features and target variable
    X = combo_data[['Year']]
    y = combo_data['Percentage']

    model = LinearRegression()
    model.fit(X, y)

    # Predict using a frame with the same 'Year' column the model was fit on.
    forecast_years = [2024, 2025]
    predicted_percentages = model.predict(pd.DataFrame({'Year': forecast_years}))

    description = _describe_profile(gender, age_range, suburb)

    for year, predicted in zip(forecast_years, predicted_percentages):
        print(f"Predicted percentage of {description} that would feel safe out at night in {year}: {predicted:.2f}%")

    # plot + label past data and our predictions
    plt.figure(figsize=(12, 6))
    plt.plot(combo_data['Year'], combo_data['Percentage'], marker='o', linestyle='-', color='b', label='Historical Data')
    plot_labels(combo_data['Year'], combo_data['Percentage'], combo_data['Percentage'], 'blue')
    plt.scatter(forecast_years, predicted_percentages, color='r', zorder=5, label='Predictions')
    plot_labels(forecast_years, predicted_percentages, predicted_percentages, 'red')

    # Reuse the same description so the title never shows placeholder text.
    plt.title(f'Percentage of {description} feeling safe at night over the years')
    plt.xlabel('Year')
    plt.ylabel('Percentage Reported Feeling Safe')
    plt.legend()
    plt.grid(True)
    plt.show()

# Bind each dropdown to the update_prediction argument of the same name.
# The call stays as the final bare expression so that the notebook displays
# the resulting widget as the cell output.
dropdown_controls = {
    'gender': gender_dropdown,
    'age_range': age_range_dropdown,
    'suburb': suburb_dropdown,
}
widgets.interactive(update_prediction, **dropdown_controls)

interactive(children=(Dropdown(description='Gender:', options=('Select a Gender', 'Female', 'Male'), value='Se…