In [None]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

In [None]:

# Load the data file containing the fuel type and postcode, keeping only the
# columns at positions 0, 1, 2 and 4.
columns_to_keep_indices = [0,1,2,4]
top10_for2021_df = pd.read_csv('Cleaned_post_code_registration.csv').iloc[:, columns_to_keep_indices]

# Order the rows from most to fewest registrations, then bucket them by fuel type.
sorted_df = top10_for2021_df.sort_values(
    by='Registrations as at 31 January 2022',
    ascending=False,
)
grouped_df = sorted_df.groupby('Fuel Type')

# Map every fuel type except Hydrogen Cell Vehicle to the first ten rows of its
# bucket, re-indexed from zero.
fuel_type_ds = {
    fuel_type: group.head(10).reset_index(drop=True)
    for fuel_type, group in grouped_df
    if fuel_type != 'Hydrogen Cell Vehicle'
}


In [None]:
resource_path = Path('Resources')
# Income data that is joined onto each fuel type's top-10 table
income_df = pd.read_csv('Resources/C_obs_clean.csv')

# Merged result per fuel type, keyed by the fuel type name
merged_ds = {}

for fuel_type, fuel_type_df in fuel_type_ds.items():
    # Left join on Postcode, then discard rows whose Population equals 0
    merged_df = (
        fuel_type_df
        .merge(income_df, on='Postcode', how='left')
        .loc[lambda frame: frame['Population'].ne(0)]
    )
    merged_ds[fuel_type] = merged_df

    # Write one csv per fuel type into the resources folder and report the file created
    filename = resource_path / f'{fuel_type}_top10.csv'
    merged_df.to_csv(filename, index=False)
    print(f'Saved {fuel_type} merge to {filename}')



Manipulate the data in the Battery Electric Vehicle file to produce the age and income distributions: keep the rows where weekly income is 'Total' and sort them by registrations, then age group.

In [None]:
# Input (merged top-10 file) and output (filtered subset) locations
csv_file_path = 'Resources/Battery Electric Vehicle_top10.csv'
subset_to_save = 'Resources/BEV_top10_total.csv'

BEV_df = pd.read_csv(csv_file_path)

# Keep the rows whose income value contains 'Total' (case-insensitive, missing
# values excluded), ordered by registrations (highest first) and then age (ascending)
BEV_subset = (
    BEV_df
    .loc[lambda frame: frame['Total personal income (weekly)'].str.contains('Total', case=False, na=False)]
    .sort_values(by=['Registrations as at 31 January 2022', 'Age'], ascending=[False, True])
)
BEV_subset.to_csv(subset_to_save, index=False)

Manipulate the data in the Hybrid Vehicle file to produce the age and income distributions: keep the rows where weekly income is 'Total' and sort them by registrations, then age group.



In [None]:
# Input (merged top-10 file) and output (filtered subset) locations
csv_file_path = 'Resources/Hybrid_top10.csv'
subset_to_save = 'Resources/Hybrid_top10_total.csv'

Hybrid_df = pd.read_csv(csv_file_path)

# Keep the rows whose income value contains 'Total' (case-insensitive, missing
# values excluded), ordered by registrations (highest first) and then age (ascending)
Hybrid_subset = (
    Hybrid_df
    .loc[lambda frame: frame['Total personal income (weekly)'].str.contains('Total', case=False, na=False)]
    .sort_values(by=['Registrations as at 31 January 2022', 'Age'], ascending=[False, True])
)
Hybrid_subset.to_csv(subset_to_save, index=False)

Manipulate the data in the Internal Combustion Engine Vehicle file to produce the age and income distributions: keep the rows where weekly income is 'Total' and sort them by registrations, then age group.

In [None]:
# Input (merged top-10 file) and output (filtered subset) locations
csv_file_path = 'Resources/Internal Combustion Engine_top10.csv'
subset_to_save = 'Resources/ICE_top10_total.csv'

ICE_df = pd.read_csv(csv_file_path)

# Keep the rows whose income value contains 'Total' (case-insensitive, missing
# values excluded), ordered by registrations (highest first) and then age (ascending)
ICE_subset = (
    ICE_df
    .loc[lambda frame: frame['Total personal income (weekly)'].str.contains('Total', case=False, na=False)]
    .sort_values(by=['Registrations as at 31 January 2022', 'Age'], ascending=[False, True])
)
ICE_subset.to_csv(subset_to_save, index=False)

## Plot the age breakdown of the suburbs

Battery Electric Vehicles

In [None]:
# Load the saved BEV subset, stripping leading/trailing whitespace from the age labels
plot_file_path = 'Resources/BEV_top10_total.csv'
plot_df = pd.read_csv(plot_file_path).assign(Age=lambda frame: frame['Age'].str.strip())

# Leave out the rows whose age label is 'Total'
f_plot_df = plot_df.loc[plot_df['Age'].ne('Total')]

# Distinct postcodes present in the filtered data, in order of appearance
pc_to_plot = f_plot_df['Postcode'].unique()


In [None]:
# Grouped bar chart of population by age group for the BEV postcodes:
# one cluster of bars per age group, one bar per postcode inside each cluster.
fig, ax = plt.subplots(figsize=(12, 8))

# Age groups in order of first appearance; each one gets an integer slot on the x-axis
age_groups = list(f_plot_df['Age'].dropna().unique())
age_slot = {age: slot for slot, age in enumerate(age_groups)}

# Share 80% of each slot between the postcodes so a cluster never runs into the
# next age group. A fixed width of 0.4 combined with an offset of i * 0.4 made a
# cluster of up to ten postcodes about 4 units wide on a 1-unit grid, drawing
# bars from different age groups on top of each other.
n_postcodes = len(pc_to_plot)
bar_width = 0.8 / max(n_postcodes, 1)  # max() avoids dividing by zero when there is nothing to plot

for i, postcode in enumerate(pc_to_plot):
    # Rows without an age label have no slot on the axis, so they are left out
    subset_by_postcode = f_plot_df[f_plot_df['Postcode'] == postcode].dropna(subset=['Age'])

    # Place each bar by its own age group rather than by row order, so a postcode
    # with a missing age group still lines up with the tick labels
    x_positions = [age_slot[age] + i * bar_width for age in subset_by_postcode['Age']]

    ax.bar(x_positions,
           subset_by_postcode['Population'],
           width=bar_width,
           label=f'Postcode {postcode}',
           alpha=0.7
          )

# Centre each tick under its cluster and label it with the age group
cluster_centre_offset = bar_width * (n_postcodes - 1) / 2
ax.set_xticks([slot + cluster_centre_offset for slot in range(len(age_groups))])
ax.set_xticklabels(age_groups, rotation=45, ha='right')  # rotated for better visibility

ax.set_title('Population Breakdown by Age Groups - Top 10 BEV Postcodes')
ax.set_xlabel('Age Groups')
ax.set_ylabel('Population')
ax.legend()
fig.tight_layout()

# Make sure the output folder exists before saving, otherwise savefig raises
Path('Output').mkdir(parents=True, exist_ok=True)
fig.savefig('Output/Population_Breakdown_Postcode_BEV.png')
plt.show()

Hybrid Vehicles

In [None]:
# Load the saved Hybrid subset, stripping leading/trailing whitespace from the age labels
plot_file_path = 'Resources/Hybrid_top10_total.csv'
plot_df = pd.read_csv(plot_file_path).assign(Age=lambda frame: frame['Age'].str.strip())

# Leave out the rows whose age label is 'Total'
f_plot_df = plot_df.loc[plot_df['Age'].ne('Total')]

# Distinct postcodes present in the filtered data, in order of appearance
pc_to_plot = f_plot_df['Postcode'].unique()


In [None]:
# Grouped bar chart of population by age group for the Hybrid postcodes:
# one cluster of bars per age group, one bar per postcode inside each cluster.
fig, ax = plt.subplots(figsize=(12, 8))

# Age groups in order of first appearance; each one gets an integer slot on the x-axis
age_groups = list(f_plot_df['Age'].dropna().unique())
age_slot = {age: slot for slot, age in enumerate(age_groups)}

# Share 80% of each slot between the postcodes so a cluster never runs into the
# next age group. A fixed width of 0.4 combined with an offset of i * 0.4 made a
# cluster of up to ten postcodes about 4 units wide on a 1-unit grid, drawing
# bars from different age groups on top of each other.
n_postcodes = len(pc_to_plot)
bar_width = 0.8 / max(n_postcodes, 1)  # max() avoids dividing by zero when there is nothing to plot

for i, postcode in enumerate(pc_to_plot):
    # Rows without an age label have no slot on the axis, so they are left out
    subset_by_postcode = f_plot_df[f_plot_df['Postcode'] == postcode].dropna(subset=['Age'])

    # Place each bar by its own age group rather than by row order, so a postcode
    # with a missing age group still lines up with the tick labels
    x_positions = [age_slot[age] + i * bar_width for age in subset_by_postcode['Age']]

    ax.bar(x_positions,
           subset_by_postcode['Population'],
           width=bar_width,
           label=f'Postcode {postcode}',
           alpha=0.5
          )

# Centre each tick under its cluster and label it with the age group
cluster_centre_offset = bar_width * (n_postcodes - 1) / 2
ax.set_xticks([slot + cluster_centre_offset for slot in range(len(age_groups))])
ax.set_xticklabels(age_groups, rotation=45, ha='right')  # rotated for better visibility

ax.set_title('Population Breakdown by Age Groups - Top 10 Hybrid Postcodes')
ax.set_xlabel('Age Groups')
ax.set_ylabel('Population')
ax.legend()
fig.tight_layout()

# Make sure the output folder exists before saving, otherwise savefig raises
Path('Output').mkdir(parents=True, exist_ok=True)
fig.savefig('Output/Population_Breakdown_Postcode_Hybrid.png')
plt.show()

Internal Combustion Engines

In [None]:
# Load the saved ICE subset, stripping leading/trailing whitespace from the age labels
plot_file_path = 'Resources/ICE_top10_total.csv'
plot_df = pd.read_csv(plot_file_path).assign(Age=lambda frame: frame['Age'].str.strip())

# Leave out the rows whose age label is 'Total'
f_plot_df = plot_df.loc[plot_df['Age'].ne('Total')]

# Distinct postcodes present in the filtered data, in order of appearance
pc_to_plot = f_plot_df['Postcode'].unique()


In [None]:
# Grouped bar chart of population by age group for the ICE postcodes:
# one cluster of bars per age group, one bar per postcode inside each cluster.
fig, ax = plt.subplots(figsize=(12, 8))

# Age groups in order of first appearance; each one gets an integer slot on the x-axis
age_groups = list(f_plot_df['Age'].dropna().unique())
age_slot = {age: slot for slot, age in enumerate(age_groups)}

# Share 80% of each slot between the postcodes so a cluster never runs into the
# next age group. A fixed width of 0.4 combined with an offset of i * 0.4 made a
# cluster of up to ten postcodes about 4 units wide on a 1-unit grid, drawing
# bars from different age groups on top of each other.
n_postcodes = len(pc_to_plot)
bar_width = 0.8 / max(n_postcodes, 1)  # max() avoids dividing by zero when there is nothing to plot

for i, postcode in enumerate(pc_to_plot):
    # Rows without an age label have no slot on the axis, so they are left out
    subset_by_postcode = f_plot_df[f_plot_df['Postcode'] == postcode].dropna(subset=['Age'])

    # Place each bar by its own age group rather than by row order, so a postcode
    # with a missing age group still lines up with the tick labels
    x_positions = [age_slot[age] + i * bar_width for age in subset_by_postcode['Age']]

    ax.bar(x_positions,
           subset_by_postcode['Population'],
           width=bar_width,
           label=f'Postcode {postcode}',
           alpha=0.5
          )

# Centre each tick under its cluster and label it with the age group
cluster_centre_offset = bar_width * (n_postcodes - 1) / 2
ax.set_xticks([slot + cluster_centre_offset for slot in range(len(age_groups))])
ax.set_xticklabels(age_groups, rotation=45, ha='right')  # rotated for better visibility

ax.set_title('Population Breakdown by Age Groups - Top 10 ICE Postcodes')
ax.set_xlabel('Age Groups')
ax.set_ylabel('Population')
ax.legend()
fig.tight_layout()

# Make sure the output folder exists before saving, otherwise savefig raises
Path('Output').mkdir(parents=True, exist_ok=True)
fig.savefig('Output/Population_Breakdown_Postcode_ICE.png')
plt.show()