In [20]:
# Import necessary libraries
import altair as alt
import pandas as pd

# Silence pandas' SettingWithCopyWarning for the rest of the session.
# This only hides the warning; it does not change how assignments behave.
pd.set_option('mode.chained_assignment', None)

In [21]:
# Palette for the commute-type lines; the chart cell pairs these positionally
# with the distinct values of the 'Group' column
colors = [
    'yellow',
    'purple',
    'pink',
    'green',
    'gray',
    'orange',
]

# Load the commute popularity workbook
commute_data = pd.read_excel('Data/Commute Data.xlsx')

# Show the loaded table
commute_data

Unnamed: 0,Group,Year,Commute Means,Geography,ID Geography,Slug Geography,share
0,Drove Alone,2021,109898,,,,
1,Carpooled,2021,16435,,,,
2,Public Transit,2021,59998,,,,
3,Walked,2021,32400,,,,
4,"Taxicab, motorcycle, bicycle, or other means",2021,11678,,,,
5,Worked At Home,2021,103301,,,,
6,Drove Alone,2020,141079,"Boston, MA",16000US2507000,boston-ma,0.375298
7,Carpooled,2020,21609,"Boston, MA",16000US2507000,boston-ma,0.057484
8,Public Transit,2020,115561,"Boston, MA",16000US2507000,boston-ma,0.307415
9,Walked,2020,54979,"Boston, MA",16000US2507000,boston-ma,0.146255


In [22]:
# Interval selection along the x-axis. It is attached to the commute chart here
# and reused as a filter by the noise-complaint bar chart.
brush = alt.selection_interval(encodings=['x'])

# Distinct commute types, in order of first appearance; used as the color domain
domain = commute_data['Group'].unique().tolist()

# Encoding channels for the multi-series commute line chart
commute_x = alt.X('Year:N', axis=alt.Axis(labelAngle=0, grid=True))
commute_y = alt.Y(
    'Commute Means:Q',
    axis=alt.Axis(title='Number of Boston Households Using Each Commute Type'),
)
commute_color = alt.Color(
    'Group:N',
    scale=alt.Scale(domain=domain, range=colors),
    legend=alt.Legend(title='Commute Type', orient='left'),
)

# One line per commute type showing how its popularity changes from year to year
commutes = (
    alt.Chart(commute_data)
    .mark_line(point=True)
    .encode(x=commute_x, y=commute_y, color=commute_color)
    .properties(width=200, height=400)
    .add_selection(brush)
)

# Display the chart
commutes

In [23]:
# Read in the air quality data
air_data = pd.read_excel('Data/aqi_data.xlsx')

# Set to True to re-export the per-year mini charts as html files. The image
# tooltips need .png files; the html exports were converted to .png manually
# because the packages required for direct .png export would not run on the
# author's machine.
SAVE_MINI_CHARTS = False

# Years present in the data, ascending. Every per-year list below is built from
# this one sequence, so the yearly averages can never be paired with the wrong
# year (or fail with a length mismatch) if the file is unsorted or gains a year.
years = sorted(air_data['Year'].unique().tolist())

# For every year, create a line chart showing monthly fluctuations in air quality
yr_avgs = []
for year in years:
    # Build an independent frame for this year. assign() returns a new frame, so
    # the Month column is never written into a slice of air_data.
    yr_df = air_data.loc[air_data['Year'] == year].assign(
        Month=lambda df: pd.to_datetime(df['Date']).dt.month
    )
    yr_avgs.append(yr_df['Overall AQI Value'].mean())

    # Monthly average AQI for the current year
    month_line = alt.Chart(yr_df).mark_line(point=True).encode(
        x=alt.X('Month:N', axis=alt.Axis(labelAngle=0, title=f'Months in {year}')),
        y=alt.Y('average(Overall AQI Value):Q', title='Monthly Average AQI Value'),
        color=alt.value('blue'),
        tooltip=[alt.Tooltip('average(Overall AQI Value)', title='Average AQI Value')],
    ).properties(
        width=200,
        height=400
    )

    # Red rule marking the year's overall average across the monthly line
    avg_line = alt.Chart(yr_df).mark_rule(color='firebrick').encode(
        y='average(Overall AQI Value):Q',
        size=alt.SizeValue(3),
        tooltip=[alt.Tooltip('average(Overall AQI Value)')],
    ).properties(
        width=200,
        height=400
    )

    # Layer the two charts; optionally export them as '<year>_image.html'
    mini_chart = alt.layer(month_line, avg_line)
    if SAVE_MINI_CHARTS:
        mini_chart.save(f'{year}_image.html')

# One row per year: the yearly average plus the path of the pre-rendered monthly
# chart image that is shown in the tooltip
dct = {
    'Year': years,
    'Average AQI Value': yr_avgs,
    'image': [f'Image Tooltips/{yr}_image.png' for yr in years],
}
condensed_aird = pd.DataFrame(dct)

# Create the air quality line chart
air_line = alt.Chart(condensed_aird, title='Air Quality in Boston').mark_line(point=True).encode(
    x=alt.X('Year:N', axis=alt.Axis(labelAngle=0, grid=True)),
    y=alt.Y('Average AQI Value:Q',
            axis=alt.Axis(grid=True, title='Yearly Average AQI Value'),
            scale=alt.Scale(domain=(0, 40))),
    color=alt.value('blue'),
    tooltip=[alt.Tooltip('Average AQI Value'), alt.Tooltip('image')],
).properties(
    width=200,
    height=400
)

# Visualize the line chart
air_line

In [24]:
# Read in the Boston 311 Program noise complaint data
requests_data = pd.read_csv('Data/noise_complaints.csv')

# Noise disturbance types, in the order they are stacked and listed in the legend.
# The first entry (automotive) is the one highlighted in the chart.
noise_dists = ['Automotive Noise Disturbance', 'Aircraft Noise Disturbance', 'Animal Noise Disturbances',
               'Dumpster & Loading Noise Disturbances', 'Loud Parties/Music/People', 'Undefined Noise Disturbance',
               'Work Hours-Loud Noise Complaints']

# Map each type to its position in noise_dists (the list order is used as-is,
# nothing is reversed) so the stack order matches the legend order
noise_dists_idx = {dist: idx for idx, dist in enumerate(noise_dists)}

# Create an "idx" column with values indicating the order of the stacked bars.
# Any complaint type that is not in noise_dists gets NaN here.
cols = requests_data["type"].map(noise_dists_idx)
noise_poll = requests_data.assign(idx=cols)

# Highlight the first type in red and grey out every other type; derived from
# the length of noise_dists so the range always matches the domain
noise_colors = ['red'] + ['grey'] * (len(noise_dists) - 1)

# Create the horizontal stacked bar chart.
# NOTE(review): the chart is filtered by `brush`, the x-axis (Year) selection made
# on the commute chart; this presumably relies on the complaint data having a
# matching year field - confirm against the csv.
bars = alt.Chart(
    noise_poll,
    title='The Portion of Noise Pollution Created by Automobiles in Various Areas Around Mass Ave'
).mark_bar().encode(
    x=alt.X('count(type)', axis=alt.Axis(title='Number of Noise Disturbance Complaints')),
    y=alt.Y('neighborhood:N', axis=alt.Axis(title='Neighborhood')),
    color=alt.Color('type',
                    scale=alt.Scale(domain=noise_dists, range=noise_colors),
                    legend=alt.Legend(title='Noise Disturbance Type')),
    order='idx',
    tooltip=[alt.Tooltip('type', title='Noise Disturbance Type'),
             alt.Tooltip('count(type)', title='Number of Complaints')]
).transform_filter(brush)

In [25]:
# Top row: the commute line chart beside the air quality line chart, so the
# relationship between the two data sets can be read across
lines = alt.hconcat(
    commutes,
    air_line,
    center=True,
    title='The Relationship Between Air Quality in Boston and the Popularity of Various Types of Commutes',
)

# Stack the top row above the noise-complaint bar chart, give each chart its own
# color scale, then tune title placement and legend font sizes for readability
chart = (
    alt.vconcat(lines, bars, center=True)
    .resolve_scale(color='independent')
    .configure_title(offset=5, orient='top', anchor='middle')
    .configure_legend(titleFontSize=10, labelFontSize=8.1)
)

# Display the combined chart
chart

In [26]:
# Export the combined visualization as a standalone html file
output_path = 'FinalVis1.html'
chart.save(output_path)