In [9]:
# Import the dependencies:
import pandas as pd
from pathlib import Path
import hvplot.pandas
import os

In [10]:
# CREATING CUSTOMIZED VISUALIZATIONS
# Visual communication relies on your ability to create customized visualizations.
# Specifically, you want these visualizations to enhance your audience's understanding of the information that you're presenting.
# How your visualizations incorporate color, size, shape, and even chart options can have a tremendous impact on how your audience digests the information.
# With VISUALIZATION OPTIONS, we can customize and curate our visualizations.
# Advanced options exist for customizing titles, fine-tuning axes labels, and formatting plots with attractive color themes.
# Options also exist for adding effects, such as those for on-hover, selection, and deselection events.
# And the hvPlot library has the necessary functions for incorporating these custom visualization options.

In [11]:
# Load the housing sales CSV into the house_sale_df DataFrame:
house_sale_df = pd.read_csv(Path('housing_sale_data.csv'))

# Preview the first and last five rows of the DataFrame:
display(house_sale_df.head(), house_sale_df.tail())

Unnamed: 0,salesHistoryKey,propertyKey,streetAddress,salesTypeDsc,saleAmt,saleDate
0,117701,64736,1121 ARLINGTON BLVD 831,O-Assignment of Lease,275000,2019-06-24
1,270452,64190,989 S BUCHANAN ST 418,B-Not Previously Assessed,565000,2019-06-21
2,117663,23057,1121 ARLINGTON BLVD 820,O-Assignment of Lease,165000,2019-06-20
3,117485,23019,1121 ARLINGTON BLVD 717,O-Assignment of Lease,171900,2019-06-20
4,86768,53495,3800 FAIRFAX DR 1-83,C-Condo Parking Space,34000,2019-06-17


Unnamed: 0,salesHistoryKey,propertyKey,streetAddress,salesTypeDsc,saleAmt,saleDate
21280,163323,52647,3817 16th ST S,"1-Foreclosure, Auction, Bankru",165000,1999-10-07
21281,115652,22633,1200 N NASH ST 1130,C-Condo Parking Space,5100,1999-10-07
21282,194989,67748,3429 22nd ST S,"1-Foreclosure, Auction, Bankru",178598,1999-10-07
21283,88529,17508,900 N STAFFORD ST 1611,"4-Multiple RPCs, Not A Coded S",111350,1999-10-02
21284,89926,17779,901 N STUART ST 4-202,"4-Multiple RPCs, Not A Coded S",111350,1999-10-02


In [12]:
# This DataFrame contains sales data from 1999-10-02 to 2019-06-24.
# We want to focus on the sales only during June 2019, so we'll filter our data using the `loc` function.
# To do this, we'll slice into the `saleDate` column and use the conditional and logical operators >=, &, < to single out the date window we need.
# The `saleDate` values are ISO-formatted strings (YYYY-MM-DD), so comparing them as text orders them chronologically.
# The lower bound is inclusive and the upper bound is the first day of July, so sales dated June 1 and June 30 are kept
# (strict `> '2019-06-01'` and `< '2019-06-30'` comparisons would silently drop both of those days):
home_sale_prices_june_2019 = house_sale_df.loc[
    (house_sale_df['saleDate'] >= '2019-06-01')
    & (house_sale_df['saleDate'] < '2019-07-01')
]

# Review the first and last five rows of the resulting DataFrame:
display(home_sale_prices_june_2019.head())
display(home_sale_prices_june_2019.tail())

Unnamed: 0,salesHistoryKey,propertyKey,streetAddress,salesTypeDsc,saleAmt,saleDate
0,117701,64736,1121 ARLINGTON BLVD 831,O-Assignment of Lease,275000,2019-06-24
1,270452,64190,989 S BUCHANAN ST 418,B-Not Previously Assessed,565000,2019-06-21
2,117663,23057,1121 ARLINGTON BLVD 820,O-Assignment of Lease,165000,2019-06-20
3,117485,23019,1121 ARLINGTON BLVD 717,O-Assignment of Lease,171900,2019-06-20
4,86768,53495,3800 FAIRFAX DR 1-83,C-Condo Parking Space,34000,2019-06-17


Unnamed: 0,salesHistoryKey,propertyKey,streetAddress,salesTypeDsc,saleAmt,saleDate
15,120441,23642,1011 ARLINGTON BLVD 844,O-Assignment of Lease,141000,2019-06-07
16,269587,64136,1140 N STUART ST,L-Land Sale,1642147,2019-06-07
17,229369,44262,851 N GLEBE RD 1801,5-Not Market Sale,520000,2019-06-05
18,91703,18000,900 N TAYLOR ST 1924,5-Not Market Sale,40000,2019-06-05
19,270437,51720,989 S BUCHANAN ST 409,B-Not Previously Assessed,306456,2019-06-03


In [14]:
# GROUP THE DATA
# Now that we have the data for the month of June in 2019, the task is to evaluate the changes in the amount of sales per day.
# To do this, we need to group the data by sale date.

# Build a DataFrame holding the total sale amount for each day in June.
# Group the June sales by 'saleDate', keep only the 'saleAmt' column,
# sum the amounts within each date, and order the rows by date (the index):
june_sale_amt_date = (
    home_sale_prices_june_2019
    .groupby('saleDate')[['saleAmt']]
    .sum()
    .sort_index()
)

# Review the resulting DataFrame (indexed by 'saleDate'):
june_sale_amt_date

Unnamed: 0_level_0,saleAmt
saleDate,Unnamed: 1_level_1
2019-06-03,306456
2019-06-05,560000
2019-06-07,1957147
2019-06-10,2168998
2019-06-12,187000
2019-06-13,830000
2019-06-14,510000
2019-06-16,1531210
2019-06-17,1664000
2019-06-20,336900


In [16]:
# VISUALIZE THE DATA IN A BAR CHART
# Draw the June sale data as a bar chart with hvPlot.
# The x and y axes are chosen with the keyword syntax (x='saleDate', y='saleAmt');
# `rot=90` rotates the x-axis tick labels and `title` sets the chart heading:
june_sale_amt_date.hvplot.bar(
    x='saleDate', y='saleAmt',
    rot=90,
    title='Home Sales per Day - June 2019',
)

In [17]:
# ADDING FORMATTING OPTIONS
# We intend our visualizations to communicate information to an audience.
# Therefore, it's important that the visualizations be clear.
# For example, if your audience can't interpret the numbers associated with your plot when they're in scientific notation, they won't understand the story your visualization is trying to convey.
# Formatting options, like those for rotation, axes labels, titles, and axes inversion, help make visualizations easily digestible to your audience.

# USE FORMATTERS
# With the `.opts(xformatter)` and `.opts(yformatter)` options in hvPlot, you can format label values.
# To do that, you use syntax that's similar to the syntax for rounding the decimal places of a variable inside an f-string.
# We can use formatters to adjust how the numbers on the y-axis display, which will make the chart scale easier to interpret.
# We'll thus add `.opts(yformatter='%.0f')` to the end of the code for the plot:
june_sale_amt_date.hvplot.bar(
    x='saleDate', y='saleAmt',
    rot=90,
    title='Home Sales per Day - June 2019',
).opts(
    yformatter='%.0f',
)

In [18]:
# USE AXES INVERSION
# The last formatting option that we'll highlight is AXES INVERSION.
# With the `invert_axes` option, you can swap, or invert, the x and y axes along with the data that's associated with those axes.
# In the case of our June home sales plot, we can use the `invert_axes` option to position the bars of the bar chart horizontally rather than vertically.

# Plot a bar chart of the June sale data using hvPlot:
# - Specify the variables for the x and y axes using the syntax (x='saleDate', y='saleAmt')
# - Include rotation of the x-axis labels
# - Include an informative title that describes the visualization
# - Invert the axes for dramatic effect
# - Include a formatter that rounds the sale-amount labels to the whole number; because the axes
#   are inverted, be sure to switch the `yformatter` to the `xformatter`:
june_sale_amt_date.hvplot.bar(
    x='saleDate', y='saleAmt',
    rot=90,
    title='Home Sales per Day - Arlington, VA - June 2019',
).opts(xformatter='%.0f', invert_axes=True)

In [20]:
# ADDING STYLING OPTIONS
# With styling options, you can customize the overall aesthetic of your visualizations to make them unique and engaging.
# Options exist for customizing the color, vibrancy, and size of each element that the visualization depicts.
# Other styling options exist for adding dynamic effects, such as those for on-hover and selection events.
# The styling options work like the formatting options:
# we specify the option parameter and the corresponding value in our code.
# For example, passing `color='orange'` changes the bars in our June home sales plot from the default color to orange:
june_sale_amt_date.hvplot.bar(
    x='saleDate', y='saleAmt',
    color='orange',
    title='Home Sales per Day - Arlington, VA - June 2019',
).opts(xformatter='%.0f', invert_axes=True)