# Fall 2024: Homework 6

## Analysis using Altair

In [1]:
import os

import altair as alt
import numpy as np
import pandas as pd
from vega_datasets import data


## Visualization 1

##### Data Loading and Transformation

In [2]:
######### Reading Data #########
# BFRO Bigfoot sighting reports; the 'date' column is parsed into datetimes on load.
bigfoot_df = pd.read_csv(
    "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/bfro_reports_fall2022.csv",
    parse_dates=['date'],
)

######### Handling missing values #########
# Build one column -> replacement mapping and apply it in a single fillna call:
#   * numeric columns get their own median
#   * object / category columns get the literal string "None"
# Columns of any other dtype (e.g. the parsed datetime column) are left untouched.
# NOTE(review): every numeric column is imputed, which presumably includes the
# coordinate columns used by the map further down - confirm that is intended.
numeric_columns = bigfoot_df.select_dtypes(include=['number']).columns
text_columns = bigfoot_df.select_dtypes(include=['object', 'category']).columns

fill_values = {name: bigfoot_df[name].median() for name in numeric_columns}
fill_values.update({name: "None" for name in text_columns})

bigfoot_df.fillna(fill_values, inplace=True)


##### Creating Visualization

In [3]:
######### Aggregating sightings by state and season #########
# One row per (state, season) pair with the number of reports in that pair.
sightings_per_group = bigfoot_df.groupby(['state', 'season']).size()
state_season_sightings = sightings_per_group.reset_index(name='sightings')

######### Creating altair bar chart #########
# Encoding channels are declared first so the chart expression below stays compact.
x_sightings = alt.X('sightings:Q', title='Number of Sightings')
y_state = alt.Y('state:N', sort='x', title='State')  # states ordered by the x channel
season_color = alt.Color(
    'season:N',
    title='Season',
    scale=alt.Scale(scheme='inferno', reverse=True),
)
bar_tooltip = [
    alt.Tooltip('state:N', title='State'),
    alt.Tooltip('season:N', title='Season'),
    alt.Tooltip('sightings:Q', title='Sightings'),
]

# Horizontal bars, one per state, coloured by season.
# width='container' lets the embedding page decide the chart width.
bigfoot_bar_chart = (
    alt.Chart(state_season_sightings)
    .mark_bar()
    .encode(x=x_sightings, y=y_state, color=season_color, tooltip=bar_tooltip)
    .properties(
        title='Bigfoot Sightings by State and Season',
        width='container',
        height=450,
    )
)

bigfoot_bar_chart

##### Creating JSON for the Jekyll webpage

In [4]:
# Directory that receives the exported chart JSON files for the Jekyll site.
# Set the JEKYLL_JSON_DIR environment variable to point the notebook at a different
# checkout; when it is unset, the original author's local path is used.
myJekyllDir = os.environ.get(
    'JEKYLL_JSON_DIR',
    '/Users/sharanya/Documents/SEMESTERS/7- FALL 2024/IS445/ClassProjects/Sharanya-20.github.io/assets/json/',
)
# Later cells build file paths with `myJekyllDir + '<name>.json'`, so this must remain
# a plain string that ends with a path separator (join with '' appends one if missing).
myJekyllDir = os.path.join(myJekyllDir, '')

# Export the state/season bar chart for embedding on the page.
bigfoot_bar_chart.save(myJekyllDir + 'bigfoot_bar_chart_viz1.json')

## Visualization 2

In [5]:
# Count sightings per (calendar year, season). Naming the year Series 'year' up front
# means reset_index() yields the columns year / season / sightings directly.
sighting_year = bigfoot_df['date'].dt.year.rename('year')
line_data = bigfoot_df.groupby([sighting_year, 'season']).size().reset_index(name='sightings')

# Bounds of the observed year range, shared by both sliders and their initial values.
first_year = line_data['year'].min()
last_year = line_data['year'].max()

# Two range sliders spanning the full year range, one per end of the window.
start_year_slider = alt.binding_range(min=first_year, max=last_year, step=1, name='Start Year')
end_year_slider = alt.binding_range(min=first_year, max=last_year, step=1, name='End Year')

# Parameters driven by the sliders; initially the window covers every year.
start_year = alt.param(value=first_year, bind=start_year_slider)
end_year = alt.param(value=last_year, bind=end_year_slider)

# One line per season, with point markers on every year.
season_lines = alt.Chart(line_data).mark_line(point=True).encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('sightings:Q', title='Number of Sightings'),
    color=alt.Color('season:N', title='Season'),
    tooltip=['year:O', 'season:N', 'sightings:Q'],
)

# Keep only rows whose year lies inside the [start_year, end_year] slider window.
filtered_line_chart = (
    season_lines
    .transform_filter(alt.datum.year >= start_year)
    .transform_filter(alt.datum.year <= end_year)
    .add_params(start_year, end_year)
    .properties(
        width=800,
        height=400,
        title="Bigfoot Sightings Over Time with Year Range Selection",
    )
)

filtered_line_chart

In [6]:
# Export the year-range slider chart into the Jekyll assets directory.
slider_chart_path = myJekyllDir + 'slider_line_chart.json'
filtered_line_chart.save(slider_chart_path)

## Visualization 2 - First Alternate

(Embedding this linked line chart and bar chart on the Jekyll page produced errors, so they are presented here in the notebook only, to show the work.)

In [7]:

# Group sightings by year and season (recomputed so this cell can run on its own
# once bigfoot_df is loaded).
line_data = bigfoot_df.groupby([bigfoot_df['date'].dt.year, 'season']).size().reset_index(name='sightings')
line_data.columns = ['year', 'season', 'sightings']

# Interval selection restricted to the x channel: dragging on the line chart picks a
# range of years.
brush = alt.selection_interval(encodings=['x'])

# Line chart of sightings per year, one line per season; points outside the brushed
# years are faded instead of removed.
line_chart = alt.Chart(line_data).mark_line(point=True).encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('sightings:Q', title='Number of Sightings'),
    color=alt.Color('season:N', title='Season'),
    tooltip=['year:O', 'season:N', 'sightings:Q'],
    opacity=alt.condition(brush, alt.value(1), alt.value(0.2))
).properties(
    width=650,
    height=400,
    title="Bigfoot Sightings Over Time by Season"
).add_params(
    brush
)


# Bar chart filtered by brush selection: total sightings per season over the brushed years.
# y is aggregated with sum() so that it agrees with the tooltip. Mixing a raw
# 'sightings:Q' channel with an aggregated tooltip makes the raw field a group-by key,
# which splits each bar by distinct count value and reports partial sums.
bars = alt.Chart(line_data).mark_bar().encode(
    x=alt.X('season:N', axis=alt.Axis(title='Season')),
    y=alt.Y('sum(sightings):Q', axis=alt.Axis(title='Total No. of Sightings')),
    color=alt.Color('season:N'),
    tooltip=['season:N', 'sum(sightings):Q']
).transform_filter(
    brush
)

# Place the two views side by side; the brush on the left drives the bars on the right.
final_line_chart = line_chart | bars
final_line_chart

In [8]:
# Export both the stand-alone line chart and the linked line + bar view.
line_chart_path = myJekyllDir + 'sample_line_chart_viz2.json'
linked_chart_path = myJekyllDir + 'bar_line_chart_viz2.json'

line_chart.save(line_chart_path)
final_line_chart.save(linked_chart_path)

## Visualization 2 - Second Alternate

(Embedding this map on the Jekyll page produced errors, so it is presented here in the notebook only, to show the work.)

In [9]:
# Load U.S. state map TopoJSON
states = alt.topo_feature(data.us_10m.url, 'states')

# Enable the JSON transformer
# NOTE(review): this is a global setting that also affects later cells. With the 'json'
# transformer the chart data is written to a separate file and the saved spec refers to
# it by URL - confirm that file is published with the Jekyll page, otherwise the
# embedded chart has no data to load.
alt.data_transformers.enable('json')

# Select variables for dropdown
variable_options = ['temperature_mid', 'dew_point', 'humidity', 'cloud_cover', 'precip_intensity', 'visibility', 'wind_speed']

# Reshape to long form: one row per (sighting, variable) so a single 'variable' column
# can be filtered by the dropdown.
bigfoot_long = bigfoot_df.melt(
    id_vars=['latitude', 'longitude'],
    value_vars=variable_options,
    var_name='variable',
    value_name='value'
)

# Each melted row stands for one sighting; summing this column counts sightings.
bigfoot_long['sightings'] = 1

# Pre-aggregate the data by latitude, longitude, and variable.
# The agg() call is closed here (it was left open, which made the cell a syntax error)
# and reset_index() turns the group keys back into ordinary columns so the chart
# encodings below can reference 'latitude', 'longitude' and 'variable'.
bigfoot_agg = bigfoot_long.groupby(['latitude', 'longitude', 'variable']).agg(
    sightings=('sightings', 'sum'),
    value=('value', 'mean')
).reset_index()

# Dropdown menu for variable selection (starts on 'cloud_cover')
variable_dropdown = alt.binding_select(options=variable_options, name="Variable")
variable_selection = alt.selection_point(fields=['variable'], bind=variable_dropdown, value=[{'variable': 'cloud_cover'}])

# Base map
base_map = alt.Chart(states).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width='container',
    height=500
).project(
    type='albersUsa'
)

# Scatter plot using aggregated data: colour shows the mean of the selected variable at
# each location, size shows the (binned) number of sightings there.
scatter = alt.Chart(bigfoot_agg).mark_circle(opacity=0.7).encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    color=alt.Color('value:Q', scale=alt.Scale(scheme='viridis'), title='Variable Value'),
    size=alt.Size('sightings:Q', bin=alt.Bin(maxbins=5), title='Binned Sightings'),
    tooltip=['latitude', 'longitude', 'variable', 'value', 'sightings']
).transform_filter(
    variable_selection
)

# Combine base map and scatter plot
# NOTE(review): interactive() binds pan/zoom to x/y scales, which a projected map does
# not appear to have - confirm it has any effect (or causes no error) when embedded.
map_chart = (base_map + scatter).add_params(variable_selection).properties(
    title="Bigfoot Sightings Linked to Variables"
).interactive()

map_chart

In [10]:
# Export the variable-linked sightings map into the Jekyll assets directory.
map_chart_path = myJekyllDir + 'bigfoot_scatter_map_viz2.json'
map_chart.save(map_chart_path)