# In [14]:
import pandas as pd
import altair as alt
# Turn off Altair's max-rows check so the full sightings table can be passed to a chart.
alt.data_transformers.disable_max_rows()

# Load data and column names.
# The file is read with header=None, so its first line is treated as data and
# the column labels below are assigned positionally.
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
column_names = [
    'sighting_datetime',
    'location_city',
    'location_state',
    'location_country',
    'ufo_shape',
    'duration_sec',
    'duration_desc',
    'witness_description',
    'report_date',
    'latitude',
    'longitude',
]
ufo = pd.read_csv(url, names=column_names, header=None)

# Data cleaning
# Parse both timestamp columns; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
for date_col in ('sighting_datetime', 'report_date'):
    ufo[date_col] = pd.to_datetime(ufo[date_col], errors='coerce')
ufo['year'] = ufo['sighting_datetime'].dt.year

# Normalise the state and country codes: upper-case, surrounding whitespace removed.
for code_col in ('location_state', 'location_country'):
    ufo[code_col] = ufo[code_col].str.upper().str.strip()

# Two-letter codes of the 50 states (no DC or territories in this list).
us_states = (
    'AL AK AZ AR CA CO CT DE FL GA HI ID IL IN IA '
    'KS KY LA ME MD MA MI MN MS MO MT NE NV NH NJ '
    'NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT '
    'VA WA WV WI WY'
).split()

# Keep only rows that are tagged US, carry a listed state code, fall within
# 1950-2023 (inclusive), and have coordinates and a shape recorded.
in_us = ufo['location_country'].eq('US') & ufo['location_state'].isin(us_states)
in_year_range = ufo['year'].between(1950, 2023)
has_required = ufo[['latitude', 'longitude', 'ufo_shape']].notna().all(axis=1)
ufo_us = ufo.loc[in_us & in_year_range & has_required].copy()

# Clean UFO shapes: trim whitespace and title-case the labels.
ufo_us['ufo_shape'] = ufo_us['ufo_shape'].str.strip().str.title()

# Colour encoding for the map: one categorical colour per reported shape.
shape_colors = alt.Color(
    'ufo_shape:N',
    scale=alt.Scale(scheme='category20'),
    legend=alt.Legend(title='UFO Shape'),
)

# Tooltip fields shown when hovering a sighting, as (field shorthand, label) pairs.
sighting_tooltips = [
    alt.Tooltip(field_spec, title=label)
    for field_spec, label in [
        ('location_city:N', 'City'),
        ('location_state:N', 'State'),
        ('yearmonth(sighting_datetime):T', 'Date'),
        ('ufo_shape:N', 'Shape'),
        ('duration_sec:Q', 'Duration (sec)'),
    ]
]

# One circle per sighting, positioned by its latitude/longitude.
sighting_points = alt.Chart(ufo_us).mark_circle(opacity=0.7, size=40).encode(
    latitude='latitude:Q',
    longitude='longitude:Q',
    color=shape_colors,
    tooltip=sighting_tooltips,
)

# Size and title the chart, draw it with the albersUsa projection, and make it interactive.
map_plot = (
    sighting_points
    .properties(title='UFO Sightings in the United States', width=800, height=400)
    .project(type='albersUsa')
    .interactive()
)

# dropdown
# Offer every state code present in the filtered data, in alphabetical order.
state_options = sorted(ufo_us['location_state'].unique())
state_dropdown = alt.binding_select(name='Select State:', options=state_options)

# Parameter bound to the drop-down; it starts out on 'CA'.
state_selection = alt.param(value='CA', bind=state_dropdown, name='State')
# Number of sightings for each (state, shape) pair, one row per pair.
shape_counts = (
    ufo_us.groupby(['location_state', 'ufo_shape'])
    .size()
    .reset_index(name='count')
)

# Get top 15 shapes by total sightings across all states.
# Rank on the summed `count` column: calling value_counts() on `ufo_shape`
# here would only count how many states each shape appears in, because
# shape_counts already holds a single row per (state, shape) pair.
top_shapes = (
    shape_counts.groupby('ufo_shape')['count']
    .sum()
    .nlargest(15)
    .index.tolist()
)

# Keep the label for the top shapes and fold every other shape into 'Other'.
shape_counts['ufo_shape_clean'] = shape_counts['ufo_shape'].where(
    shape_counts['ufo_shape'].isin(top_shapes), 'Other')

# Horizontal bar chart of sightings per shape for the state picked in the
# drop-down.
#
# The chart is defined once. An earlier duplicate definition was overwritten
# immediately and passed the bare parameter to transform_filter instead of
# comparing it against `location_state`.
#
# Counts are aggregated with sum(): shape_counts can hold several rows with the
# same `ufo_shape_clean` for one state (the shapes folded into 'Other'), and
# they should appear as a single bar with a single tooltip total.
shape_hist = alt.Chart(shape_counts).mark_bar().encode(
    x=alt.X('sum(count):Q', title='Number of Sightings'),
    y=alt.Y('ufo_shape_clean:N',
           title='UFO Shape',
           sort=alt.EncodingSortField(field='count', op='sum', order='descending')),
    color=alt.Color('ufo_shape_clean:N',
                  scale=alt.Scale(scheme='category20'),
                  legend=None),
    tooltip=[
        alt.Tooltip('ufo_shape_clean:N', title='Shape'),
        alt.Tooltip('sum(count):Q', title='Sightings', format=',')
    ]
).properties(
    width=800,
    height=400,
    title='UFO Shapes Distribution by State'
).add_params(
    state_selection
).transform_filter(
    # Keep only the rows whose state matches the current drop-down value.
    alt.datum.location_state == state_selection
)
# Display chart