In [7]:
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv"

import pandas as pd
import altair as alt
OUT_DIR = "../assets/hw5"

df = pd.read_csv("https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv")
df.head()

Unnamed: 0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 1949-50. It occurred after a Boy Scout meeting in the Baptist Church. The Baptist Church sit,4/27/2004,29.8830556,-97.9411111
0,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
1,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
2,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
3,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611
4,10/10/1961 19:00,bristol,tn,us,sphere,300.0,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.595,-82.188889


In [8]:
# Rename columns & clean dataset
expected_cols = [
    "datetime",              
    "city",
    "state",
    "country",
    "shape",
    "duration_seconds",     
    "duration_text",         
    "comments",
    "date_posted",           
    "latitude",
    "longitude",
]
assert len(df.columns) == len(expected_cols), f"Expected {len(expected_cols)} columns, got {len(df.columns)}"
df.columns = expected_cols

# parse timestamps
df["datetime"]  = pd.to_datetime(df["datetime"],  errors="coerce")
df["date_posted"] = pd.to_datetime(df["date_posted"], errors="coerce")
df = df.dropna(subset=["datetime"])

# keep only US rows
df = df[df["country"].str.lower() == "us"]

# normalize state codes
df["state"] = df["state"].astype(str).str.upper().str.strip()
df = df[df["state"].str.match(r"^[A-Z]{2}$", na=False)]

# create year
df["year"] = df["datetime"].dt.year

In [9]:
# Chart 1: Time Series

# aggregate
counts_by_year_state = (
    df.groupby(["year","state"], as_index=False)
      .size().rename(columns={"size":"count"})
)

# dropdown selector
state_list = sorted(counts_by_year_state["state"].unique().tolist())
state_select = alt.selection_point(
    fields=["state"],
    bind=alt.binding_select(options=state_list, name="State: "),
    value="IL" 
)

chart1 = (
    alt.Chart(counts_by_year_state, title="UFO Sightings per Year (by State)")
      .mark_line()
      .encode(
          x=alt.X("year:O", title="Year"),
          y=alt.Y("count:Q", title="Sightings"),
          color=alt.Color("state:N", legend=None)
      )
      .add_params(state_select)
      .transform_filter(state_select)
      .properties(width=650, height=350)
)

chart1
chart1.save(f"{OUT_DIR}/chart1.json")
f"Saved {OUT_DIR}/chart1.json"

'Saved ../assets/hw5/chart1.json'

Plot 1:
I visualize annual UFO sighting counts for U.S.state (selected). I first parsed datetime to extract year and grouped by year and state, and then let the x-axis encodes year (ordinal) and the y-axis encodes counts (quantitative). I also set a dropdown "State" selector filters the line, which avoids clutter from many overlapping lines and makes state-by-state trends easier to compare.

In [10]:
# Chart 2: Top-15 states & click to highlight
state_total = (
    df.groupby("state", as_index=False)
      .size().rename(columns={"size":"total"})
      .sort_values("total", ascending=False)
)

top15 = state_total.head(15)
click = alt.selection_point(fields=["state"])

chart2 = (
    alt.Chart(top15, title="Top 15 States by Total UFO Sightings")
      .mark_bar()
      .encode(
          x=alt.X("total:Q", title="Total sightings"),
          y=alt.Y("state:N", sort="-x", title="State"),
          color=alt.condition(click, alt.value("steelblue"), alt.value("lightgray")),
          tooltip=["state:N","total:Q"]
      )
      .add_params(click)
      .properties(width=650, height=420)
)

chart2

chart2.save(f"{OUT_DIR}/chart2.json")
f"Saved {OUT_DIR}/chart2.json"

'Saved ../assets/hw5/chart2.json'

Plot 2 (Top-15 States Bar & Click Highlight):
I aggregated total sightings by state and sorted them, and kept the top 15. The x-axis shows total UFO sightings per states and the y-axis lists states sorted descending. Click a single bar could also highlight that bar while dimming others, helping quick picking out the observed state situation without losing context.