In [None]:
import os

import altair as alt
import pandas as pd
# Lift Altair's row-count limit on embedded datasets; the map chart further
# down is built directly from the full sightings DataFrame.
alt.data_transformers.disable_max_rows()

In [None]:
# Column names for the sightings CSV. They are handed to read_csv via
# `names=`, so they are applied to the file's fields by position.
columns = [
    "date_time",
    "city",
    "state",
    "country",
    "shape",
    "duration_seconds",
    "duration_hours_mins",
    "comments",
    "date_posted",
    "latitude",
    "longitude",
]


In [None]:
# UFO sightings CSV hosted in the UIUC iSchool DataViz course data repository.
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv"

# The first line of the file is skipped and our own `columns` names are applied.
# NOTE(review): skiprows=1 assumes that first line is a header row; if the file
# has no header, this silently drops one real sighting -- confirm against the
# raw file.
df = pd.read_csv(url, header=None, names=columns, skiprows=1)

# Timestamps that cannot be parsed become NaT instead of raising.
df['date_time'] = pd.to_datetime(df['date_time'], errors='coerce')

# Coerce coordinates to numbers *before* dropping missing values. A malformed
# value would otherwise leave the column non-numeric, survive dropna(), and
# flow into the quantitative map encodings and the exported CSV.
coordinate_columns = ['latitude', 'longitude']
df[coordinate_columns] = df[coordinate_columns].apply(pd.to_numeric, errors='coerce')
df = df.dropna(subset=coordinate_columns)

# Rows with a NaT date get NaN here, so `year` is a float column.
df['year'] = df['date_time'].dt.year

df.head()


In [None]:
# The chart cells below save their specs under ./charts; create the directory
# up front so those saves work on a fresh checkout. exist_ok=True keeps this
# cell safe to re-run.
os.makedirs('charts', exist_ok=True)


In [None]:

# One row per year with the number of sightings recorded in it.
sightings_per_year = (
    df.groupby('year')
    .size()
    .reset_index(name='count')
)

# format='d' on the year axis: presumably so tick labels read as plain
# integers rather than with a thousands separator.
year_encoding = alt.X('year:Q', title='Year', axis=alt.Axis(format='d', tickCount=10))
count_encoding = alt.Y('count:Q', title='Number of Sightings')

chart1 = (
    alt.Chart(sightings_per_year)
    .mark_line()
    .encode(
        x=year_encoding,
        y=count_encoding,
        tooltip=['year', 'count'],
    )
    .properties(
        title='UFO Sightings Over the Years',
        width=700,
        height=400,
    )
)

# Save the chart spec to disk, then leave the chart as the cell's output.
chart1.save('charts/chart1.json')
chart1

In [None]:

# Legend-bound selection on `shape`: points matching the selection stay fully
# opaque, everything else is dimmed to 0.1 opacity (see the condition below).
# NOTE(review): selection_multi / add_selection are the Altair 4 names; Altair 5
# deprecates them in favour of selection_point / add_params -- update when the
# environment's Altair version is confirmed.
selection = alt.selection_multi(fields=['shape'], bind='legend')

# Embed only the columns this chart actually encodes. Passing the whole frame
# would serialise every unused column (including the free-text comments) for
# every row into the notebook output and charts/chart2.json.
map_columns = ['longitude', 'latitude', 'shape', 'city', 'state', 'date_time']

chart2 = alt.Chart(df[map_columns]).mark_circle(size=60).encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    color=alt.Color('shape:N'),
    tooltip=['city', 'state', 'date_time', 'shape'],
    opacity=alt.condition(selection, alt.value(1), alt.value(0.1))
).add_selection(
    selection
).properties(
    title='UFO Sightings by Shape and Location',
    width=700,
    height=400
)

# Save the chart spec to disk, then leave the chart as the cell's output.
chart2.save('charts/chart2.json')
chart2


In [None]:
# Write the cleaned sightings table next to the notebook, without the
# DataFrame index column.
cleaned_csv_path = "cleaned_ufo_data.csv"
df.to_csv(cleaned_csv_path, index=False)

