In [None]:
!pip install --upgrade altair

In [None]:
import pandas as pd
import altair as alt
import numpy as np
# Select the renderer registered under the name 'default' for chart display.
alt.renderers.enable('default')

# Turn off the data transformer's maximum-row check for chart data.
alt.data_transformers.disable_max_rows()

In [None]:
# The column names are assigned by hand after loading, i.e. the file carries
# no header row. header=None keeps the first record as data instead of
# letting read_csv consume it as column labels.
# NOTE(review): confirm the CSV really has no header line.
df = pd.read_csv(
    "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv",
    header=None,
)

In [None]:
# Show only the first row of the loaded frame.
df.iloc[:1]

In [None]:
# Label the eleven columns of the sightings table, in file order.
df.columns = [
    "datetime",
    "city",
    "state",
    "country",
    "shape",
    "duration (seconds)",
    "duration (hours/min)",
    "comments",
    "date_posted",
    "latitude",
    "longitude",
]

In [None]:
# Display the DataFrame with its newly assigned column names.
df

In [None]:
# Display the column index to verify the labels.
df.columns

In [None]:
# Drop only rows missing a value the box plot reads (shape, country,
# duration). A bare dropna() would also discard sightings whose unrelated
# columns (state, comments, ...) happen to be empty, shrinking the sample.
data = df.dropna(subset=['shape', 'country', 'duration (seconds)'])

In [None]:
# Count the distinct non-null country codes in the full table.
df["country"].nunique()

In [None]:
# Box plot of sighting duration for each shape, with boxes coloured by country.
box_chart = (
    alt.Chart(data)
    # A log scale cannot place zero or negative values, so keep only
    # strictly positive durations.
    .transform_filter(alt.datum['duration (seconds)'] > 0)
    .mark_boxplot()
    .encode(
        # Ticks are in seconds (drawn on a log scale), not in log units.
        alt.X('duration (seconds):Q').scale(type='log').title("Duration (seconds, log scale)"),
        alt.Y('shape:N').title("Shape"),
        alt.Color('country:N').scale(scheme='rainbow'),
    )
    .properties(
        width=1500,
        height=600,
        title="UFO Sighting Duration by Shape and Country",
    )
    .interactive()  # enable pan/zoom on the chart scales
)
box_chart

The chart visualizes UFO sighting durations by shape, with the boxes colored by country. It shows the distribution of durations (in seconds, on a log scale) using box plots. Key insights include:

    1. Duration Range: Most sightings last between 1 and 10 seconds, but some countries have longer durations.
    2. Outliers: Several countries have outliers, indicating exceptionally long sightings.
    3. Country-Specific Patterns: Different countries exhibit distinct patterns in sighting durations.

This chart helps identify countries with unique sighting characteristics and potential anomalies.

In [None]:
# Point selection on the `country` field, toggled by clicking legend entries.
selection = alt.selection_point(fields=['country'], bind='legend')

# Full opacity for selected countries, faded for everything else. Both
# charts use this same condition so they highlight in lockstep.
opacity_condition = alt.condition(selection, alt.value(1.0), alt.value(0.1))

# Keep rows that have every field the charts read. assign() returns a new
# frame, so adding `year` never writes into a possible view of `df`
# (which would raise SettingWithCopyWarning).
df_filtered = df.dropna(subset=['shape', 'country', 'duration (seconds)']).assign(
    year=lambda frame: pd.to_datetime(frame['datetime']).dt.year
)

# Bar Chart: average UFO sighting duration by country with log scale
bchart = alt.Chart(df_filtered).mark_bar().encode(
    alt.X('country:N', title='Country'),
    alt.Y('average(duration (seconds)):Q', scale=alt.Scale(type='log'), title='Average Duration (seconds) (Log Scale)'),
    alt.Color('country:N', legend=alt.Legend(title='Country')),
    opacity=opacity_condition,
).properties(
    width=600,
    height=400,
    title="UFO Sightings by Country"
).add_params(selection).interactive()

# Line Chart: average duration per year by country
lchart = alt.Chart(df_filtered).mark_line().encode(
    alt.X('year:O', title='Year'),
    alt.Y('average(duration (seconds)):Q', scale=alt.Scale(type='log'), title='Average Duration (seconds) (Log Scale)'),
    alt.Color('country:N', legend=alt.Legend(title='Country')),
    # Typed explicitly so the tooltip treats the year like the x axis does.
    tooltip=['year:O', 'average(duration (seconds)):Q'],
    opacity=opacity_condition,
).properties(
    width=900,
    height=400,
    title="Average Duration Distribution by Year and Country"
).add_params(selection).interactive()

# Combine both charts side by side
combined_chart = alt.hconcat(bchart, lchart)

combined_chart


### Note on the Plot
1. The interactive chart effectively visualizes UFO sighting data by country and year.
2. By allowing users to select a country, the chart dynamically highlights the selected country's data in both the bar and line charts, providing a clear comparison with other countries. This interactivity enhances data exploration and enables users to focus on specific countries of interest. However, constructing this chart posed challenges, primarily due to the need to filter data based on the selected country and then update both charts simultaneously.
3. To overcome this, a single legend-bound point selection on `country` was shared by both charts, and the opacity of each chart's marks was made conditional on that selection.
4. This approach ensures that the selected country's data remains prominent while fading out the rest, providing a clear and focused visualization.

In [None]:
# Chart.to_json() returns the spec as a string and takes no file path (its
# first positional parameter is `validate`), so it never wrote these files.
# Chart.save() writes the spec to disk and infers JSON from the extension.
box_chart.save('box_chart.json')
combined_chart.save('combined_chart.json')