# Task 1

In [1]:
import pandas as pd
import plotly.express as px
from ipywidgets import interact, IntRangeSlider, Dropdown

In [2]:
# Read the scrubbed UFO-sightings CSV into a DataFrame and preview it.
# NOTE: the longitude column is referred to as 'longitude ' (with a trailing
# space) by the later cells, so the column names are deliberately left untouched.
data = pd.read_csv('scrubbed.csv', low_memory=False)
print(data.head(n=5))

           datetime                  city state country     shape  \
0  10/10/1949 20:30            san marcos    tx      us  cylinder   
1  10/10/1949 21:00          lackland afb    tx     NaN     light   
2  10/10/1955 17:00  chester (uk/england)   NaN      gb    circle   
3  10/10/1956 21:00                  edna    tx      us    circle   
4  10/10/1960 20:00               kaneohe    hi      us     light   

  duration (seconds) duration (hours/min)  \
0               2700           45 minutes   
1               7200              1-2 hrs   
2                 20           20 seconds   
3                 20             1/2 hour   
4                900           15 minutes   

                                            comments date posted    latitude  \
0  This event took place in early fall around 194...   4/27/2004  29.8830556   
1  1949 Lackland AFB&#44 TX.  Lights racing acros...  12/16/2005    29.38421   
2  Green/Orange circular disc over Chester&#44 En...   1/21/2008        53

In [3]:
# Quick quality report: the number of missing values per column, a separator
# line, then the dtype pandas inferred for each column.
print(
    data.isna().sum(),
    "--------------------",
    data.dtypes,
    sep="\n",
)

datetime                   0
city                       0
state                   5797
country                 9670
shape                   1932
duration (seconds)         0
duration (hours/min)       0
comments                  15
date posted                0
latitude                   0
longitude                  0
dtype: int64
--------------------
datetime                 object
city                     object
state                    object
country                  object
shape                    object
duration (seconds)       object
duration (hours/min)     object
comments                 object
date posted              object
latitude                 object
longitude               float64
dtype: object


### Data Cleaning: 

In [4]:
# Build the cleaned frame in a single chain so no intermediate, half-cleaned
# state is left behind and every step works on a fresh frame (this also avoids
# assigning a new column onto the result of dropna, which can raise pandas'
# SettingWithCopyWarning).
data = (
    data
    .assign(
        # 'datetime' and 'latitude' are read as strings; errors='coerce' turns
        # any value that cannot be parsed into NaT/NaN so dropna removes it below.
        datetime=lambda df: pd.to_datetime(df['datetime'], errors='coerce'),
        latitude=lambda df: pd.to_numeric(df['latitude'], errors='coerce'),
    )
    # Remove rows that don't have the columns we need. The longitude column is
    # spelled 'longitude ' (trailing space) to match the name used in this dataset.
    .dropna(subset=['datetime', 'latitude', 'longitude ', 'country', 'shape'])
    # Add a 'year' column to make it easier to filter the data
    .assign(year=lambda df: df['datetime'].dt.year)
)

# Cleaned data. DataFrame.info() prints its report itself and returns None,
# so it must not be wrapped in print() (that emitted a stray "None" line).
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 68477 entries, 0 to 80331
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   datetime              68477 non-null  datetime64[ns]
 1   city                  68477 non-null  object        
 2   state                 66048 non-null  object        
 3   country               68477 non-null  object        
 4   shape                 68477 non-null  object        
 5   duration (seconds)    68477 non-null  object        
 6   duration (hours/min)  68477 non-null  object        
 7   comments              68469 non-null  object        
 8   date posted           68477 non-null  object        
 9   latitude              68477 non-null  float64       
 10  longitude             68477 non-null  float64       
 11  year                  68477 non-null  int32         
dtypes: datetime64[ns](1), float64(2), int32(1), object(8)
memory usage: 6.5+ MB
Non

In [5]:
# Range slider over sighting years. Both the allowed bounds and the initial
# selection span the earliest to the latest year present in the cleaned data.
_first_year = data['year'].min()
_last_year = data['year'].max()

year_slider = IntRangeSlider(
    value=[_first_year, _last_year],
    min=_first_year,
    max=_last_year,
    step=1,
    description='Years:',
    continuous_update=False,
)

In [6]:
# Shape selector: an 'All' entry (the default) followed by every distinct
# shape found in the data, in sorted order.
shape_options = ['All', *sorted(data['shape'].unique())]
shape_dropdown = Dropdown(options=shape_options, value='All', description='UFO Shape:')

In [7]:
def update_map(selected_years, selected_shape):
    """Draw a world map of the sightings matching the chosen years and shape.

    Parameters
    ----------
    selected_years : sequence
        Two-element (start, end) pair; rows of the module-level ``data`` frame
        whose 'year' lies between the two values are kept.
    selected_shape : str
        Shape to keep, or "All" to keep every shape.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    first_year = selected_years[0]
    last_year = selected_years[1]
    show_all = selected_shape == "All"

    # Build one boolean mask for both criteria instead of filtering twice.
    keep = data['year'].between(first_year, last_year)
    if not show_all:
        keep = keep & (data['shape'] == selected_shape)
    sightings = data[keep]

    # Title: the year range, followed by either the chosen shape or 'All Shapes'.
    if show_all:
        shape_suffix = ', All Shapes'
    else:
        shape_suffix = f', Shape: {selected_shape}'
    map_title = f'UFO Sightings ({first_year} - {last_year})' + shape_suffix

    # One marker per sighting, coloured by shape; hovering shows the city plus
    # the sighting's datetime and comments.
    fig = px.scatter_geo(
        sightings,
        lat='latitude',
        lon='longitude ',
        hover_name='city',
        hover_data=['datetime', 'comments'],
        color='shape',
        title=map_title,
        projection='natural earth',
    )
    fig.show()

# Display the map and wire the two widgets to update_map, which is re-run
# whenever the year range or the shape selection changes.
# interact() returns the wrapped function; the trailing ';' stops the notebook
# from echoing its repr ("<function __main__.update_map(...)>") under the map.
interact(update_map, selected_years=year_slider, selected_shape=shape_dropdown);

interactive(children=(IntRangeSlider(value=(1910, 2014), continuous_update=False, description='Years:', max=20…

<function __main__.update_map(selected_years, selected_shape)>