In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Load the raw sightings. low_memory=False asks pandas to parse the file in a
# single pass instead of in chunks, which avoids mixed-type inference per column.
ufo_df = pd.read_csv('Resources/ufoSightings.csv', low_memory=False)

# Keep only the rows in which every column is populated.
clean_ufo_df = ufo_df.dropna(how="any")

# Build a separate frame whose "duration (seconds)" column is cast to float;
# clean_ufo_df itself is left untouched.
converted_ufo_df = clean_ufo_df.assign(
    **{"duration (seconds)": clean_ufo_df["duration (seconds)"].astype(float)}
)

# Preview the first five rows of the converted frame.
converted_ufo_df.head()

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611
5,10/10/1961 19:00,bristol,tn,us,sphere,300.0,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.595,-82.188889
7,10/10/1965 23:45,norwalk,ct,us,disk,1200.0,20 minutes,A bright orange color changing to reddish colo...,10/2/1999,41.1175,-73.408333


### Single Index (review)

In [3]:
# Mean sighting duration per country, laid out with one column per country
# and rounded to one decimal place.
ufo_country_avg_secs = converted_ufo_df.pivot_table(
    values='duration (seconds)',
    columns='country',
    aggfunc='mean',
).round(1)

# Display the table with a friendlier row label. rename() returns a new frame,
# so ufo_country_avg_secs keeps its original "duration (seconds)" index label.
ufo_country_avg_secs.rename(index={"duration (seconds)": "Duration: Avg. Seconds"})

country,au,ca,gb,us
Duration: Avg. Seconds,252.5,29175.3,8343.6,5527.4


### Multi-Indexing

In [4]:
# Mean sighting duration for every (country, state) pair, rounded to one
# decimal place. Passing two index keys produces a two-level row MultiIndex.
ufo_country_state = converted_ufo_df.pivot_table(
    values='duration (seconds)',
    index=['country', 'state'],
    aggfunc='mean',
).round(1)

# Preview the first 20 rows.
ufo_country_state.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,duration (seconds)
country,state,Unnamed: 2_level_1
au,al,900.0
au,dc,300.0
au,nt,180.0
au,oh,180.0
au,sa,152.5
au,wa,225.0
au,yt,30.0
ca,ab,1869.7
ca,bc,948.2
ca,mb,1291.4


In [5]:
# Number of UFO sightings for every (country, state, city) combination.
# 'count' tallies the non-null "shape" entries in each group; rows with missing
# values were already dropped, so this equals the number of sightings.
ufo_country_state_city_cnt = converted_ufo_df.pivot_table(
    values='shape',
    index=['country', 'state', 'city'],
    aggfunc='count',
)

# Preview the first 20 rows (group keys are sorted by default).
ufo_country_state_city_cnt.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,shape
country,state,city,Unnamed: 3_level_1
au,al,melbourne (australia),1
au,dc,maroochydore (queensland) (australia),1
au,nt,darwin (nt&#44 australia),2
au,oh,adelaide (south australia),1
au,sa,adelaide (south australia),1
au,sa,port adelaide (south australia),1
au,wa,cue (western australia) (australia),1
au,wa,perth (western australia),1
au,yt,port macquarie (australia),1
ca,ab,airdrie (canada),10


In [6]:
# Number of UFO sightings for every (country, state, city) combination,
# this time with `sort=False`.
# NOTE: `sort=False` does NOT sort in descending order. It only turns off the
# default sorting of the group keys, so rows appear in the order in which each
# (country, state, city) combination is first encountered in converted_ufo_df.
# Use `sort_values(..., ascending=False)` on the result to rank by count.
# This rebinds ufo_country_state_city_cnt with the same counts in a different
# row order.
ufo_country_state_city_cnt = converted_ufo_df.pivot_table(
    values='shape',
    index=['country', 'state', 'city'],
    aggfunc='count',
    sort=False,
)

# Preview the first 20 rows.
ufo_country_state_city_cnt.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,shape
country,state,city,Unnamed: 3_level_1
us,tx,san marcos,20
us,tx,edna,3
us,hi,kaneohe,10
us,tn,bristol,22
us,ct,norwalk,20
us,al,pell city,4
us,fl,live oak,3
us,ca,hawthorne,11
us,nc,brevard,4
us,ny,bellmore,3


In [7]:
# Give the count column a descriptive label: "shape" -> "UFO Sightings".
ufo_country_state_city_sightings = ufo_country_state_city_cnt.rename(
    columns={"shape": "UFO Sightings"}
)

# Rank the (country, state, city) groups from most to fewest sightings and
# display the top 20. The sorted result is only displayed, not stored.
(
    ufo_country_state_city_sightings
    .sort_values("UFO Sightings", ascending=False)
    .head(20)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,UFO Sightings
country,state,city,Unnamed: 3_level_1
us,wa,seattle,471
us,az,phoenix,434
us,nv,las vegas,352
us,ca,los angeles,347
us,ca,san diego,327
us,or,portland,313
us,tx,houston,289
us,il,chicago,256
us,az,tucson,237
us,fl,miami,222


### Multi-Index and Multi-Aggregations

In [8]:
# Shortest and longest sighting duration for every (country, state) pair.
# Two aggregations are requested at once; the result has one column per
# aggregation, shown as "max" then "min".
# NOTE(review): aggfunc is passed as a tuple. The documented multi-function form
# is a list, which may produce a different column layout — confirm before changing.
ufo_country_state_min_max = converted_ufo_df.pivot_table(
    values='duration (seconds)',
    index=['country', 'state'],
    aggfunc=('min', 'max'),
)

# Preview the first 10 rows.
ufo_country_state_min_max.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,max,min
country,state,Unnamed: 2_level_1,Unnamed: 3_level_1
au,al,900.0,900.0
au,dc,300.0,300.0
au,nt,300.0,60.0
au,oh,180.0,180.0
au,sa,300.0,5.0
au,wa,420.0,30.0
au,yt,30.0,30.0
ca,ab,259200.0,1.0
ca,bc,37800.0,0.02
ca,mb,36000.0,1.0
