In [2]:
#import dependencies
import pandas as pd
from pathlib import Path

In [3]:
# Load the cleaned UFO sightings file (path is relative to the notebook's
# working directory) and preview its first five rows.
csv_file = Path("cleaned_ufo_au.csv")
shapedf = pd.read_csv(filepath_or_buffer=csv_file)
shapedf.head(5)

Unnamed: 0,country,state,city,shape,duration_hours,duration_minutes,duration_seconds,comments,date_posted,latitude,longitude,date,time
0,au,New South Wales,sydney (nsw&#44 australia),formation,0.05,3.0,180.0,formation and impact,20/11/2001,-33.861481,151.205475,10/10/2001,4:33:00
1,au,South Australia,adelaide (pt. wakefield) (south australia),circle,0.166667,10.0,600.0,one light became 3,28/10/2002,-34.928661,138.598633,10/10/2002,4:00:00
2,au,Tasmania,burnie (tasmania) (australia),cross,0.0,0.0,0.0,the craft was large and noisy,13/07/2005,-41.05584,145.903748,10/10/2002,22:00:00
3,au,South Australia,adelaide (south australia),oval,0.083333,5.0,300.0,Dancing &amp; Flashing UFO in Glenelg&#44 Sout...,8/10/2007,-34.928661,138.598633,1/10/2007,2:00:00
4,au,Western Australia,perth (w. of; southern cross) (western australia),fireball,0.25,15.0,900.0,fire ball and min min,18/06/2004,-31.95224,115.861397,12/10/1975,1:00:00


In [4]:
# Inspect the column labels of the loaded frame before selecting chart fields
shapedf.columns

Index(['country', 'state', 'city', 'shape', 'duration_hours',
       'duration_minutes', 'duration_seconds', 'comments', 'date_posted',
       'latitude', 'longitude', 'date', 'time'],
      dtype='object')

In [5]:
# Build the chart input: an independent copy holding only the 'state' and
# 'shape' columns of the sightings frame.
c_columns = ['state', 'shape']
columns_for_chart_df = shapedf.loc[:, c_columns].copy()
print(columns_for_chart_df)


                 state      shape
0      New South Wales  formation
1      South Australia     circle
2             Tasmania      cross
3      South Australia       oval
4    Western Australia   fireball
..                 ...        ...
572    New South Wales   fireball
573  Western Australia     sphere
574    New South Wales      light
575         Queensland  rectangle
576           Victoria     circle

[577 rows x 2 columns]


In [6]:
# Count the sightings for each (state, shape) pair that occurs in the data,
# then flatten the grouped result into a frame with a 'count' column.
pair_sizes = columns_for_chart_df.groupby(['state', 'shape']).size()
counts_df = pair_sizes.reset_index(name='count')
print(counts_df)


                            state      shape  count
0    Australian Capital Territory   changing      1
1    Australian Capital Territory      cigar      1
2    Australian Capital Territory     circle      4
3    Australian Capital Territory       cone      1
4    Australian Capital Territory    diamond      2
..                            ...        ...    ...
106             Western Australia  rectangle      4
107             Western Australia     sphere      5
108             Western Australia   teardrop      1
109             Western Australia   triangle      7
110             Western Australia    unknown      5

[111 rows x 3 columns]


In [9]:
# Write the state/shape counts to shape.csv, leaving out the row index
counts_df.to_csv('shape.csv', index=False)

In [10]:
# Preview the first rows of the counts table
counts_df.head()


Unnamed: 0,state,shape,count
0,Australian Capital Territory,changing,1
1,Australian Capital Territory,cigar,1
2,Australian Capital Territory,circle,4
3,Australian Capital Territory,cone,1
4,Australian Capital Territory,diamond,2


In [17]:
from bokeh.plotting import figure, show
from bokeh.io import output_file
from bokeh.palettes import Category20
from bokeh.models import ColumnDataSource, HoverTool

# Prepare the data for the bar chart.
# Plain Python lists are used because they are passed below as the
# per-renderer stackers / legend labels and as the categorical x-range.
states = counts_df['state'].unique().tolist()
shapes = counts_df['shape'].unique().tolist()

# Category20 is only keyed for 3..20 colours, so indexing it with the number
# of states fails for very small or very large state counts. Take colours
# from the largest palette instead, cycling if there are more than 20 states.
palette = Category20[20]
colors = [palette[i % len(palette)] for i in range(len(states))]

# Per-state lookup of shape -> count. (state, shape) pairs that never occur
# are absent from counts_df and are filled with 0 when the columns are built.
state_shape_count = {
    state: counts_df[counts_df['state'] == state].set_index('shape')['count'].to_dict()
    for state in states
}

# One column per state, aligned with `shapes`, so the bars can be stacked.
stack_data = {'shape': shapes}
for state in states:
    stack_data[state] = [state_shape_count[state].get(shape, 0) for shape in shapes]
source = ColumnDataSource(data=stack_data)

# Set up the bar chart.
# `height`/`width` replace the `plot_height`/`plot_width` arguments, which
# newer Bokeh releases no longer accept.
output_file("shape_chart.html")
p = figure(x_range=shapes, height=400, width=800, title="Count of UFO Shapes by State",
           toolbar_location=None, tools="")

# Stack one bar segment per state on each shape. Drawing a separate full-width
# vbar per state at the same x position made the bars hide one another, so
# only the tallest/last-drawn state was visible for each shape.
bars = p.vbar_stack(states, x='shape', width=0.8, color=colors, source=source,
                    legend_label=states)

# Hover tool: each stacked renderer is named after its state column, so
# `$name` shows the state and `@$name` looks up that state's count.
hover = HoverTool(
    renderers=bars,
    point_policy="follow_mouse",
    tooltips="""
    <div>
        <span style="font-size: 12px;"><b>State:</b> $name</span><br>
        <span style="font-size: 12px;"><b>Shape:</b> @shape</span><br>
        <span style="font-size: 12px;"><b>Count:</b> @$name</span>
    </div>
""",
)
p.add_tools(hover)

p.xaxis.axis_label = "UFO shape"
p.yaxis.axis_label = "Number of sightings"
p.xgrid.grid_line_color = None
p.y_range.start = 0
# The tallest stack is the largest per-shape total across all states.
p.y_range.end = int(counts_df.groupby('shape')['count'].sum().max()) + 5
p.legend.location = "top_right"
p.legend.orientation = "vertical"
p.xaxis.major_label_orientation = 1.2

# Show the bar chart
show(p)
