# Mustang Startup Analysis

In [8]:
import datetime as dt
import pandas as pd
import plotly.express as px

In [9]:
# Notebook parameters

# Placeholder calendar date that every start time is attached to, so that the
# analysis compares events by time of day only
fake_year, fake_month, fake_day = 2024, 1, 1
fake_date: str = f'{fake_year}-{fake_month}-{fake_day}'

# Clock time (HH:MM string) at which the afternoon begins, plus the same
# instant as a Timestamp on the placeholder date
midday: str = '12:00'
midday_timestamp: pd.Timestamp = pd.Timestamp(' '.join([fake_date, midday]))

# Width of each histogram bin, in minutes
minutes_per_bin: int = 15

In [10]:
# Download the Google Sheet through its CSV export endpoint
sheet_url: str = 'https://docs.google.com/spreadsheets/d/1LtKiSpGpBzPkVc2DjBDl_5yjR75bWKkU_Qm2IreuJik/export?format=csv'

# Attach the placeholder date to each 'Start Time' string and parse the result
# into a 'time' datetime column, so every event lands on the same day; then
# order the rows by that column
raw_df: pd.DataFrame = (
    pd.read_csv(sheet_url)
    .assign(time=lambda sheet: pd.to_datetime(f'{fake_date} ' + sheet['Start Time']))
    .sort_values(by=['time'])
)


In [11]:
# Collect the morning events only (strictly before midday) and derive the
# histogram axis limits and bin count from the hours those events span
morning_df: pd.DataFrame = raw_df[raw_df['time'] < midday_timestamp]
if morning_df.empty:
    # Fail with a clear message instead of an opaque error from min()/max()
    raise ValueError(f'No events found before {midday}; cannot build the morning histogram')

# Midnight of the placeholder day; the axis limits are built as offsets from
# it so that an end hour of 24 rolls over instead of raising ValueError
morning_day_start: dt.datetime = dt.datetime(fake_year, fake_month, fake_day)

# The axis starts on the hour of the earliest morning event
morning_start_hour: int = morning_df['time'].min().hour
morning_start_dt: dt.datetime = morning_day_start + dt.timedelta(hours=morning_start_hour)

# The axis ends 59 minutes into the hour that follows the latest event's hour
# NOTE(review): this extends almost an hour past the last bin edge counted
# below -- confirm the extra padding is intended
morning_end_hour: int = morning_df['time'].max().hour + 1
morning_end_dt: dt.datetime = morning_day_start + dt.timedelta(hours=morning_end_hour, minutes=59)

# Number of `minutes_per_bin`-wide bins across the whole-hour span
morning_bins: int = (morning_end_hour - morning_start_hour) * (60 // minutes_per_bin)

In [12]:
# Histogram of the morning start times
morning_fig = px.histogram(morning_df, x='time', nbins=morning_bins)

# `nbins` is only an upper bound on the bin count, so pin the bins explicitly
# to make each one exactly `minutes_per_bin` wide, aligned to the axis start.
# On a date axis the bin size is expressed in milliseconds.
morning_fig.update_traces(
    xbins=dict(start=morning_start_dt, size=minutes_per_bin * 60 * 1000)
)
morning_fig.update_xaxes(
    type='date',
    tickformat='%H:%M',
    nticks=morning_bins,
    range=[morning_start_dt, morning_end_dt],
)

# Compute the date span outside the f-string: reusing the f-string's own quote
# character inside a replacement field is a SyntaxError before Python 3.12.
# NOTE(review): min/max compare the raw 'Date' values as loaded from the CSV;
# if they are strings the ordering is lexicographic -- confirm the sheet's
# date format sorts correctly that way
morning_first_date = min(raw_df['Date'])
morning_last_date = max(raw_df['Date'])
morning_fig.update_layout(
    bargap=0.1,
    title=f'Morning Start Times {morning_first_date} to {morning_last_date}',
    xaxis_title=f'Time on {minutes_per_bin} minute Bins',
    yaxis_title='Count',
)
morning_fig.show()

In [13]:
# Collect the afternoon events only (midday or later) and derive the
# histogram axis limits and bin count from the hours those events span
afternoon_df: pd.DataFrame = raw_df[raw_df['time'] >= midday_timestamp]
if afternoon_df.empty:
    # Fail with a clear message instead of an opaque error from min()/max()
    raise ValueError(f'No events found at or after {midday}; cannot build the afternoon histogram')

# Midnight of the placeholder day; the axis limits are built as offsets from
# it because an event in the 23:xx hour gives an end hour of 24, which
# dt.datetime() rejects as an hour argument
afternoon_day_start: dt.datetime = dt.datetime(fake_year, fake_month, fake_day)

# The axis starts on the hour of the earliest afternoon event
afternoon_start_hour: int = afternoon_df['time'].min().hour
afternoon_start_dt: dt.datetime = afternoon_day_start + dt.timedelta(hours=afternoon_start_hour)

# The axis ends 59 minutes into the hour that follows the latest event's hour
# (this rolls into the next day when the latest event is in the 23:xx hour)
# NOTE(review): this extends almost an hour past the last bin edge counted
# below -- confirm the extra padding is intended
afternoon_end_hour: int = afternoon_df['time'].max().hour + 1
afternoon_end_dt: dt.datetime = afternoon_day_start + dt.timedelta(hours=afternoon_end_hour, minutes=59)

# Number of `minutes_per_bin`-wide bins across the whole-hour span
afternoon_bins: int = (afternoon_end_hour - afternoon_start_hour) * (60 // minutes_per_bin)


In [14]:
# Histogram of the afternoon start times
afternoon_fig = px.histogram(afternoon_df, x='time', nbins=afternoon_bins)

# `nbins` is only an upper bound on the bin count, so pin the bins explicitly
# to make each one exactly `minutes_per_bin` wide, aligned to the axis start.
# On a date axis the bin size is expressed in milliseconds.
afternoon_fig.update_traces(
    xbins=dict(start=afternoon_start_dt, size=minutes_per_bin * 60 * 1000)
)
afternoon_fig.update_xaxes(
    type='date',
    tickformat='%H:%M',
    nticks=afternoon_bins,
    range=[afternoon_start_dt, afternoon_end_dt],
)

# Compute the date span outside the f-string: reusing the f-string's own quote
# character inside a replacement field is a SyntaxError before Python 3.12.
# NOTE(review): min/max compare the raw 'Date' values as loaded from the CSV;
# if they are strings the ordering is lexicographic -- confirm the sheet's
# date format sorts correctly that way
afternoon_first_date = min(raw_df['Date'])
afternoon_last_date = max(raw_df['Date'])
afternoon_fig.update_layout(
    bargap=0.1,
    title=f'Afternoon Start Times {afternoon_first_date} to {afternoon_last_date}',
    xaxis_title=f'Time on {minutes_per_bin} minute Bins',
    yaxis_title='Count',
)

# Show the afternoon figure (the original re-displayed morning_fig here)
afternoon_fig.show()