In [1]:
import pandas as pd
import plotly.express as px
import statsmodels
import city_to_state
import us_state_abbrev
from enum import Enum

In [2]:
class AvoType(Enum):
    """Avocado categories handled by the notebook.

    Each member's ``value`` is the string compared against the ``type``
    column when rows are filtered; ``name`` is interpolated into plot titles.
    """
    ORGANIC = 'organic'
    CONVENTIONAL = 'conventional'

In [3]:
# Lookup tables exposed by the local helper modules, aliased for brevity.
# city_to_state_dict is indexed by city name (see to_state); its values are
# stored in the 'state' column.
city_to_state_dict = city_to_state.city_to_state_dict
# us_state_abbrev_dict is indexed by state name (see to_abbrev); its values
# are stored in the 'code' column that the choropleths use as locations.
us_state_abbrev_dict = us_state_abbrev.us_state_to_abbrev_dict

In [4]:
# Maps a regional 'geography' label to the states that receive a copy of that
# region's rows (see get_from_all): every row whose geography equals the key
# is duplicated once per listed state, with 'state' set to that state.
# NOTE(review): 'New Jersey' is listed under both 'Northeast' and
# 'Northern New England', so it receives rows from both regions — confirm
# this is intended.
regions = {
    'West': ['Utah', 'New Mexico'],
    'Plains': ['Oklahoma', 'Kansas', 'Nebraska', 'South Dakota', 'Wyoming', 'Montana', 'North Dakota'],
    'Southeast': ['West Virginia', 'Virginia', 'Mississippi', 'Alabama'],
    'Northeast': ['New Jersey', 'Rhode Island', 'Vermont', 'New Hampshire', 'Maine', 'Delaware'],
    'Northern New England': ['New Jersey'],
    'Great Lakes': ['Wisconsin', 'Iowa']
}

In [5]:
def set_up(csv_path='avocados.csv'):
    """Load the avocado data and derive the per-state columns used by the maps.

    Parameters
    ----------
    csv_path : str or path-like, default 'avocados.csv'
        Location of the CSV file. It must provide the 'geography',
        'total_volume' and 'average_price' columns read here.

    Returns
    -------
    pandas.DataFrame
        The loaded rows followed by the per-state copies of the regional rows
        produced by ``get_from_all``, with two derived columns:
        'state' (from ``to_state``; 'NaC' when the geography is not
        recognised) and 'code' (from ``to_abbrev``; 'NaS' when the state has
        no abbreviation). 'total_volume' and 'average_price' are numeric.
    """
    avocado_df = pd.read_csv(csv_path)
    avocado_df['state'] = avocado_df['geography'].apply(to_state)

    # Append the regional rows re-labelled with the individual states listed
    # in `regions`.
    avocado_df = pd.concat([avocado_df, get_from_all(avocado_df)], ignore_index=True)
    avocado_df['code'] = avocado_df['state'].apply(to_abbrev)

    # Make sure both measures are numeric after the concatenation above.
    avocado_df['total_volume'] = pd.to_numeric(avocado_df['total_volume'])
    avocado_df['average_price'] = pd.to_numeric(avocado_df['average_price'])

    return avocado_df

In [6]:
def show_yearly_map(df, typ):
    """Render an animated U.S. choropleth of average price, one frame per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed on to ``get_from_all_years``.
    typ : AvoType
        Avocado category to plot; conventional avocados use the 'Blues'
        colour scale, anything else uses 'Reds'.
    """
    palette = 'Blues' if typ == AvoType.CONVENTIONAL else 'Reds'
    yearly_prices = get_from_all_years(df, typ)

    px.choropleth(
        yearly_prices,
        locations='code',
        locationmode="USA-states",
        color='average_price',
        hover_name='state',
        animation_frame='year',
        scope="usa",
        color_continuous_scale=palette,
        title=f'Prices of {typ.name} Avocados across the U.S. (Yearly)',
    ).show()

In [7]:
def show_monthly_map(df, typ):
    """Render an animated U.S. choropleth of average price, one frame per month.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed on to ``get_from_all_dates``; its 'date' values are
        expected to become numeric once '-' characters are stripped.
    typ : AvoType
        Avocado category to plot; conventional avocados use the 'Blues'
        colour scale, anything else uses 'Reds'.
    """
    show_df = get_from_all_dates(df, typ)

    # Strip the dashes, convert to a number and reduce it to the
    # year + month/100 label used for the animation frames.
    show_df['date'] = show_df['date'].replace('-', '', regex=True)
    show_df['date'] = pd.to_numeric(show_df['date'])
    show_df['date'] = show_df['date'].apply(to_readable_date)

    # Several source dates can collapse onto the same month label. Average
    # them so every frame holds exactly one value per state instead of
    # several competing rows for the same location. sort=False keeps the
    # frames in their original order of appearance.
    show_df = show_df.groupby(
        ['date', 'state', 'code'], sort=False, as_index=False
    )['average_price'].mean()

    color_scale = 'Reds'
    if typ == AvoType.CONVENTIONAL:
        color_scale = 'Blues'

    fig = px.choropleth(
        show_df,
        locations='code',
        locationmode="USA-states",
        color='average_price',
        hover_name='state',
        animation_frame='date',
        scope='usa',
        color_continuous_scale=color_scale,
        title=f'Prices of {typ.name} Avocados across the U.S. (Monthly)',
    )
    fig.show()

In [8]:
def get_from_all_years(df, typ, years=range(2015, 2021)):
    """Stack the per-state yearly price averages for several years.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed on to ``get_from_year``.
    typ : AvoType
        Avocado category to aggregate.
    years : iterable of int, default range(2015, 2021)
        Years to include, in output order.

    Returns
    -------
    pandas.DataFrame
        The ``get_from_year`` results for every requested year, concatenated
        in order under a fresh 0..n-1 index.
    """
    yearly_frames = [get_from_year(df, year, typ) for year in years]
    if not yearly_frames:
        # pd.concat rejects an empty list; return an empty frame with the
        # columns get_from_year produces.
        return pd.DataFrame(columns=['state', 'code', 'average_price', 'year'])

    # One concat over all pieces instead of re-copying a growing frame.
    return pd.concat(yearly_frames, ignore_index=True)

In [9]:
def get_from_year(df, year, typ):
    """Average the price per state for one year and one avocado category.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 'year', 'type', 'state' and 'average_price' columns.
    year : int
        Value matched against the 'year' column.
    typ : AvoType
        Category whose ``value`` is matched against the 'type' column.

    Returns
    -------
    pandas.DataFrame
        One row per state with the columns 'state', 'code',
        'average_price' (the mean for that state) and 'year'.
    """
    in_year = df['year'] == year
    of_type = df['type'] == typ.value
    selected = df.loc[in_year & of_type]

    state_means = selected.groupby(selected.state)['average_price'].mean()

    # Column order follows the keyword order: state, code, average_price, year.
    return pd.DataFrame({'state': state_means.index.values}).assign(
        code=lambda frame: frame['state'].apply(to_abbrev),
        average_price=state_means.values,
        year=year,
    )

In [10]:
def get_from_all(df, region_map=None):
    """Copy each region's rows onto the individual states of that region.

    For every region in the mapping, the rows whose 'geography' equals the
    region name are duplicated once per listed state, with the 'state' column
    set to that state.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'geography' column.
    region_map : mapping of str to list of str, optional
        Region name -> state names. Defaults to the module-level ``regions``.

    Returns
    -------
    pandas.DataFrame
        All per-state copies, ordered by region and then by state, under a
        fresh 0..n-1 index. Column dtypes of ``df`` are preserved. When the
        mapping yields no pieces, an empty frame with the columns of ``df``
        is returned.
    """
    if region_map is None:
        region_map = regions

    pieces = []
    for region, states in region_map.items():
        region_rows = df.loc[df['geography'] == region]
        pieces.extend(region_rows.assign(state=state) for state in states)

    if not pieces:
        # pd.concat rejects an empty list; keep the input's columns.
        return df.iloc[0:0].copy()

    # One concat over all pieces: avoids re-copying a growing frame and
    # avoids seeding the result with an empty object-dtype frame.
    return pd.concat(pieces, ignore_index=True)

In [11]:
def get_from_all_dates(df, typ):
    """Stack the per-state price averages for every distinct date in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'date' column; also passed on to ``get_from_date``.
    typ : AvoType
        Avocado category to aggregate.

    Returns
    -------
    pandas.DataFrame
        The ``get_from_date`` results for each distinct date, in order of
        first appearance, under a fresh 0..n-1 index. An empty frame with
        the same columns is returned when ``df`` holds no dates.
    """
    # Compute the distinct dates once instead of on every loop iteration.
    unique_dates = df['date'].unique()
    if len(unique_dates) == 0:
        return pd.DataFrame(columns=['state', 'code', 'average_price', 'date'])

    per_date_frames = [get_from_date(df, date, typ) for date in unique_dates]

    # One concat over all pieces instead of re-copying a growing frame.
    return pd.concat(per_date_frames, ignore_index=True)

In [12]:
def get_from_date(df, date, typ):
    """Average the price per state for one date and one avocado category.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 'date', 'type', 'state' and 'average_price' columns.
    date : object
        Value matched against the 'date' column.
    typ : AvoType
        Category whose ``value`` is matched against the 'type' column.

    Returns
    -------
    pandas.DataFrame
        One row per state with the columns 'state', 'code',
        'average_price' (the mean for that state) and 'date'.
    """
    on_date = df['date'] == date
    of_type = df['type'] == typ.value
    selected = df.loc[on_date & of_type]

    state_means = selected.groupby(selected.state)['average_price'].mean()

    # Column order follows the keyword order: state, code, average_price, date.
    return pd.DataFrame({'state': state_means.index.values}).assign(
        code=lambda frame: frame['state'].apply(to_abbrev),
        average_price=state_means.values,
        date=date,
    )

In [13]:
def to_abbrev(state):
    """Return the entry of ``us_state_abbrev_dict`` for ``state``.

    Falls back to the sentinel 'NaS' when ``state`` is not a key of the
    lookup table.
    """
    return us_state_abbrev_dict.get(state, 'NaS')

In [14]:
def to_state(city):
    """Resolve a 'geography' label to a state name.

    Lookup order:

    1. the label itself in ``city_to_state_dict``;
    2. the part before the first '/' in ``city_to_state_dict``;
    3. that part, unchanged, if it is a key of ``us_state_abbrev_dict``
       (the label already is a state name);
    4. hand-written stand-ins for 'Hartford' and 'Roanoke'.

    Parameters
    ----------
    city : str
        Geography label, possibly several names joined with '/'.

    Returns
    -------
    str
        The resolved state, or the sentinel 'NaC' when nothing matches or
        ``city`` is not a string (for example a missing value).
    """
    if city in city_to_state_dict:
        return city_to_state_dict[city]
    if not isinstance(city, str):
        # Missing or non-text labels cannot be split; report them as unmapped.
        return 'NaC'

    # Only the first name of a combined label such as "A/B" is used.
    primary = city.split("/")[0]
    if primary in city_to_state_dict:
        return city_to_state_dict[primary]
    if primary in us_state_abbrev_dict:
        return primary
    if primary == 'Hartford':
        return city_to_state_dict['West Hartford']
    if primary == 'Roanoke':
        # NOTE(review): 'Roanoke Rapids' is used as a stand-in key for
        # 'Roanoke' — confirm it maps to the state intended for this label.
        return city_to_state_dict['Roanoke Rapids']
    return 'NaC'

In [16]:
def to_readable_date(date):
    """Collapse a numeric date into a ``year + month / 100`` label.

    The last two digits of ``date`` are dropped, then the remainder is split
    into its final two digits (month) and everything before them (year).
    For example 20150104 becomes 2015 + 1/100.

    NOTE(review): a month value of 10 yields a fraction of .1 (e.g. 2015.1),
    which is easy to misread as month 1 — confirm the labels are acceptable.
    """
    year_and_month = date // 100
    year, month = divmod(year_and_month, 100)
    return year + (month / 100)

In [17]:
def main():
    """Load the data and display every figure of the notebook.

    Shows the yearly maps, then the monthly maps (organic before
    conventional in both cases), followed by a scatter plot of the
    'Total U.S.' rows with an OLS trendline.
    """
    df = set_up()

    # Yearly maps first, then monthly; organic before conventional.
    for render_map in (show_yearly_map, show_monthly_map):
        for avo_type in (AvoType.ORGANIC, AvoType.CONVENTIONAL):
            render_map(df, avo_type)

    # Nationwide rows only, with 'date' parsed so it can serve as the x axis.
    is_national = df['geography'] == 'Total U.S.'
    us_df = df.loc[is_national].copy(deep=False)
    us_df['date'] = pd.to_datetime(us_df['date'])

    scatter_options = {
        'x': 'date',
        'y': 'total_volume',
        'color': 'type',
        # Marker size follows the average price of each row.
        'size': us_df['average_price'].values,
        'title': 'Total Volume of Avocados Sold in the U.S.',
        'trendline': "ols",
    }
    px.scatter(us_df, **scatter_options).show()

In [18]:
# Entry point of the notebook: load the data and render every figure.
main()