In [1]:
import pandas as pd
import plotly.express as px
import statsmodels
import city_to_state
import us_state_abbrev
from enum import Enum

In [2]:
class AvoType(Enum):
    """Avocado category selector.

    Each member's value is the string compared against the ``type`` column
    of the data frame when rows are filtered (see ``get_from``).
    """
    ORGANIC = 'organic'
    CONVENTIONAL = 'conventional'

In [3]:
class MapType(Enum):
    """Time granularity of a map animation.

    Each member's value is the name of the data-frame column that is used
    both to split the data into frames and as plotly's ``animation_frame``.
    """
    # Column created in set_up(): a float of the form year + month / 100.
    MONTHLY = 'year_n_month'
    # Not created anywhere in this notebook - presumably a column of avocados.csv (confirm).
    YEARLY = 'year'

In [4]:
# Lookup tables provided by the local helper modules imported above.
# to_state() looks 'geography' values up in this one; presumably city name -> state name (confirm in city_to_state).
city_to_state_dict = city_to_state.city_to_state_dict
# to_abbrev() maps a state name through this one and to_state() uses it to recognise state names;
# presumably state name -> two-letter postal code, as needed by plotly's 'USA-states' mode (confirm).
us_state_abbrev_dict = us_state_abbrev.us_state_to_abbrev_dict

In [5]:
# Region name (matched against the 'geography' column) -> states that receive a copy of
# that region's rows in get_all_non_included_states_from_regions(), so that states
# without their own rows still get a value on the map.
regions = {
    'West': ['Utah', 'New Mexico'],
    'Plains': ['Oklahoma', 'Kansas', 'Nebraska', 'South Dakota', 'Wyoming', 'Montana', 'North Dakota'],
    'Southeast': ['West Virginia', 'Virginia', 'Mississippi', 'Alabama'],
    'Northeast': ['New Jersey', 'Rhode Island', 'Vermont', 'New Hampshire', 'Maine', 'Delaware'],
    # NOTE(review): 'New Jersey' is also listed under 'Northeast', so it receives rows from
    # both regions and its mean price blends the two. Confirm this is intended.
    'Northern New England': ['New Jersey'],
    'Great Lakes': ['Wisconsin', 'Iowa']
}

In [6]:
# Module-level data frame shared by the plotting helpers. It starts empty and is
# replaced with the result of set_up() inside main().
df = pd.DataFrame()

In [7]:
def set_up():
    """Load ``avocados.csv`` and add the columns the visualisations rely on.

    Added columns:
      * ``year_n_month`` - the ``date`` string with dashes stripped, converted
        to a number and passed through ``to_readable_date``.
      * ``state`` - ``geography`` mapped through ``to_state`` ('NaC' when no
        state could be determined).
      * ``code`` - ``state`` mapped through ``to_abbrev`` ('NaS' when the
        state has no abbreviation).

    Rows for the states listed in ``regions`` are appended before ``code`` is
    computed, so those rows receive a code as well.

    Returns:
        The augmented DataFrame.
    """
    frame = pd.read_csv('avocados.csv')

    # 'YYYY-MM-DD'-style text -> digits only -> number -> year.month float
    compact_dates = pd.to_numeric(frame['date'].replace('-', '', regex=True))
    frame['year_n_month'] = compact_dates.apply(to_readable_date)

    # Derive the state from the geography label, then add the region-derived
    # rows for states that have no rows of their own.
    frame['state'] = frame['geography'].apply(to_state)
    region_rows = get_all_non_included_states_from_regions(frame)
    frame = pd.concat([frame, region_rows], ignore_index=True)

    # Abbreviation used as the map location.
    frame['code'] = frame['state'].apply(to_abbrev)

    return frame

In [8]:
def get_all_non_included_states_from_regions(avocado_df, region_map=None):
    """Build extra rows that assign each region's data to its listed states.

    For every region name in ``region_map``, the rows of ``avocado_df`` whose
    ``geography`` equals that name are copied once per listed state, with the
    ``state`` column overwritten by the state name.

    Args:
        avocado_df: DataFrame with at least a ``geography`` column.
        region_map: Mapping of region name -> list of state names. Defaults
            to the module-level ``regions`` table.

    Returns:
        A new DataFrame (fresh 0..n-1 index) with the same columns as
        ``avocado_df``; empty when no region matches.
    """
    if region_map is None:
        region_map = regions

    # Seed with an empty slice so the result keeps avocado_df's columns even
    # when nothing matches, then concatenate once instead of re-copying the
    # accumulated frame on every iteration.
    pieces = [avocado_df[0:0]]
    for region, states in region_map.items():
        region_rows = avocado_df.loc[avocado_df['geography'] == region]
        pieces.extend(region_rows.assign(state=state) for state in states)

    return pd.concat(pieces, ignore_index=True)

In [9]:
def show_map(map_type, avo_typ):
    """Render an animated U.S. choropleth of mean avocado prices per state.

    Args:
        map_type: ``MapType`` member; its value names the column used as the
            animation frame.
        avo_typ: ``AvoType`` member selecting which avocado type is shown.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    show_df = get_from_all(map_type, avo_typ)

    color_scale = 'amp' if avo_typ == AvoType.CONVENTIONAL else 'deep'

    # show_df already contains only rows for avo_typ (get_from filters on it),
    # so the colour range is taken from it directly. Masking it with a boolean
    # Series built from the global df would align on unrelated index labels
    # and select an arbitrary subset of rows.
    prices = show_df['average_price']
    min_val = prices.min()
    max_val = prices.max()

    fig = px.choropleth(
        show_df,
        locations='code',
        locationmode='USA-states',
        hover_name='state',
        color='average_price',
        color_continuous_scale=color_scale,
        # Fixed range so colours are comparable across animation frames.
        range_color=(min_val, max_val),
        animation_frame=map_type.value,
        scope='usa',
        title=f'Prices of {avo_typ.name} Avocados across the U.S. ({map_type.name})',
        labels={'average_price': 'average price', 'year_n_month': 'date'}
    )
    fig.show()

In [10]:
def get_from_all(map_typ, avo_typ):
    """Collect per-state mean prices for every period present in ``df``.

    Calls ``get_from`` once for each distinct value of the ``map_typ.value``
    column (in order of first appearance) and stacks the results.

    Args:
        map_typ: ``MapType`` member; its value names the period column.
        avo_typ: ``AvoType`` member passed through to ``get_from``.

    Returns:
        A DataFrame with a fresh 0..n-1 index. Because the stack is seeded
        with an empty slice of ``df``, the result carries ``df``'s columns;
        only those produced by ``get_from`` hold data.
    """
    # Compute the distinct periods once rather than on every loop iteration.
    periods = df[map_typ.value].unique()

    # Seed with an empty slice of df (keeps the column set when there are no
    # periods) and concatenate once instead of growing a frame in a loop.
    frames = [df[0:0]]
    frames.extend(get_from(map_typ, period, avo_typ) for period in periods)

    return pd.concat(frames, ignore_index=True)

In [11]:
def get_from(map_typ, val, avo_typ):
    """Mean price per state for one period and one avocado type.

    Args:
        map_typ: ``MapType`` member; its value names the period column.
        val: Period value to select in that column.
        avo_typ: ``AvoType`` member; its value is matched against ``type``.

    Returns:
        DataFrame with columns ``state``, ``code``, ``average_price`` and the
        period column (constant ``val``). Rows whose state is the 'NaC'
        placeholder are removed; the index is not renumbered afterwards.
    """
    in_period = df[map_typ.value] == val
    of_type = df['type'] == avo_typ.value
    mean_price = df.loc[in_period & of_type].groupby('state')['average_price'].mean()

    res = pd.DataFrame({'state': mean_price.index.values})
    res['code'] = res['state'].apply(to_abbrev)
    res['average_price'] = mean_price.values
    res[map_typ.value] = val

    # Discard the bucket of geographies that could not be mapped to a state.
    return res.loc[res['state'] != 'NaC']

In [12]:
def to_abbrev(state):
    """Return the entry of ``us_state_abbrev_dict`` for ``state``, or 'NaS' if absent."""
    return us_state_abbrev_dict.get(state, 'NaS')

In [13]:
def to_state(city):
    """Resolve a geography label to a state name.

    Lookup order:
      1. the full label in ``city_to_state_dict``;
      2. the part before the first '/' in ``city_to_state_dict``;
      3. that part itself, if it is a key of ``us_state_abbrev_dict``;
      4. two hard-coded aliases ('Hartford', 'Roanoke');
      5. otherwise the placeholder 'NaC'.
    """
    if city in city_to_state_dict:
        return city_to_state_dict[city]

    # Combined labels such as 'A/B' are resolved through their first part.
    primary = city.split("/")[0]
    if primary in city_to_state_dict:
        return city_to_state_dict[primary]
    if primary in us_state_abbrev_dict:
        return primary

    # Names resolved through a different key of the city table.
    # NOTE(review): 'Roanoke' and 'Roanoke Rapids' may not be in the same state -
    # confirm against city_to_state_dict.
    aliases = {'Hartford': 'West Hartford', 'Roanoke': 'Roanoke Rapids'}
    if primary in aliases:
        return city_to_state_dict[aliases[primary]]

    return 'NaC'

In [14]:
def to_readable_date(date):
    """Turn a YYYYMMDD-style number into ``year + month / 100``.

    The last two digits are discarded, the next two are taken as the month and
    the remainder as the year, e.g. 20150104 -> 2015.01. Note that month 10
    becomes ``.1`` (2015.1) because the result is a float.
    """
    year, month = divmod(date // 100, 100)
    return year + (month / 100)

In [15]:
def show_scatter():
    """Plot total volume over time for the 'Total U.S.' rows of ``df``.

    Points are coloured by ``type``, sized by ``average_price``, and an OLS
    trendline is drawn per colour group. ``df`` itself is not modified; the
    figure is displayed with ``show()`` and nothing is returned.
    """
    is_national = df['geography'] == 'Total U.S.'
    # assign() works on a copy, so the parsed dates never touch the shared frame.
    national = df.loc[is_national, :].assign(
        date=lambda frame: pd.to_datetime(frame['date'])
    )

    figure = px.scatter(
        national,
        x='date',
        y='total_volume',
        color='type',
        size=national['average_price'].values,
        title='Total Volume of Avocados Sold in the U.S.',
        trendline="ols",
    )
    figure.show()

In [16]:
def main():
    """Load the data into the shared ``df`` and render every figure.

    Shows the yearly maps (organic, then conventional), the monthly maps in
    the same order, and finally the national scatter plot.
    """
    global df
    df = set_up()

    for map_type in (MapType.YEARLY, MapType.MONTHLY):
        for avo_type in (AvoType.ORGANIC, AvoType.CONVENTIONAL):
            show_map(map_type, avo_type)

    show_scatter()

In [17]:
# Run the whole pipeline: load the data, then display the four maps and the scatter plot.
main()