In [17]:
import pandas as pd
import datetime
import plotly.express as px

In [225]:
def wrangle(file):
    df = pd.read_csv(file, parse_dates=["date"], index_col="date")
    df = df[~df.index.isna()]

    df["zipcode"] = [i.split()[-3].rstrip(",") for i in df["address"]]
    df = df[~(df["zipcode"] == "WA")]
    df["zipcode"] = df["zipcode"].astype("int64")

    df["month"] = [i.strftime("%Y-%m") for i in df.index]

    top_services = list(df["service_description"].value_counts()[:10].index)
    for i,v in enumerate(df["service_description"]):
        if v not in top_services:
            df["service_description"][i] = "Other"

    return df
data = wrangle("../data/mock_data.csv")
data.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,client_id,full_name,gender,age,service_description,address,latitude,longitude,zipcode,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-05-14,54892,Mehetabel Aylmore,Male,20,Case Management,"Gilman Dr W, Seattle, WA 98119, United States",47.636289,-122.371025,98119,2018-05
2018-05-14,49499,Elga Raeburn,Female,9,Case Management,"7th Avenue West, Seattle, WA 98119, United States",47.639123,-122.365667,98119,2018-05
2018-05-21,64034,Kimmie Richardot,Female,7,Case Management,"West Lee Street, Seattle, WA 98119, United States",47.629724,-122.369483,98119,2018-05
2018-09-17,42222,Adham Polack,Female,62,Case Management,"8th Avenue West, Seattle, WA 98119, United States",47.638473,-122.369279,98119,2018-09
2018-05-21,45792,Jeff Winspar,Female,8,Other,"14th Ave W, Seattle, WA 98119, United States",47.632918,-122.372471,98119,2018-05


In [226]:
def service_splits(categories,demographic):
    """
    Splits a categorical series and finds the frequency of each unique
    value crossed with the values of a specified demographic series.
    """
    return pd.crosstab(categories,
                       demographic,
                       margins=True, 
                    #    normalize="columns"
                       ).sort_values(by="All", ascending=False)

services = data["service_description"]
demo = data["gender"]
service_splits(services,demo)

gender,Female,Male,All
service_description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
All,505,491,996
Case Management,287,286,573
Case/Care management,95,95,190
Bus Pass,22,19,41
Bus Tokens,20,19,39
Life Skills Classes,20,16,36
Other,17,17,34
Housing Search and Placement,19,12,31
Mental Health Counseling,8,9,17
Responsible Renters Class,6,7,13


In [269]:
def services_by_age(ages):
    """
    Splits a series of ages into age groups,
    gets the frequency of each age,
    then plots it into a pie chart.
    """
    age_groups = {}
    age_groups["<18"]   = (ages < 18).sum()
    age_groups["18-24"] = ((ages >= 18) & (ages < 25)).sum()
    age_groups["25-34"] = ((ages >= 25) & (ages < 35)).sum()
    age_groups["35-44"] = ((ages >= 35) & (ages < 45)).sum()
    age_groups["45-54"] = ((ages >= 45) & (ages < 55)).sum()
    age_groups["55+"]   = (ages >= 55).sum()

    age_groups = pd.Series(age_groups)

    fig = px.pie(age_groups,
                 values=age_groups,
                 names=age_groups.index,
                 labels={"index": "Age Group"},
                 title="Service Counts by Age Group",
                 )
    fig.update_traces(textinfo='percent+label')

    return fig.show()

services_by_age(data["age"])

In [231]:
fig = px.bar(pd.crosstab(data["month"],data["service_description"]),
             labels={"month": "Month", "value": "Service Counts", "service_description": "Services"},
             title="Service Frequency by Month")

fig.show()

In [265]:
# pd.Series([i.split(", ")[1] for i in data["address"]])

In [266]:
map = pd.read_csv("../data/services_by-zipcode.csv")
map.head()

Unnamed: 0,zipcode,counts,city,latitude,longitude
0,99201,16,Spokane,47.663945,-117.43185
1,99217,12,Spokane,47.707542,-117.33776
2,99021,9,Mead,47.81108,-117.22306
3,99037,8,Spokane Valley,47.643346,-117.19993
4,99202,7,Spokane,47.656692,-117.37899


In [267]:
fig = px.scatter_mapbox(map, lat="latitude", lon='longitude',
                     color="city", # which column to use to set the color of markers
                     hover_name="zipcode", # column added to hover information
                     size='counts',
                     zoom=4,
                     )

fig.update_layout(mapbox_style="open-street-map")
fig.show()