In [1]:
import pandas as pd
import numpy as np


In [2]:
# Transaction sales data (1% population sample)
sales = pd.read_csv(
    "system2_case_study_transaction_data_monthly_sales_by_locationid.csv",
    parse_dates=["period_start", "period_end"]
)

# Store open / close dates (transaction location IDs)
location_info = pd.read_csv(
    "system2_case_study_transaction_data_location_info.csv",
    parse_dates=["est_open_date", "est_close_date"]
)

# Reported comparable sales through Q3-2022
reported = pd.read_excel(
    "system2_case_study_reported_numbers.xlsx"
)

# Web-scraped store location data (used for closure analysis)
web_locations = pd.read_csv(
    "system2_case_study_webscrape_data_locations.csv"
)

In [3]:
# Number of services offered per store
web_locations["num_services"] = (
    web_locations["store_services"]
    .fillna("")
    .str.count("\|") + 1
)

# Flag urban vs suburban (rough heuristic via ZIP length)
web_locations["suburban_flag"] = (
    web_locations["zipcode"].astype(str).str.len() == 5
)


In [10]:
web_locations.head(5)

Unnamed: 0,address,as_of_date,city,phone,state,location_id,store_services,title,url,zipcode,postcode,latitude,longitude,num_services,suburban_flag,closure_risk_score
0,13477 Middlebelt Road,2024-10-07,Livonia,(734) 666-3019,Michigan,539,In-Store Shopping | In-Store Pickup | Curbside...,Livonia Commons,https://stores.partycity.com/us/mi/livonia/par...,48150,48150,42.381156,-83.33577,6,True,1
1,5114 28th Street SE,2024-10-07,Grand Rapids,(616) 365-5419,Michigan,4107,In-Store Shopping | In-Store Pickup | Curbside...,Waterfall Shoppes,https://stores.partycity.com/us/mi/grandrapids...,49512,49512,42.910841,-85.540782,6,True,1
2,2677 Oak Valley Dr,2024-10-07,Ann Arbor,(734) 519-5591,Michigan,4111,In-Store Shopping | In-Store Pickup | Curbside...,Oak Valley Plaza,https://stores.partycity.com/us/mi/annarbor/pa...,48103,48103,42.24749,-83.768959,6,True,1
3,3000 White Bear Avenue North,2024-10-07,Maplewood,(612) 428-0697,Minnesota,1138,In-Store Shopping | In-Store Pickup | Curbside...,Plaza 3000 Shopping Center,https://stores.partycity.com/us/mn/maplewood/p...,55109,55109,45.032238,-93.014647,6,True,1
4,2560 Lemay Ferry Road,2024-10-07,Saint Louis,(314) 396-2245,Missouri,5163,In-Store Shopping | In-Store Pickup | Curbside...,Lemay Plaza,https://stores.partycity.com/us/mo/saintlouis/...,63125,63125,38.518385,-90.305049,6,True,1


In [8]:
web_locations["closure_risk_score"] = (
    (web_locations["num_services"] <= 3).astype(int) +
    web_locations["suburban_flag"].astype(int)
)

# Higher score = higher closure risk
high_risk_stores = web_locations.sort_values(
    "closure_risk_score",
    ascending=False
)

high_risk_stores.head()

Unnamed: 0,address,as_of_date,city,phone,state,location_id,store_services,title,url,zipcode,postcode,latitude,longitude,num_services,suburban_flag,closure_risk_score
365,2010 S.W. Westport Drive,2024-10-07,Topeka,(785) 596-6093,Kansas,1190,In-Store Shopping | Helium,South West Westport,https://stores.partycity.com/us/ks/topeka/part...,66604,66604,39.030716,-95.756562,2,True,2
64,8330 Rio San Diego Drive,2024-10-07,San Diego,(619) 210-2313,California,1239,In-Store Shopping | In-Store Pickup | Helium,Rio San Diego Plaza,https://stores.partycity.com/us/ca/sandiego/pa...,92108,92108,32.775187,-117.146604,3,True,2
654,11465 Carmel Mountain Road,2024-10-07,San Diego,(619) 763-1893,California,1241,In-Store Shopping | In-Store Pickup | Helium,Carmel Mountain Rd,https://stores.partycity.com/us/ca/sandiego/pa...,92128,92128,32.978036,-117.083886,3,True,2
91,10861 Old Halls Ferry Road,2024-10-07,Ferguson,(314) 669-8834,Missouri,412,In-Store Shopping | Helium,Central City Shopping Center,https://stores.partycity.com/us/mo/ferguson/pa...,63136,63136,38.767854,-90.262371,2,True,2
286,15242 Summit Avenue,2024-10-07,Fontana,(909) 899-2233,California,361,In-Store Shopping | Helium,Falcon Ridge Town Center,https://stores.partycity.com/us/ca/fontana/par...,92336,92336,34.152594,-117.475,2,True,2


In [11]:
len(high_risk_stores)

747

In [12]:
len(web_locations)

747

In [16]:
# Flag high-risk stores
web_locations["high_risk_flag"] = web_locations["closure_risk_score"] == 2

# Count stores
store_counts = web_locations["high_risk_flag"].value_counts().rename(
    index={True: "High Risk", False: "Not High Risk"}
)

store_counts

high_risk_flag
Not High Risk    706
High Risk         41
Name: count, dtype: int64

In [15]:
store_share = (
    web_locations["high_risk_flag"]
    .value_counts(normalize=True)
    .rename(index={True: "High Risk", False: "Not High Risk"})
    * 100
)

store_share

high_risk_flag
Not High Risk    94.511379
High Risk         5.488621
Name: proportion, dtype: float64

In [17]:
# Pie chart
plt.figure(figsize=(6,6))
colors = ["red", "green"]
plt.pie(risk_counts, labels=risk_counts.index, autopct="%1.1f%%", colors=colors, startangle=90)
plt.title("Party City Store Closure Risk Distribution")
plt.show()


NameError: name 'plt' is not defined