In [None]:
import sys
import plotly.express as px

# Make the directory two levels up importable so that `common` can be found.
# This only extends the module search path (sys.path); the working directory
# itself is not changed.
_PARENT_DIR = "../../"
sys.path.append(_PARENT_DIR)
try:
  from common import get_dataframe_from_pipeline

  outages = get_dataframe_from_pipeline("../../pipeline/1.csv.gz")
finally:
  # Undo the search-path change even if the import or the load failed.
  # Removing by value (rather than a bare sys.path.pop()) avoids dropping an
  # unrelated entry if anything else appended to sys.path in the meantime.
  if _PARENT_DIR in sys.path:
    sys.path.remove(_PARENT_DIR)

In [None]:
# Count outages per cause and convert the counts to shares of the total
value_counts = outages["cause"].value_counts()
percentages = value_counts / value_counts.sum()

# Causes whose share is below this threshold are merged into "Other"
threshold = 0.001  # 0.1%

# Split causes into those shown individually and those folded into "Other"
is_rare = percentages < threshold
common_counts = value_counts[~is_rare]
other_total = int(value_counts[is_rare].sum())

labels = common_counts.index.tolist()
values = common_counts.tolist()

# Add ONE "Other" entry holding the combined count of all rare causes.
# Plotly sums the values of duplicated pie labels, so emitting the combined
# total once per rare cause would inflate the "Other" slice by the number of
# rare causes.
if other_total > 0:
  labels.append("Other")
  values.append(other_total)

# Build the donut chart from the grouped labels and values
fig = px.pie(
  values=values,
  names=labels,
  hole=0.4,
  title="Causes of Power Outages (Pre-Filtering)",
)

# Keep slice text inside the slices
fig.update_traces(textposition="inside")

# Center the title and hide slice text that would not fit at 12pt.
# Kept as the last expression so the notebook renders the figure.
fig.update_layout(title_x=0.5, uniformtext_minsize=12, uniformtext_mode="hide")

In [None]:
import os

# Save the pie chart as a standalone HTML file.
# exist_ok=True creates the output directory only when it is missing and
# avoids the race between a separate existence check and the creation.
os.makedirs("html", exist_ok=True)

# include_plotlyjs="cdn" loads plotly.js from the CDN instead of embedding it,
# which keeps the file small but requires network access when viewing.
fig.write_html("html/preCleanOutageCauses.html", include_plotlyjs="cdn")