In [None]:
import sys
import plotly.graph_objects as go

# Make the parent directory importable just long enough to load `common`.
# Note this extends sys.path (the module search path); the working directory
# itself is not changed. Hacky, but it keeps the notebook self-contained.
sys.path.append("../")
from common import get_dataframe_from_pipeline
from plotly.subplots import make_subplots

# Pipeline stage 1 feeds the "Pre-Filtering" chart and stage 3 the
# "Post-Filtering" chart.
outages = get_dataframe_from_pipeline("../pipeline/1.csv.gz")
postOutages = get_dataframe_from_pipeline("../pipeline/3.csv.gz")
# Undo the sys.path change. Remove the entry by value instead of pop()-ing the
# last element: if anything imported above extended sys.path, pop() would drop
# the wrong entry and leave "../" behind.
sys.path.remove("../")

# One row, two columns. Both cells use the "domain" subplot type, and each
# cell gets its own title.
fig = make_subplots(
  rows=1,
  cols=2,
  specs=[[{"type": "domain"} for _ in range(2)]],
  subplot_titles=["Pre-Filtering", "Post-Filtering"],
)

In [None]:
# Count how often each cause occurs and convert the counts to fractions
value_counts = outages["cause"].value_counts()
percentages = value_counts / value_counts.sum()

# Categories whose share is below this fraction are folded into "Other"
threshold = 0.001  # 0.1%

# Split the categories into those shown individually and those that are folded.
# The folded categories must produce exactly ONE "Other" entry: Plotly sums the
# values of duplicated pie labels, so emitting the combined total once per small
# category would inflate the "Other" slice by the number of small categories.
major_counts = value_counts[percentages >= threshold]
other_total = value_counts[percentages < threshold].sum()

labels = list(major_counts.index)
values = major_counts.tolist()
if other_total > 0:
  labels.append("Other")
  values.append(int(other_total))

# Add the pie chart with modified labels and values to the left subplot
fig.add_trace(
  go.Pie(
    values=values,
    labels=labels,
    hole=0.3,
    name="Pre-Filtering",
  ),
  row=1,
  col=1
)

print("Keep going!")

In [None]:
# Count how often each cause occurs after filtering and convert to fractions
value_counts = postOutages["outageCause"].value_counts()
percentages = value_counts / value_counts.sum()

# Fold categories below `threshold` (set in the pre-filtering cell) into a
# single "Other" entry. Exactly one entry is emitted on purpose: Plotly sums the
# values of duplicated pie labels, so repeating the combined total once per
# small category would inflate the "Other" slice.
major_counts = value_counts[percentages >= threshold]
other_total = value_counts[percentages < threshold].sum()

labels = list(major_counts.index)
values = major_counts.tolist()
if other_total > 0:
  labels.append("Other")
  values.append(int(other_total))

# Add the pie chart to the right subplot
fig.add_trace(
  go.Pie(
    values=values,
    labels=labels,
    hole=0.3,
    name="Post-Filtering",
  ),
  row=1,
  col=2,
)

# Center title
fig.update_layout(title="Causes of Power Outages", title_x=0.5)

# Keep slice text inside the slices; hide any text that would have to shrink
# below size 12 to fit
fig.update_traces(textposition="inside")
fig.update_layout(uniformtext_minsize=12, uniformtext_mode="hide")

fig.show()

In [None]:
import os

# Save the figure as a standalone HTML page.
# exist_ok=True creates the output directory only when it is missing, without
# the check-then-create race of a separate os.path.exists() test.
os.makedirs("html", exist_ok=True)

# include_plotlyjs="cdn" is passed through unchanged from the original export
fig.write_html("html/outageCauses.html", include_plotlyjs="cdn")