In [2]:
import pandas as pd
import sqlite3

In [3]:
conn = sqlite3.connect('dialogue_analysis.db')

In [4]:
episodes_df = pd.read_csv('Sample Data/episodes.csv')
organizations_monthly_df = pd.read_csv('Sample Data/organizations_monthly.csv')
cost_to_serve_monthly_df = pd.read_csv('Sample Data/cost_to_serve_monthly.csv')

In [5]:
episodes_df.to_sql('episodes', conn, if_exists='replace', index=False)
organizations_monthly_df.to_sql('organizations_monthly', conn, if_exists='replace', index=False)
cost_to_serve_monthly_df.to_sql('cost_to_serve_monthly', conn, if_exists='replace', index=False)

12

## Utilization Analysis

Previewing Tables

In [6]:
# episodes table
episodes_preview = pd.read_sql_query("SELECT * FROM episodes LIMIT 5;", conn)
print("Episodes Preview:")
display(episodes_preview)

# organizations_monthly table
org_monthly_preview = pd.read_sql_query("SELECT * FROM organizations_monthly LIMIT 5;", conn)
print("\nOrganizations Monthly Preview:")
display(org_monthly_preview)

# cost_to_serve_monthly table
cost_serve_monthly_preview = pd.read_sql_query("SELECT * FROM cost_to_serve_monthly LIMIT 5;", conn)
print("\nCost to Serve Monthly Preview:")
display(cost_serve_monthly_preview)

Episodes Preview:


Unnamed: 0,EPISODE_ID,ORGANIZATION_ID,EPISODE_CREATED_AT,PROGRAM,OUTCOME
0,d0d178b23004534a5ff6723959785ba20ce1971d,5,2023-04-10T12:31:41.107Z,primary_care,walkin_clinic
1,ea85cffb681a06788b364d9ce49b61649f8688a5,5,2023-05-26T12:19:44.878Z,primary_care,walkin_clinic
2,7624c12a4ef8847e1951c0918d7193a9c3326dc1,5,2023-06-02T11:22:07.744Z,primary_care,md_np_appointment
3,9d994173c5775829c6b6badd898972522f038794,5,2023-11-24T14:00:17.658Z,primary_care,mhs_coaching
4,6be180209d276715e9256a78e8a73b13fe6a460d,2,2023-02-25T12:20:49.917Z,primary_care,md_np_appointment



Organizations Monthly Preview:


Unnamed: 0,DATE_MONTH,ORGANIZATION_ID,ELIGIBLE_MEMBERS,PEPM,HAS_PRIMARY_CARE_SINCE,HAS_MENTAL_HEALTH_SINCE,HAS_EAP_SINCE
0,2023-10-01T00:00:00Z,6,185,7.47,2000-01-01T00:00:00Z,,
1,2023-06-01T00:00:00Z,6,219,7.47,2000-01-01T00:00:00Z,,
2,2023-01-01T00:00:00Z,1,268,15.71,2000-01-01T00:00:00Z,2021-01-19T22:58:24.2361Z,2021-11-01T16:05:28.0127Z
3,2023-03-01T00:00:00Z,1,272,15.71,2000-01-01T00:00:00Z,2021-01-19T22:58:24.2361Z,2021-11-01T16:05:28.0127Z
4,2023-09-01T00:00:00Z,4,1436,14.21,2000-01-01T00:00:00Z,2021-06-01T12:15:33.8532Z,2023-04-05T21:32:35.77486Z



Cost to Serve Monthly Preview:


Unnamed: 0,DATE_MONTH,COST_TO_SERVE_PRIMARY_CARE,COST_TO_SERVE_MENTAL_HEALTH,COST_TO_SERVE_EAP
0,2023-01-01,60.04,334.29,112.27
1,2023-02-01,62.9,312.29,111.19
2,2023-03-01,62.96,298.27,119.29
3,2023-04-01,63.77,320.8,120.93
4,2023-05-01,65.27,276.54,114.95


Number of episodes by category for the year 2023: According to the result. EAP was used 67 times, there were 112 mental health consultations and 550 consultations for primary care

In [7]:
query = """
SELECT PROGRAM, COUNT(*) AS EPISODES_COUNT
FROM episodes
WHERE ORGANIZATION_ID = 4
GROUP BY PROGRAM;
"""
df = pd.read_sql_query(query, conn)
display(df)

Unnamed: 0,PROGRAM,EPISODES_COUNT
0,eap,67
1,mental_health,112
2,primary_care,550


Episodes opened by month. The following table represents the user engagement during the different months.

In [8]:
query_monthly_episodes = """
SELECT
  strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
  COUNT(EPISODE_ID) AS episodes_opened
FROM episodes
WHERE organization_id = 4
GROUP BY month
ORDER BY month;
"""

df = pd.read_sql_query(query_monthly_episodes, conn)
display(df)

Unnamed: 0,month,episodes_opened
0,2023-01,1
1,2023-02,1
2,2023-05,158
3,2023-06,89
4,2023-07,78
5,2023-08,76
6,2023-09,58
7,2023-10,90
8,2023-11,90
9,2023-12,88


Eligible members per month. From the month of May there is a spike in user eligibility and the number of users slowly increase until it reaches 1500 users by the end of the year

In [9]:
query_monthly_members = """
SELECT
  strftime('%Y-%m', date_month) AS month,
  eligible_members
FROM organizations_monthly
WHERE organization_id = 4
GROUP BY month
ORDER BY month;
"""

df = pd.read_sql_query(query_monthly_members, conn)
display(df)

Unnamed: 0,month,ELIGIBLE_MEMBERS
0,2023-01,59
1,2023-02,43
2,2023-03,42
3,2023-04,43
4,2023-05,1478
5,2023-06,1466
6,2023-07,1453
7,2023-08,1429
8,2023-09,1436
9,2023-10,1497


Combining queries in order to obtain member's utilization rate. The utilization rate spikes when there is an increase in user eligibility which happens in May 2023.

In [10]:
query_utilization_rate = """
WITH monthly_episodes AS (
  SELECT
    strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
    COUNT(EPISODE_ID) AS episodes_opened
  FROM episodes
  WHERE organization_id = 4
  GROUP BY month
),
monthly_members AS (
  SELECT
    strftime('%Y-%m', date_month) AS month,
    eligible_members
  FROM organizations_monthly
  WHERE organization_id = 4
  GROUP BY month
)
SELECT
  me.month,
  me.episodes_opened,
  mm.eligible_members,
  ROUND(((me.episodes_opened * 1.0/ mm.eligible_members) * 100), 2) AS utilization_rate_percentage
FROM monthly_episodes me
JOIN monthly_members mm ON me.month = mm.month
ORDER BY me.month;
"""

df = pd.read_sql_query(query_utilization_rate, conn)
display(df)

Unnamed: 0,month,episodes_opened,eligible_members,utilization_rate_percentage
0,2023-01,1,59,1.69
1,2023-02,1,43,2.33
2,2023-05,158,1478,10.69
3,2023-06,89,1466,6.07
4,2023-07,78,1453,5.37
5,2023-08,76,1429,5.32
6,2023-09,58,1436,4.04
7,2023-10,90,1497,6.01
8,2023-11,90,1500,6.0
9,2023-12,88,1528,5.76


Trends in utilization.
- The spike in user eligibility in May increases the rates of the service utilization.
- After the month of May, the rates decrease and only increase by 1.97% in October.

In [11]:
query_utilization_trends = """
WITH monthly_episodes AS (
  SELECT
    strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
    COUNT(EPISODE_ID) AS episodes_opened
  FROM episodes
  WHERE ORGANIZATION_ID = 4
  GROUP BY month
),
monthly_members AS (
  SELECT
    strftime('%Y-%m', date_month) AS month,
    eligible_members
  FROM organizations_monthly
  WHERE ORGANIZATION_ID = 4
  GROUP BY month
),
utilization_rates AS (
  SELECT
    me.month,
    me.episodes_opened,
    mm.eligible_members,
    ROUND(((me.episodes_opened * 1.0 / mm.eligible_members) * 100), 2) AS utilization_rate_percentage
  FROM monthly_episodes me
  JOIN monthly_members mm ON me.month = mm.month
),
utilization_trends AS (
  SELECT
    month,
    utilization_rate_percentage,
    utilization_rate_percentage - LAG(utilization_rate_percentage) OVER (ORDER BY month) AS mom_change
  FROM utilization_rates
)
SELECT
  month,
  utilization_rate_percentage,
  mom_change,
  CASE
    WHEN mom_change > 0 THEN 'Increase'
    WHEN mom_change < 0 THEN 'Decrease'
    ELSE 'Stable'
  END AS trend
FROM utilization_trends
ORDER BY month;
"""

df = pd.read_sql_query(query_utilization_trends, conn)
display(df)

Unnamed: 0,month,utilization_rate_percentage,mom_change,trend
0,2023-01,1.69,,Stable
1,2023-02,2.33,0.64,Increase
2,2023-05,10.69,8.36,Increase
3,2023-06,6.07,-4.62,Decrease
4,2023-07,5.37,-0.7,Decrease
5,2023-08,5.32,-0.05,Decrease
6,2023-09,4.04,-1.28,Decrease
7,2023-10,6.01,1.97,Increase
8,2023-11,6.0,-0.01,Decrease
9,2023-12,5.76,-0.24,Decrease


Episodes by program type per month. When the rates of utilization increase, the program that is used the most is primary care

In [12]:
query_changes_utilization_rate = """
SELECT
  strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
  PROGRAM,
  COUNT(EPISODE_ID) AS episodes_count
FROM episodes
WHERE organization_id = 4
GROUP BY month, PROGRAM
ORDER BY month, PROGRAM;
"""

df = pd.read_sql_query(query_changes_utilization_rate, conn)
display(df)

Unnamed: 0,month,PROGRAM,episodes_count
0,2023-01,primary_care,1
1,2023-02,primary_care,1
2,2023-05,eap,21
3,2023-05,mental_health,27
4,2023-05,primary_care,110
5,2023-06,eap,7
6,2023-06,mental_health,19
7,2023-06,primary_care,63
8,2023-07,eap,9
9,2023-07,mental_health,9


Adjusting the query to create separate columns for each program type

In [13]:
query_changes_utilization_rate_pivot = """
SELECT
  strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
  SUM(CASE WHEN PROGRAM = 'primary_care' THEN 1 ELSE 0 END) AS primary_care,
  SUM(CASE WHEN PROGRAM = 'eap' THEN 1 ELSE 0 END) AS eap,
  SUM(CASE WHEN PROGRAM = 'mental_health' THEN 1 ELSE 0 END) AS mental_health
FROM episodes
WHERE ORGANIZATION_ID = 4
GROUP BY month
ORDER BY month;
"""

df = pd.read_sql_query(query_changes_utilization_rate_pivot, conn)
display(df)

Unnamed: 0,month,primary_care,eap,mental_health
0,2023-01,1,0,0
1,2023-02,1,0,0
2,2023-05,110,21,27
3,2023-06,63,7,19
4,2023-07,60,9,9
5,2023-08,61,10,5
6,2023-09,44,2,12
7,2023-10,67,8,15
8,2023-11,67,5,18
9,2023-12,76,5,7


## Financial Analysis

Monthly costs to serve per program type including total costs per month

In [14]:
query_monthly_costs = """
WITH episodes_per_month AS (
  SELECT
    strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
    SUM(CASE WHEN PROGRAM = 'primary_care' THEN 1 ELSE 0 END) AS primary_care_episodes,
    SUM(CASE WHEN PROGRAM = 'eap' THEN 1 ELSE 0 END) AS eap_episodes,
    SUM(CASE WHEN PROGRAM = 'mental_health' THEN 1 ELSE 0 END) AS mental_health_episodes
  FROM episodes
  WHERE ORGANIZATION_ID = 4
  GROUP BY month
)
SELECT
   e.month,
  (e.primary_care_episodes * c.cost_to_serve_primary_care) AS total_cost_primary_care,
  (e.eap_episodes * c.cost_to_serve_eap) AS total_cost_eap,
  (e.mental_health_episodes * c.cost_to_serve_mental_health) AS total_cost_mental_health,
  (e.primary_care_episodes * c.cost_to_serve_primary_care) +
  (e.eap_episodes * c.cost_to_serve_eap) +
  (e.mental_health_episodes * c.cost_to_serve_mental_health) AS total_cost_to_serve
FROM episodes_per_month e
INNER JOIN cost_to_serve_monthly c ON e.month = strftime('%Y-%m', c.date_month)
ORDER BY e.month;
"""

df = pd.read_sql_query(query_monthly_costs, conn)
display(df)

Unnamed: 0,month,total_cost_primary_care,total_cost_eap,total_cost_mental_health,total_cost_to_serve
0,2023-01,60.04,0.0,0.0,60.04
1,2023-02,62.9,0.0,0.0,62.9
2,2023-05,7179.7,2413.95,7466.58,17060.23
3,2023-06,4261.32,931.14,5548.19,10740.65
4,2023-07,3961.2,1144.71,2809.44,7915.35
5,2023-08,3993.06,1321.0,1505.25,6819.31
6,2023-09,2739.0,197.8,2807.52,5744.32
7,2023-10,4014.64,679.68,3097.5,7791.82
8,2023-11,3576.46,377.85,4988.16,8942.47
9,2023-12,4138.96,736.25,2706.27,7581.48


Total_cost_to_serve per month
As expected, the month of May drove the highest total cost followed by the month of June

In [15]:
query_monthly_costs_total = """
WITH episodes_per_month AS (
  SELECT
    strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
    SUM(CASE WHEN PROGRAM = 'primary_care' THEN 1 ELSE 0 END) AS primary_care_episodes,
    SUM(CASE WHEN PROGRAM = 'eap' THEN 1 ELSE 0 END) AS eap_episodes,
    SUM(CASE WHEN PROGRAM = 'mental_health' THEN 1 ELSE 0 END) AS mental_health_episodes
  FROM episodes
  WHERE ORGANIZATION_ID = 4
  GROUP BY month
)
SELECT
   e.month,
  (e.primary_care_episodes * c.cost_to_serve_primary_care) +
  (e.eap_episodes * c.cost_to_serve_eap) +
  (e.mental_health_episodes * c.cost_to_serve_mental_health) AS total_cost_to_serve
FROM episodes_per_month e
INNER JOIN cost_to_serve_monthly c ON e.month = strftime('%Y-%m', c.date_month)
ORDER BY e.month;
"""

df = pd.read_sql_query(query_monthly_costs_total, conn)
display(df)

Unnamed: 0,month,total_cost_to_serve
0,2023-01,60.04
1,2023-02,62.9
2,2023-05,17060.23
3,2023-06,10740.65
4,2023-07,7915.35
5,2023-08,6819.31
6,2023-09,5744.32
7,2023-10,7791.82
8,2023-11,8942.47
9,2023-12,7581.48


Calculating monthly revenue: The highest monthly revenue was in december.

In [16]:
query_monthly_revenue = """
SELECT
  strftime('%Y-%m', date_month) AS month,
  pepm * eligible_members AS monthly_revenue
FROM organizations_monthly
WHERE organization_id = 4;
"""

df = pd.read_sql_query(query_monthly_revenue, conn)
display(df)

Unnamed: 0,month,monthly_revenue
0,2023-09,20405.56
1,2023-02,687.57
2,2023-03,671.58
3,2023-06,20831.86
4,2023-12,21712.88
5,2023-01,943.41
6,2023-08,20306.09
7,2023-11,21315.0
8,2023-07,20647.13
9,2023-05,21002.38


Merging monthly costs and monthly revenue to find the gross margin for each month. Gross margin = (revenue - total costs to serve)

In [17]:
query_gross_margin = """
WITH episodes_per_month AS (
  SELECT
    strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
    SUM(CASE WHEN PROGRAM = 'primary_care' THEN 1 ELSE 0 END) AS primary_care_episodes,
    SUM(CASE WHEN PROGRAM = 'eap' THEN 1 ELSE 0 END) AS eap_episodes,
    SUM(CASE WHEN PROGRAM = 'mental_health' THEN 1 ELSE 0 END) AS mental_health_episodes
  FROM episodes
  WHERE ORGANIZATION_ID = 4
  GROUP BY month
),
costs AS (
  SELECT
    e.month,
    (e.primary_care_episodes * c.cost_to_serve_primary_care) +
    (e.eap_episodes * c.cost_to_serve_eap) +
    (e.mental_health_episodes * c.cost_to_serve_mental_health) AS total_cost_to_serve
  FROM episodes_per_month e
  INNER JOIN cost_to_serve_monthly c ON e.month = strftime('%Y-%m', c.date_month)
),
revenue AS (
  SELECT
    strftime('%Y-%m', date_month) AS month,
    pepm * eligible_members AS monthly_revenue
  FROM organizations_monthly
  WHERE organization_id = 4
)
SELECT
  r.month,
  r.monthly_revenue,
  c.total_cost_to_serve,
  (r.monthly_revenue - c.total_cost_to_serve) AS gross_margin
FROM revenue r
JOIN costs c ON r.month = c.month
ORDER BY r.month;
"""

df = pd.read_sql_query(query_gross_margin, conn)
display(df)

Unnamed: 0,month,monthly_revenue,total_cost_to_serve,gross_margin
0,2023-01,943.41,60.04,883.37
1,2023-02,687.57,62.9,624.67
2,2023-05,21002.38,17060.23,3942.15
3,2023-06,20831.86,10740.65,10091.21
4,2023-07,20647.13,7915.35,12731.78
5,2023-08,20306.09,6819.31,13486.78
6,2023-09,20405.56,5744.32,14661.24
7,2023-10,21272.37,7791.82,13480.55
8,2023-11,21315.0,8942.47,12372.53
9,2023-12,21712.88,7581.48,14131.4


Annual gross margin = -728834.84
These results indicate that the costs associated with providing services to Dunder Miflin have exceeded the revenue generated from these services, leading to a negative gross margin for the year 2023.

This means that pepm may not be covering the costs of the services provided, or that the services are being utilized at a higher rate than anticipated, which could be driving up costs.

In [18]:
query_annual_gross_margin = """
WITH episodes_per_year AS (
  SELECT
    SUM(CASE WHEN PROGRAM = 'primary_care' THEN 1 ELSE 0 END) AS primary_care_episodes,
    SUM(CASE WHEN PROGRAM = 'eap' THEN 1 ELSE 0 END) AS eap_episodes,
    SUM(CASE WHEN PROGRAM = 'mental_health' THEN 1 ELSE 0 END) AS mental_health_episodes
  FROM episodes
  WHERE ORGANIZATION_ID = 4
  AND strftime('%Y', EPISODE_CREATED_AT) = '2023'
),
costs_per_year AS (
  SELECT
    SUM(primary_care_episodes * cost_to_serve_primary_care) +
    SUM(eap_episodes * cost_to_serve_eap) +
    SUM(mental_health_episodes * cost_to_serve_mental_health) AS total_cost_to_serve
  FROM episodes_per_year, cost_to_serve_monthly
  WHERE strftime('%Y', date_month) = '2023'
),
annual_revenue AS (
  SELECT
    SUM(pepm * eligible_members) AS annual_revenue
  FROM organizations_monthly
  WHERE organization_id = 4
  AND strftime('%Y', date_month) = '2023'
)
SELECT
  (SELECT annual_revenue FROM annual_revenue) AS total_revenue,
  (SELECT total_cost_to_serve FROM costs_per_year) AS total_cost,
  ((SELECT annual_revenue FROM annual_revenue) - (SELECT total_cost_to_serve FROM costs_per_year)) AS annual_gross_margin;
"""

df = pd.read_sql_query(query_annual_gross_margin, conn)
display(df)

Unnamed: 0,total_revenue,total_cost,annual_gross_margin
0,170483.4,899318.24,-728834.84


Projections for 2024

In [19]:
query_projections = """
WITH episodes_per_month AS (
  SELECT
    strftime('%Y-%m', EPISODE_CREATED_AT) AS month,
    SUM(CASE WHEN PROGRAM = 'primary_care' THEN 1 ELSE 0 END) AS primary_care_episodes,
    SUM(CASE WHEN PROGRAM = 'eap' THEN 1 ELSE 0 END) AS eap_episodes,
    SUM(CASE WHEN PROGRAM = 'mental_health' THEN 1 ELSE 0 END) AS mental_health_episodes
  FROM episodes
  WHERE ORGANIZATION_ID = 4
  GROUP BY month
),
costs AS (
  SELECT
    e.month,
    (e.primary_care_episodes * c.cost_to_serve_primary_care) +
    (e.eap_episodes * c.cost_to_serve_eap) +
    (e.mental_health_episodes * c.cost_to_serve_mental_health) AS total_cost_to_serve
  FROM episodes_per_month e
  INNER JOIN cost_to_serve_monthly c ON e.month = strftime('%Y-%m', c.date_month)
),
revenue AS (
  SELECT
    strftime('%Y-%m', date_month) AS month,
    pepm * eligible_members AS monthly_revenue
  FROM organizations_monthly
  WHERE organization_id = 4
),
gross_margin_2023 AS (
  SELECT
    r.month,
    r.monthly_revenue,
    c.total_cost_to_serve,
    (r.monthly_revenue - c.total_cost_to_serve) AS gross_margin
  FROM revenue r
  JOIN costs c ON r.month = c.month
),
annual_totals AS (
  SELECT
    SUM(monthly_revenue) AS total_revenue,
    SUM(total_cost_to_serve) AS total_cost,
    SUM(gross_margin) AS total_gross_margin,
    COUNT(*) AS count_months -- Added to count the number of months
  FROM gross_margin_2023
),
growth_rates AS (
  SELECT
    (total_revenue / count_months) AS avg_monthly_revenue,
    (total_cost / count_months) AS avg_monthly_cost,
    (total_gross_margin / count_months) AS avg_monthly_gross_margin
  FROM annual_totals
),
projections AS (
  SELECT
    '2024' AS year,
    avg_monthly_revenue * 12 AS projected_revenue,
    avg_monthly_cost * 12 AS projected_cost,
    avg_monthly_gross_margin * 12 AS projected_gross_margin
  FROM growth_rates
)
SELECT * FROM projections;
"""

df = pd.read_sql_query(query_projections, conn)
display(df)

Unnamed: 0,year,projected_revenue,projected_cost,projected_gross_margin
0,2024,202949.1,87262.284,115686.816


Code to create the report using plotly and save it as a pdf file. Each image is interactive. At the end, the images are exported into a pdf file

In [28]:
import plotly.express as px
import plotly.io as pio
from IPython.display import FileLink
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter

# SQL Queries
query_monthly_members = """
SELECT strftime('%Y-%m', date_month) AS month, eligible_members AS ELIGIBLE_MEMBERS
FROM organizations_monthly
WHERE organization_id = 4
GROUP BY month
ORDER BY month;
"""

query_episodes_by_program = """
SELECT PROGRAM, COUNT(*) AS EPISODES_COUNT
FROM episodes
WHERE ORGANIZATION_ID = 4
GROUP BY PROGRAM;
"""

query_utilization_rate = """
WITH monthly_episodes AS (
  SELECT strftime('%Y-%m', EPISODE_CREATED_AT) AS month, COUNT(EPISODE_ID) AS episodes_opened
  FROM episodes
  WHERE organization_id = 4
  GROUP BY month
), monthly_members AS (
  SELECT strftime('%Y-%m', date_month) AS month, eligible_members AS ELIGIBLE_MEMBERS
  FROM organizations_monthly
WHERE organization_id = 4
GROUP BY month
)
SELECT me.month, me.episodes_opened, mm.ELIGIBLE_MEMBERS,
ROUND(((me.episodes_opened * 1.0/ mm.ELIGIBLE_MEMBERS) * 100), 2) AS utilization_rate_percentage
FROM monthly_episodes me
JOIN monthly_members mm ON me.month = mm.month
ORDER BY me.month;
"""

# Fetching data
df_monthly_members = pd.read_sql_query(query_monthly_members, conn)
df_episodes_by_program = pd.read_sql_query(query_episodes_by_program, conn)
df_utilization_rate = pd.read_sql_query(query_utilization_rate, conn)
df_monthly_costs_total = pd.read_sql_query(query_monthly_costs_total, conn)

# Visualization 1: Total Members per Month
fig_members_per_month = px.bar(df_monthly_members, x='month', y='ELIGIBLE_MEMBERS', title='Total Eligible Members per Month', labels={'ELIGIBLE_MEMBERS': 'Eligible Members'}, color='ELIGIBLE_MEMBERS')
fig_members_per_month.show()

# Visualization 2: Episodes by Program
fig_episodes_by_program = px.pie(df_episodes_by_program, names='PROGRAM', values='EPISODES_COUNT', title="Usage by Program's Type", hole=.3)
fig_episodes_by_program.show()

# Visualization 3: Utilization Rate per Month
fig_utilization_rate = px.line(df_utilization_rate, x='month', y='utilization_rate_percentage', title='Utilization Rate Percentage per Month', labels={'utilization_rate_percentage': 'Utilization Rate (%)'})
fig_utilization_rate.add_bar(x=df_utilization_rate['month'], y=df_utilization_rate['episodes_opened'], name='Episodes Opened')
fig_utilization_rate.show()

# Visualization 4: Total Cost to Serve per Month
fig_total_cost_to_serve = px.bar(
    df_monthly_costs_total,
    x='month',
    y='total_cost_to_serve',
    title='Total Cost per Month',
    labels={'total_cost_to_serve': 'Total Cost to Serve'},
    color='total_cost_to_serve'
)
fig_total_cost_to_serve.show()

# Save Plotly figures as images
pio.write_image(fig_members_per_month, 'Sample Data/fig_members_per_month.png')
pio.write_image(fig_episodes_by_program, 'Sample Data/fig_episodes_by_program.png')
pio.write_image(fig_utilization_rate, 'Sample Data/fig_utilization_rate.png')
pio.write_image(fig_total_cost_to_serve, 'Sample Data/fig_total_cost_per_month.png')


# Create a PDF canvas with the letter page size
c = canvas.Canvas("user_engagement_report.pdf", pagesize=letter)
width, height = letter

# Set title with a contrasting color (e.g., black) and larger size
c.setFont("Helvetica-Bold", 24)
c.setFillColorRGB(0, 0, 0)  # Set text color to black
c.drawCentredString(width / 2.0, height - 100, "User Engagement Report for Dunder Miflin")

# Function to add images without cutting off
def add_image(image_path, position, c, width=500, height=180):
    x = (letter[0] - width) / 2  # Center the image
    c.drawImage(image_path, x, position, width=width, height=height, mask='auto')

# Calculate positions for images and add them
positions = [height - 150 - ((i + 1) * (height / 5)) for i in range(4)]
images = ['Sample Data/fig_members_per_month.png', 'Sample Data/fig_episodes_by_program.png', 'Sample Data/fig_utilization_rate.png', 'Sample Data/fig_total_cost_per_month.png']

for position, image in zip(positions, images):
    add_image(image, position, c)

c.showPage()
c.save()

# Generate a download link for the PDF (This line works in Jupyter notebooks)
FileLink(r'user_engagement_report.pdf')

In [29]:
conn.close()