# mcbroken

This script presents how much money McDonalds might be losing due to broken ice cream machines in the US.

Data filtering, web scraping for making assumptions about the numbers (ice cream price, population size in each state), calculations, dailly reports and displaying the total amount lost in revenue is performed.

Daily reports are generated based on the time window - 24 hours before running the script.

---
Data used for analysis comes from the https://mcbroken.com/ site.


In [1]:
import requests
from datetime import datetime


def fetch_data_from_url(url, get_content=False):
    data = None

    try:
        response = requests.get(url)
        if get_content:
            data = response.content
        else:
            data = response.json()
        print("Data successfully received!")
    # Catch different exceptions
    except requests.exceptions.ConnectionError:
        print("Connection error")
    except requests.exceptions.Timeout:
        print("Timeout error")
    except requests.exceptions.TooManyRedirects:
        print("Too many redirects")
    except requests.exceptions.RequestException as e:
        print("Other error: ", e)

    return data

In [2]:
import json


def save_json_file(data, file_name):
    with open(file_name, "w") as f:
        json_data = json.dump(data, f, indent=4)
        print(f"Data successfully saved in {file_name}")

In [3]:
URL_MC_BROKEN_DATA = "https://mcbroken2.nyc3.digitaloceanspaces.com/markers.json"

date_data_fetched = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
data = fetch_data_from_url(URL_MC_BROKEN_DATA)

Data successfully received!


In [4]:
# Name of the file to save data to according to date fetched
file_name = f"mcbroken-data-{date_data_fetched}.json"

# Save data dict as a json file
save_json_file(data, file_name)

Data successfully saved in mcbroken-data-2022-09-23_22-49-50.json


In [5]:
# Map states to full names
states_full_names = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "DC": "District of Columbia",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "GU": "Guam",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
}

In [6]:
# Verify if there is a fake state

def verify_state(state):
    if state in states_full_names:
        return True
    print(f"State '{state}' is not valid")
    return False

In [7]:
import re


MINUTES_IN_24_HOURS = 1440

# Only data from the last 24 hours is considered - one day report
def get_data_from_US_states(data):
    features_24_hours = []
    for feature in data["features"]:
        # Get data only for US country
        if feature["properties"]["country"] == "USA":
            # Verify state
            if verify_state(feature["properties"]["state"]):
                last_checked = feature["properties"]["last_checked"]
                if "minutes" or "minute" in last_checked:
                    minutes_list = [int(num) for num in re.findall(r'\b\d+\b', last_checked)]
                    if minutes_list[0] <= MINUTES_IN_24_HOURS:
                        features_24_hours.append(feature)

    return features_24_hours


In [8]:
import datetime


# Create a list of features that last updated in the last 24 hours
features_24_hours = get_data_from_US_states(data)

# Save features_24_hours as a json file
save_json_file(features_24_hours, f"mcbroken-features-24-hours-{date_data_fetched}.json")

number_of_features = len(data["features"])
number_of_features_24_hours = len(features_24_hours)

print(f"Number of features: {number_of_features}")
print(f"Number of features updated in the last 24 hours: {number_of_features_24_hours}")

assert number_of_features != 0, "No features updated in the last 24 hours"
assert number_of_features_24_hours < len(data["features"]), "All features updated in the last 24 hours"

State 'Maharastra' is not valid
State 'Maharastra' is not valid
State 'Maharastra' is not valid
State 'MP' is not valid
State 'MP' is not valid
Data successfully saved in mcbroken-features-24-hours-2022-09-23_22-49-50.json
Number of features: 18071
Number of features updated in the last 24 hours: 12706


In [9]:
# Get the machines number for each state
# Create a dict of states and their mcdonalds ice cream machines numbers
states_machines_number = {}

for feature in features_24_hours:
    state = feature["properties"]["state"]
    if state in states_machines_number:
        states_machines_number[state] += 1
    else:
        states_machines_number[state] = 1

print(states_machines_number)

{'NY': 561, 'NJ': 241, 'CT': 130, 'PA': 456, 'DE': 35, 'MA': 222, 'MD': 256, 'RI': 29, 'VT': 24, 'NH': 50, 'DC': 25, 'VA': 379, 'WV': 99, 'ME': 57, 'NC': 439, 'OH': 584, 'MI': 500, 'KY': 243, 'TN': 306, 'SC': 215, 'IN': 330, 'GA': 422, 'IL': 625, 'WI': 281, 'AL': 230, 'FL': 818, 'IA': 136, 'MO': 294, 'MS': 134, 'AR': 159, 'MN': 208, 'KS': 137, 'LA': 217, 'NE': 74, 'OK': 182, 'SD': 27, 'ND': 23, 'TX': 1055, 'CO': 184, 'WY': 25, 'MT': 44, 'NM': 87, 'UT': 101, 'AZ': 253, 'ID': 56, 'NV': 117, 'WA': 236, 'OR': 147, 'CA': 1158, 'AK': 24, 'HI': 71}


In [10]:
# Create a dict of states and their mcdonalds machines ice cream numbers with full names
states_machines_number_full_names = {}

for state, number in states_machines_number.items():
    states_machines_number_full_names[states_full_names[state]] = {"machines_number": number}

print(f"States and their McDonalds numbers: {states_machines_number_full_names}")
save_json_file(states_machines_number_full_names, f"mcbroken-states-machines-number-{date_data_fetched}.json")

States and their McDonalds numbers: {'New York': {'machines_number': 561}, 'New Jersey': {'machines_number': 241}, 'Connecticut': {'machines_number': 130}, 'Pennsylvania': {'machines_number': 456}, 'Delaware': {'machines_number': 35}, 'Massachusetts': {'machines_number': 222}, 'Maryland': {'machines_number': 256}, 'Rhode Island': {'machines_number': 29}, 'Vermont': {'machines_number': 24}, 'New Hampshire': {'machines_number': 50}, 'District of Columbia': {'machines_number': 25}, 'Virginia': {'machines_number': 379}, 'West Virginia': {'machines_number': 99}, 'Maine': {'machines_number': 57}, 'North Carolina': {'machines_number': 439}, 'Ohio': {'machines_number': 584}, 'Michigan': {'machines_number': 500}, 'Kentucky': {'machines_number': 243}, 'Tennessee': {'machines_number': 306}, 'South Carolina': {'machines_number': 215}, 'Indiana': {'machines_number': 330}, 'Georgia': {'machines_number': 422}, 'Illinois': {'machines_number': 625}, 'Wisconsin': {'machines_number': 281}, 'Alabama': {'m

In [11]:
# Get the broken machines number for each state
# Create a dict of states and their mcdonalds ice cream broken machines numbers
states_machines_broken_number = {}

for feature in features_24_hours:
    state = feature["properties"]["state"]
    if feature["properties"]["is_broken"] == True and feature["properties"]["dot"] == "broken":
        if state in states_machines_broken_number:
            states_machines_broken_number[state] += 1
        else:
            states_machines_broken_number[state] = 1

save_json_file(states_machines_broken_number, f"mcbroken-broken-features-24-hours-{date_data_fetched}.json")
print(f"Number of broken features: {len(states_machines_broken_number)}") 
print(f"states_machines_number[state]: {states_machines_broken_number}")

Data successfully saved in mcbroken-broken-features-24-hours-2022-09-23_22-49-50.json
Number of broken features: 50
states_machines_number[state]: {'NY': 53, 'NJ': 21, 'PA': 37, 'CT': 5, 'DE': 1, 'MA': 23, 'MD': 26, 'NH': 1, 'DC': 1, 'VA': 43, 'WV': 4, 'VT': 1, 'ME': 2, 'NC': 38, 'OH': 63, 'MI': 45, 'KY': 21, 'SC': 33, 'TN': 29, 'IN': 38, 'GA': 64, 'IL': 53, 'WI': 18, 'AL': 16, 'FL': 69, 'IA': 10, 'MO': 29, 'MS': 24, 'AR': 18, 'MN': 12, 'KS': 9, 'LA': 24, 'NE': 9, 'SD': 2, 'OK': 23, 'ND': 2, 'TX': 119, 'CO': 15, 'WY': 1, 'NM': 5, 'MT': 7, 'AZ': 33, 'UT': 13, 'ID': 5, 'WA': 29, 'OR': 25, 'NV': 10, 'CA': 120, 'AK': 4, 'HI': 9}


In [12]:
for state, number in states_machines_broken_number.items():
    states_machines_number_full_names[states_full_names[state]]["broken_machines_number"] = number

# If state has no broken machines, add 0
for state in states_machines_number_full_names:
    if "broken_machines_number" not in states_machines_number_full_names[state]:
        states_machines_number_full_names[state]["broken_machines_number"] = 0

print(f"States and their McDonalds broken machines numbers: {states_machines_number_full_names}")

States and their McDonalds broken machines numbers: {'New York': {'machines_number': 561, 'broken_machines_number': 53}, 'New Jersey': {'machines_number': 241, 'broken_machines_number': 21}, 'Connecticut': {'machines_number': 130, 'broken_machines_number': 5}, 'Pennsylvania': {'machines_number': 456, 'broken_machines_number': 37}, 'Delaware': {'machines_number': 35, 'broken_machines_number': 1}, 'Massachusetts': {'machines_number': 222, 'broken_machines_number': 23}, 'Maryland': {'machines_number': 256, 'broken_machines_number': 26}, 'Rhode Island': {'machines_number': 29, 'broken_machines_number': 0}, 'Vermont': {'machines_number': 24, 'broken_machines_number': 1}, 'New Hampshire': {'machines_number': 50, 'broken_machines_number': 1}, 'District of Columbia': {'machines_number': 25, 'broken_machines_number': 1}, 'Virginia': {'machines_number': 379, 'broken_machines_number': 43}, 'West Virginia': {'machines_number': 99, 'broken_machines_number': 4}, 'Maine': {'machines_number': 57, 'bro

In [13]:
# Check if broken_machines_number is in states_machines_number_full_names
for state, data in states_machines_number_full_names.items():
    assert "broken_machines_number" in data, f"broken_machines_number not in {state}"

### Get the average value for one ice cream


This is the first assumption made.
The average price of ice cream is calculated.

***
The prices of ice creams come from the site https://www.dessertmenus.com/mcdonalds-ice-cream-prices/.



In [14]:
# Get the McDonald's Ice Cream Cone price

import requests
from datetime import datetime

URL_ICE_CREAM_PRICES = "https://www.dessertmenus.com/mcdonalds-ice-cream-prices/"

price_data = fetch_data_from_url(URL_ICE_CREAM_PRICES, get_content=True)

Data successfully received!


In [15]:
from bs4 import BeautifulSoup


soup = BeautifulSoup(price_data, "html.parser")

# Search for table with MCONALDS ICE CREAM PRICES
table = soup.find("center", text="MCDONALDS ICE CREAM PRICES").find_parent("table")

# Get all the prices of the ice cream from table
ice_cream_prices = []

for row in table.find_all("tr"):
    for cell in row.find_all("td"):
        ice_cream_prices.append(cell.text)

# Get only the prices with $ sign
ice_cream_prices = [price for price in ice_cream_prices if "$" in price]

print(f"Possible ice cream prices: {ice_cream_prices}")

# Calculate the average price of the ice cream
ice_cream_prices = [float(price.replace("$", "")) for price in ice_cream_prices]
average_ice_cream_price = sum(ice_cream_prices) / len(ice_cream_prices)

print(f"Average ice cream price: ${average_ice_cream_price}")

# Save the average ice cream price in a json file with the date

average_price_data = {
    "average_ice_cream_price": average_ice_cream_price,
    "date_data_fetched": date_data_fetched,
}

save_json_file(average_price_data, f"mcbroken-average-ice-cream-price-{date_data_fetched}.json")

Possible ice cream prices: ['$1.00', '$1.29', '$1.29', '$1.29']
Average ice cream price: $1.2175
Data successfully saved in mcbroken-average-ice-cream-price-2022-09-23_22-49-50.json


## Get population size in each state in USA

Population size estimation from July 1, 2021 from Wikipedia page.

In [16]:
URL_USA_STATES_POPULATION = "https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population"

# Get the population of each state
population_data = fetch_data_from_url(URL_USA_STATES_POPULATION, get_content=True)

state_population_dict = {}

soup_population_data = BeautifulSoup(population_data, "html.parser")

# Search for table with wikitable class
table = soup_population_data.find("table", class_="wikitable")

# Get tbody from table
tbody = table.find("tbody")

# Iterate over all rows in tbody - save the state and population
for row in tbody.find_all("tr"):
    # Get th element from row
    th = row.find_all("th")

    # Filter th only with row scope
    if th and th[0].get("scope") == "row":
        # Get state name from th
        state = th[0].text.strip()

        # Get population from td after th for given state
        population = row.find_all("td")[2].text.strip()

        # Convert population to int if not empty and not N/A
        if population != "N/A":
            population = int(population.replace(",", ""))

        # Save state and population in dictionary
        state_population_dict[state] = population

print(state_population_dict)

# Assert whether the states are in the population dictionary
for state in states_machines_number_full_names:
    assert state in state_population_dict, f"{state} not in state_population_dict"


Data successfully received!
{'California': 39237836, 'Texas': 29527941, 'Florida': 21781128, 'New York': 19835913, 'Pennsylvania': 12964056, 'Illinois': 12671469, 'Ohio': 11780017, 'Georgia': 10799566, 'North Carolina': 10551162, 'Michigan': 10050811, 'New Jersey': 9267130, 'Virginia': 8642274, 'Washington': 7738692, 'Arizona': 7276316, 'Massachusetts': 6984723, 'Tennessee': 6975218, 'Indiana': 6805985, 'Missouri': 6168187, 'Maryland': 6165129, 'Wisconsin': 5895908, 'Colorado': 5812069, 'Minnesota': 5707390, 'South Carolina': 5190705, 'Alabama': 5039877, 'Louisiana': 4624047, 'Kentucky': 4509394, 'Oregon': 4246155, 'Oklahoma': 3986639, 'Connecticut': 3605597, 'Utah': 3322389, 'Puerto Rico': 3263584, 'Iowa': 3193079, 'Nevada': 3143991, 'Arkansas': 3025891, 'Mississippi': 2949965, 'Kansas': 2934582, 'New Mexico': 2115877, 'Nebraska': 1963692, 'Idaho': 1900923, 'West Virginia': 1782959, 'Hawaii': 1441553, 'New Hampshire': 1388992, 'Maine': 1372247, 'Rhode Island': 1095610, 'Montana': 1104

In [17]:
data_for_analysis = states_machines_number_full_names

# Add population to data_for_analysis
for state, data in data_for_analysis.items():
    data["population"] = state_population_dict[state]

print(data_for_analysis)

{'New York': {'machines_number': 561, 'broken_machines_number': 53, 'population': 19835913}, 'New Jersey': {'machines_number': 241, 'broken_machines_number': 21, 'population': 9267130}, 'Connecticut': {'machines_number': 130, 'broken_machines_number': 5, 'population': 3605597}, 'Pennsylvania': {'machines_number': 456, 'broken_machines_number': 37, 'population': 12964056}, 'Delaware': {'machines_number': 35, 'broken_machines_number': 1, 'population': 1003384}, 'Massachusetts': {'machines_number': 222, 'broken_machines_number': 23, 'population': 6984723}, 'Maryland': {'machines_number': 256, 'broken_machines_number': 26, 'population': 6165129}, 'Rhode Island': {'machines_number': 29, 'broken_machines_number': 0, 'population': 1095610}, 'Vermont': {'machines_number': 24, 'broken_machines_number': 1, 'population': 645570}, 'New Hampshire': {'machines_number': 50, 'broken_machines_number': 1, 'population': 1388992}, 'District of Columbia': {'machines_number': 25, 'broken_machines_number': 1

## Performing calculations for generating report

In [18]:
# Assumption - on average 1% of the population in a given state eats ice cream every day

# Calculate the number of ice cream cones sold in a given state
for state, data in data_for_analysis.items():
    data["ice_cream_sold"] = data["population"] * 0.01

# Calculate how many ice cream cones are sold in a given state per machine
for state, data in data_for_analysis.items():
    data["ice_cream_sold_per_machine"] = data["ice_cream_sold"] / data["machines_number"]

# Calculate the percentage of broken machines in a given state
for state, data in data_for_analysis.items():
    data["broken_machines_percentage"] = (data["broken_machines_number"] / data["machines_number"])*100

# Calculate how much money a machine makes in a given state
for state, data in data_for_analysis.items():
    data["money_per_machine"] = data["ice_cream_sold_per_machine"] * average_ice_cream_price

# Calculate how much money is lost due to broken machines in a given state
for state, data in data_for_analysis.items():
    data["money_lost_due_to_broken_machines"] = data["broken_machines_number"] * data["money_per_machine"]

# Calculate how much money is lost due to broken machines in a given state in percentage
for state, data in data_for_analysis.items():
    data["money_lost_due_to_broken_machines_percentage"] = (data["money_lost_due_to_broken_machines"] / (data["machines_number"] * data["money_per_machine"])) * 100

# Calculate the total revenue possible in a given state
for state, data in data_for_analysis.items():
    data["total_revenue_possible"] = data["machines_number"] * data["money_per_machine"]

# Calculate percentege of broken machines in the USA
broken_machines_percentage = sum([data["broken_machines_number"] for data in data_for_analysis.values()]) / sum([data["machines_number"] for data in data_for_analysis.values()]) * 100
print(f"Percentage of broken machines in the USA: {broken_machines_percentage}%")

# Calculate total money earned by selling ice cream in the USA in a day taking into consideration the broken machines
total_money_earned = sum([data["money_per_machine"] * (data["machines_number"] - data["broken_machines_number"]) for data in data_for_analysis.values()])
print(f"Total money earned by selling ice cream in the USA in a day taking into consideration the broken machines: ${total_money_earned}")

# Calculate total possible revenue in the USA in a day not taking into consideration the broken machines
total_possible_revenue = sum([data["machines_number"] * data["money_per_machine"] for data in data_for_analysis.values()])
print(f"Total possible revenue in the USA in a day not taking into consideration the broken machines: ${total_possible_revenue}")

# Calculate the total money lost due to broken machines in the USA
total_money_lost_due_to_broken_machines = sum([data["money_lost_due_to_broken_machines"] for data in data_for_analysis.values()])
print(f"Total money lost due to broken machines in the USA: ${total_money_lost_due_to_broken_machines}")

# Calculate the total money lost due to broken machines in the USA in percentage
total_money_lost_due_to_broken_machines_percentage = (total_money_lost_due_to_broken_machines / total_possible_revenue) * 100
print(f"Total money lost due to broken machines in the USA in percentage: {total_money_lost_due_to_broken_machines_percentage}%")

save_json_file(data_for_analysis, f"mcbroken-data-for-analysis-{date_data_fetched}.json")

Percentage of broken machines in the USA: 9.93231544152369%
Total money earned by selling ice cream in the USA in a day taking into consideration the broken machines: $3638372.5869431873
Total possible revenue in the USA in a day not taking into consideration the broken machines: $4040616.585825
Total money lost due to broken machines in the USA: $402243.9988818118
Total money lost due to broken machines in the USA in percentage: 9.955015288828324%
Data successfully saved in mcbroken-data-for-analysis-2022-09-23_22-49-50.json


In [19]:
# Create full report for a given day
# The summary numbers are rounded to 2 decimal places
# Save full report to json file

import copy

data_for_report = copy.deepcopy(data_for_analysis)

round_to_2_decimal_places = lambda x: round(x, 2)
add_dolar_sign = lambda x: f"${x}"
add_percentage_sign = lambda x: f"{x}%"

cost = lambda x: add_dolar_sign(round_to_2_decimal_places(x))
percentage = lambda x: add_percentage_sign(round_to_2_decimal_places(x))


# Apply cost and percentage to the data
for state, data in data_for_report.items():
    data["ice_cream_sold"] = cost(data["ice_cream_sold"])
    data["ice_cream_sold_per_machine"] = cost(data["ice_cream_sold_per_machine"])
    data["broken_machines_percentage"] = percentage(data["broken_machines_percentage"])
    data["money_per_machine"] = cost(data["money_per_machine"])
    data["money_lost_due_to_broken_machines"] = cost(data["money_lost_due_to_broken_machines"])
    data["money_lost_due_to_broken_machines_percentage"] = percentage(data["money_lost_due_to_broken_machines_percentage"])
    data["total_revenue_possible"] = cost(data["total_revenue_possible"])

full_report = {
    "date_data_fetched": date_data_fetched,
    "average_ice_cream_price": cost(average_ice_cream_price),
    "broken_machines_percentage": percentage(broken_machines_percentage),
    "total_possible_revenue": cost(total_possible_revenue),
    "total_money_lost_due_to_broken_machines": cost(total_money_lost_due_to_broken_machines),
    "total_money_lost_due_to_broken_machines_percentage": percentage(total_money_lost_due_to_broken_machines_percentage),
    "data_for_report": data_for_report
}

save_json_file(full_report, f"mcbroken-full-report-{date_data_fetched}.json")

Data successfully saved in mcbroken-full-report-2022-09-23_22-49-50.json


## Data visualization

In [20]:
! pip install kaleido

Defaulting to user installation because normal site-packages is not writeable


In [21]:
! pip install dash
! pip install dash-bootstrap-components
import dash
import dash_core_components as dcc
import dash_bootstrap_components as dbc

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc


In [22]:
# Visualize the data using plotly

import plotly.express as px
import pandas as pd


# Create a dataframe for the data_for_analysis
df = pd.DataFrame(data_for_analysis).T


# Create a bar chart showing the number of broken machines in each state
fig_bar_chart_broken_machines = px.bar(
    df,
    x=df.index,
    y="broken_machines_number",
    title="Number of broken machines in each state",
    labels={"x": "State", "y": "Number of broken machines"},
)

fig_bar_chart_broken_machines.show()

# Save the bar chart as a png file
fig_bar_chart_broken_machines.write_image("mcbroken-bar-chart-broken-machines.png")

# Save the bar to dcc graph
dcc_graph_bar_chart_broken_machines = dcc.Graph(figure=fig_bar_chart_broken_machines)


In [23]:
# Create a bar chart showing the total money lost due to broken machines in each state
fig = px.bar(df, x=df.index, y="money_lost_due_to_broken_machines", title="Total money lost due to broken machines in each state")
fig.show()

# Save the bar chart as a png file
fig.write_image("mcbroken-bar-chart-money-lost-due-to-broken-machines.png")

# Save as dash dcc graph
dcc_graph_bar_chart_money_lost_due_to_broken_machines = dcc.Graph(figure=fig)


In [24]:
# Create a bar chart showing the total money lost due to broken machines in each state in percentage
fig = px.bar(df, x=df.index, y="money_lost_due_to_broken_machines_percentage", title="Total money lost due to broken machines in each state in percentage")
fig.show()

# Save the bar chart as a png file
fig.write_image("mcbroken-bar-chart-money-lost-due-to-broken-machines-percentage.png")

# Save as dash dcc graph
dcc_graph_bar_chart_money_lost_due_to_broken_machines_percentage = dcc.Graph(figure=fig)

In [25]:
# Create a bar chart showing side by side the total revenue possible in each state comparing with money lost due to broken machines 
fig = px.bar(
    df,
    x=df.index,
    y=["total_revenue_possible", "money_lost_due_to_broken_machines"],
    title="Total revenue possible in each state comparing with money lost due to broken machines",
    labels={"x": "State", "value": "Money in dollars", "variable": "Type of money"},
    barmode="group",
)
fig.show()

# Save as dash dcc graph
dcc_graph_bar_chart_total_revenue_possible_vs_money_lost_due_to_broken_machines = dcc.Graph(figure=fig)

In [26]:
# Create a pie chart showing the percentage of broken machines in each state
fig = px.pie(df, values="broken_machines_number", names=df.index, title="Percentage of broken machines in each state")
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

# Save the pie chart as a png file
fig.write_image("mcbroken-pie-chart-broken-machines-percentage.png")

# Save as dash dcc graph
dcc_graph_pie_chart_broken_machines_percentage = dcc.Graph(figure=fig)

In [None]:
# Create a pie chart showing the percentage of money lost due to broken machines in each state
fig = px.pie(df, values="money_lost_due_to_broken_machines", names=df.index, title="Percentage of money lost due to broken machines in each state")
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

# Save the pie chart as a png file
fig.write_image("mcbroken-pie-chart-money-lost-due-to-broken-machines-percentage.png")

# Save as dash dcc graph
dcc_graph_pie_chart_money_lost_due_to_broken_machines_percentage = dcc.Graph(figure=fig)

In [31]:
# Create a scatter plot showing the relation between the number of broken machines and the total money lost due to broken machines according to a state
fig_ = px.scatter(
    df,
    x="broken_machines_number",
    y="money_lost_due_to_broken_machines",
    title="Relation between the number of broken machines and the total money lost due to broken machines according to a state",
    size="population",
    labels={"x": "Number of broken machines", "y": "Total money lost due to broken machines"},
    hover_data=["population", "total_revenue_possible", "money_lost_due_to_broken_machines_percentage"],
    hover_name=df.index,
    color="population",
)

fig_.show()

# Save the scatter plot as dcc graph
dcc_graph_scatter_plot_broken_machines_vs_money_lost_due_to_broken_machines = dcc.Graph(figure=fig_)


In [32]:
# Create dash application with statistics and charts

from turtle import st
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px

# Create dash application
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


# Create dash table with statistics from the data
# Include index as a column
table = dbc.Table.from_dataframe(df.reset_index(), striped=True, bordered=True, hover=True)


def create_dbc_card_for_table(title, table):
    return dbc.Card(
        [
            dbc.CardHeader(title),
            dbc.CardBody(
                [
                    table,
                ], style={"maxWidth": "100%", "maxHeight": "500px", "overflow": "scroll"}
            ),
        ]
    )


# Put the table in a card
card_table = create_dbc_card_for_table("Statistics", table)

# Change app title
app.title = "Machines broken report"

# Create dash layout
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col([
            html.H2("McBroken Report"),
            html.Hr(),
            html.H6(f"Date generated: {date_data_fetched}"),
            html.P(
                "This is a report about the number of broken machines in each state in the US and the money lost due to broken machines in each state.",
                className="card-text",
            ),
            html.P(
                "This daily report is generated based on the time window - 24 hours before running the main script.",
                className="card-text text-info",
            ),
            dbc.Card(
                [
                    dbc.CardHeader("Summary"),
                    dbc.Row([
                        dbc.Col(
                            dbc.CardBody(
                                [
                                    html.H5("Average ice cream price", className="card-title"),
                                    html.P(f"{cost(average_ice_cream_price)}", className="card-text"),
                                    html.H5("Broken machines percentage", className="card-title"),
                                    html.P(f"{percentage(broken_machines_percentage)}", className="card-text"),
                                    html.H5("Total money earned", className="card-title"),
                                    html.P(f"{cost(total_money_earned)}", className="card-text"),
                                ],
                            ),
                        ),
                        dbc.Col(
                            dbc.CardBody(
                                [    
                                    html.H5("Total possible revenue", className="card-title", style={"color": "green"}),
                                    html.P(f"{cost(total_possible_revenue)}", className="card-text", style={"color": "green"}),
                                    html.H5("Total money lost due to broken machines", className="card-title", style={"color": "red"}),
                                    html.P(f"{cost(total_money_lost_due_to_broken_machines)}", className="card-text", style={"color": "red"}),
                                    html.H5("Total money lost due to broken machines - percentage", className="card-title", style={"color": "red"}),
                                    html.P(f"{percentage(total_money_lost_due_to_broken_machines_percentage)}", className="card-text", style={"color": "red"}),
                                ],
                            ),
                        ),
                    ]),
                ], className="mb-3",
            ),
            card_table,
            dcc_graph_scatter_plot_broken_machines_vs_money_lost_due_to_broken_machines,
            dcc_graph_bar_chart_broken_machines,
            dcc_graph_bar_chart_money_lost_due_to_broken_machines,
            dcc_graph_bar_chart_money_lost_due_to_broken_machines_percentage,
            dcc_graph_pie_chart_broken_machines_percentage,
        ])
    ])
], className="p-3")


# Run dash application
app.run_server()


Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [23/Sep/2022 22:51:48] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [23/Sep/2022 22:51:48] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [23/Sep/2022 22:51:48] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [23/Sep/2022 22:51:48] "GET /_favicon.ico?v=2.6.2 HTTP/1.1" 200 -
127.0.0.1 - - [23/Sep/2022 22:51:48] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
127.0.0.1 - - [23/Sep/2022 22:51:48] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -
