In [None]:
from IPython.display import HTML, display

# Stylesheet injected into the notebook output: constrains the page width,
# centers text, tables, images and headings, and keeps code blocks left-aligned.
# Selectors for both Classic Notebook and JupyterLab wrappers are included.
page_css = """
<style>
/* Center the entire page and make it responsive */
html, body { margin: 0; }
body {
  max-width: 1200px;
  margin: 0 auto;
  padding: 0 10px;
  text-align: center; /* center text by default */
}

/* Classic Notebook wrappers */
div.text_cell_render,
div.cell,
div.output_wrapper,
div.output,
div.output_area {
  max-width: 1200px;
  margin-left: auto;
  margin-right: auto;
  text-align: inherit;
}

/* JupyterLab wrappers (for HTML export using lab template) */
.jp-Notebook,
.jp-Cell,
.jp-OutputArea,
.jp-RenderedHTMLCommon {
  max-width: 1200px;
  margin-left: auto;
  margin-right: auto;
  text-align: inherit;
}

/* Center tables and their content */
table {
  margin-left: auto !important;
  margin-right: auto !important;
}
table th, table td {
  text-align: center !important;
  vertical-align: middle;
}

/* Center images and SVGs */
img, svg {
  display: block;
  margin-left: auto;
  margin-right: auto;
}

/* Headings centered explicitly */
h1, h2, h3, h4, h5, h6 {
  text-align: center;
}

/* Keep code blocks readable (left-aligned) */
pre, code, .highlight, .jp-RenderedHTMLCommon pre {
  text-align: left !important;
}
</style>
"""

# Render the stylesheet as HTML output so it applies to the rendered page.
display(HTML(page_css))


<style>
/* Center the entire page and make it responsive (applies in exports) */
html, body { margin: 0; }
body {
  max-width: 1200px;
  margin: 0 auto;
  padding: 0 10px;
  text-align: center;
}
div.text_cell_render, div.cell, div.output_wrapper, div.output, div.output_area,
.jp-Notebook, .jp-Cell, .jp-OutputArea, .jp-RenderedHTMLCommon {
  max-width: 1200px;
  margin-left: auto;
  margin-right: auto;
  text-align: inherit;
}
table {
  margin-left: auto !important;
  margin-right: auto !important;
}
table th, table td {
  text-align: center !important;
  vertical-align: middle;
}
img, svg {
  display: block;
  margin-left: auto;
  margin-right: auto;
}
h1, h2, h3, h4, h5, h6 {
  text-align: center;
}
pre, code, .highlight, .jp-RenderedHTMLCommon pre {
  text-align: left !important;
}
</style>

# Dog breeds – lifespan and weight report

This report is generated from the BigQuery dataset and summarises lifespan, weight classes,  
and family-friendly temperaments across dog breeds.


In [None]:
import warnings
from google.cloud import bigquery
import pandas as pd
import matplotlib.pyplot as plt

# Suppress UserWarnings originating from google.cloud.bigquery.table so they
# do not clutter the rendered report.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="google.cloud.bigquery.table",
)

# No project is passed explicitly; the client is expected to resolve it from
# GOOGLE_APPLICATION_CREDENTIALS.
client = bigquery.Client()
PROJECT_ID = client.project
print("Using project:", PROJECT_ID)

# IMPORTANT: this must name the dataset that ACTUALLY contains the tables
# (use this one instead if the tables are located there).
DATASET = "bronze_prod"

# Breed dimension: descriptive attributes for each breed.
sql_dim = f"""
SELECT
  breed_id,
  breed_name,
  temperament,
  breed_group,
  bred_for,
  is_family_friendly
FROM `{PROJECT_ID}.{DATASET}.dim_breed`
"""

# Fact table: weight and lifespan measures for each breed.
sql_fact = f"""
SELECT
  breed_id,
  breed_name,
  weight_kg_min,
  weight_kg_max,
  weight_kg_avg,
  weight_class,
  lifespan_years_min,
  lifespan_years_max,
  lifespan_years_avg
FROM `{PROJECT_ID}.{DATASET}.fact_weight_life_span`
"""

# Run both queries (dimension first, then fact) and load each result set
# into a pandas DataFrame.
dim_df, fact_df = (
    client.query(statement).to_dataframe()
    for statement in (sql_dim, sql_fact)
)


In [None]:
# Enrich the weight/lifespan facts with the descriptive breed attributes.
#
# The join assumes breed_id is unique in dim_breed. `validate="many_to_one"`
# enforces that assumption: if the dimension ever contains duplicate breed_id
# values, pandas raises a MergeError instead of silently duplicating fact rows
# (which would otherwise show the same breed several times in the tables below).
df = fact_df.merge(
    dim_df[["breed_id", "temperament", "breed_group", "bred_for", "is_family_friendly"]],
    on="breed_id",
    how="left",
    validate="many_to_one",
)


## Top 10 breeds by average lifespan

The table and chart below show the 10 breeds with the highest expected average lifespan. Smaller breeds tend to dominate the top of the list.


In [None]:
from IPython.display import HTML, display

# Source columns shown in the table, mapped to their display labels.
# The key order defines the column order of the resulting table.
lifespan_labels = {
    "breed_name": "Breed",
    "lifespan_years_min": "Lifespan Min (years)",
    "lifespan_years_max": "Lifespan Max (years)",
    "lifespan_years_avg": "Average Lifespan (years)",
    "weight_class": "Weight Class",
}
avg_label = "Average Lifespan (years)"

# Rank breeds that have a known average lifespan, longest-lived first.
ranked_by_lifespan = (
    df.dropna(subset=["lifespan_years_avg"])
      .sort_values("lifespan_years_avg", ascending=False)
)

# Keep the ten best-ranked breeds, restricted to the report columns and
# relabelled for presentation.
top10_lifespan = ranked_by_lifespan.head(10)[list(lifespan_labels)].rename(
    columns=lifespan_labels
)

# Round the average to one decimal; this value is reused for display and export.
top10_lifespan[avg_label] = top10_lifespan[avg_label].round(1)

# Render the table as HTML with the index column hidden.
styled_top10 = top10_lifespan.style.format({avg_label: "{:.1f}"}).hide(axis="index")
display(HTML(styled_top10.to_html()))


In [None]:
# Write the top-10 table to a standalone HTML file (no index column), with the
# average lifespan rendered to exactly one decimal place.
top10_lifespan.to_html(
    buf="top10_lifespan.html",
    index=False,
    formatters={"Average Lifespan (years)": "{:.1f}".format},
)


In [None]:
# Bar chart of the ten longest-lived breeds, saved to disk and shown inline.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(top10_lifespan["Breed"], top10_lifespan["Average Lifespan (years)"])

# Slant the breed names so long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

ax.set_ylabel("Years")
ax.set_title("Top 10 breeds with highest average age")

# Start the y-axis at 10 years; the upper limit is left automatic.
ax.set_ylim(10, None)

fig.tight_layout()
fig.savefig("top10_lifespan.png", dpi=150)
plt.show()


## Distribution of breeds by weight class

Here we group breeds into Small, Medium, Large and Giant based on their average weight.
This gives a quick overview of how common each size category is in the dataset.



In [None]:
from IPython.display import display, HTML

# Presentation order of the size categories (smallest to largest).
order = ["Small", "Medium", "Large", "Giant"]

# Count distinct breeds per weight class, reported in the fixed order above.
# `fill_value=0` matters: without it, a class that has no breeds in the data
# becomes NaN after reindexing, which turns the whole count column into floats
# and leaves a NaN bar in the chart that is built from this table.
weight_counts = (
    df
    .dropna(subset=["weight_class"])
    .groupby("weight_class")["breed_id"]
    .nunique()
    .reindex(order, fill_value=0)
    .rename_axis("Weight Class")  # readable label for the category column
    .reset_index(name="Number of Breeds")
)

# Render the table as HTML with the index column hidden.
display(HTML(weight_counts.style.hide(axis="index").to_html()))


In [None]:
# Bar chart of the number of breeds in each weight class, saved to disk and
# shown inline.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(weight_counts["Weight Class"], weight_counts["Number of Breeds"])
ax.set_xlabel("Weight Class")
ax.set_ylabel("Number of Breeds")
ax.set_title("Distribution of Breeds by Weight Class")
fig.tight_layout()
fig.savefig("weight_class_distribution.png", dpi=150)
plt.show()


## Temperament traits among family-friendly breeds

In this section we look at all breeds marked as family-friendly and break down their
temperament text into individual traits. The table below lists the 15 most common traits among family-friendly breeds, together with the number of family-friendly breeds that have each trait.

In [None]:
from IPython.display import HTML, display

# Keep only the rows flagged as family-friendly; copy so later work on
# family_df is independent of df.
is_family_friendly = df["is_family_friendly"] == True
family_df = df[is_family_friendly].copy()

# Break each comma-separated temperament string into one row per trait,
# trimming the whitespace around every trait.
traits = family_df["temperament"].dropna().str.split(",")
traits = traits.explode().str.strip()

# Discard blank entries (e.g. produced by trailing or doubled commas).
traits = traits[traits != ""]

# Tally how often each trait occurs, most frequent first.
temperament_counts = traits.value_counts().reset_index()
temperament_counts.columns = ["Temperament Trait", "Count"]

# Show the 15 most frequent traits as an HTML table with the index hidden.
most_common_traits = temperament_counts.head(15)
display(HTML(most_common_traits.style.hide(axis="index").to_html()))


In [None]:
from IPython.display import HTML, display

# Number of distinct family-friendly breeds in each weight class.
family_breeds_per_class = family_df.groupby("weight_class")["breed_id"].nunique()

# Turn the counts into a table, largest group first, with a readable label
# for the weight class column.
family_weight_counts = (
    family_breeds_per_class
    .reset_index(name="Family-friendly Breeds")
    .sort_values("Family-friendly Breeds", ascending=False)
    .rename(columns={"weight_class": "Weight Class"})
)

# Render the table as HTML with the index column hidden.
display(HTML(family_weight_counts.style.hide(axis="index").to_html()))



</div>


## Conclusion

Summing up, family-friendly breeds are often medium or large dogs that are intelligent, affectionate and friendly.