# Job 1

In [1]:
import org.apache.spark

Intitializing Scala interpreter ...

Spark Web UI available at http://192.168.1.80:4040
SparkContext available as 'sc' (version = 3.5.1, master = local[*], app id = local-1754294787964)
SparkSession available as 'spark'


import org.apache.spark


### Schema definitions

In [2]:
import org.apache.spark.sql.types._
import java.sql.Timestamp

val reviewSchema = StructType(
  Seq(
    StructField("user_id",  StringType,            nullable = true),
    StructField("name",     StringType,            nullable = true),
    StructField("time",     LongType,              nullable = false),
    StructField("rating",   DoubleType,            nullable = true),
    StructField("text",     StringType,            nullable = true),
    StructField("pics",     ArrayType(StringType), nullable = true),
    StructField("resp",     StructType(
      Seq(
        StructField("time", LongType,              nullable = false),
        StructField("text", StringType,            nullable = true)
      )
    ),                                             nullable = true),
    StructField("gmap_id",  StringType,            nullable = false),
  )
)

case class Response(time: Timestamp, text: Option[String])

case class Review(
  user_id: Option[String],
  name: Option[String],
  time: Timestamp,
  rating: Option[Double],
  text: Option[String],
  pics: Seq[String],
  resp: Option[Response],
  gmap_id: String
)

import org.apache.spark.sql.types._
import java.sql.Timestamp
reviewSchema: org.apache.spark.sql.types.StructType = StructType(StructField(user_id,StringType,true),StructField(name,StringType,true),StructField(time,LongType,false),StructField(rating,DoubleType,true),StructField(text,StringType,true),StructField(pics,ArrayType(StringType,true),true),StructField(resp,StructType(StructField(time,LongType,false),StructField(text,StringType,true)),true),StructField(gmap_id,StringType,false))
defined class Response
defined class Review


In [3]:
val metadataSchema = StructType(
  Seq(
    StructField("name",             StringType,                                 nullable = true),
    StructField("address",          StringType,                                 nullable = true),
    StructField("gmap_id",          StringType,                                 nullable = false),
    StructField("description",      StringType,                                 nullable = true),
    StructField("latitude",         DoubleType,                                 nullable = false),
    StructField("longitude",        DoubleType,                                 nullable = false),
    StructField("category",         ArrayType(StringType),                      nullable = true),
    StructField("avg_rating",       DoubleType,                                 nullable = false),
    StructField("num_of_reviews",   IntegerType,                                nullable = false),
    StructField("price",            StringType,                                 nullable = false),
    StructField("hours",            ArrayType(ArrayType(StringType)),           nullable = true),
    StructField("MISC",             MapType(StringType, ArrayType(StringType)), nullable = false),
    StructField("state",            StringType,                                 nullable = true),
    StructField("relative_results", ArrayType(StringType),                      nullable = true),
    StructField("url",              StringType,                                 nullable = false),
  )
)

case class Metadata(
  name: Option[String],
  address: Option[String],
  gmap_id: String,
  description: Option[String],
  latitude: Double,
  longitude: Double,
  category: Seq[String],
  avg_rating: Double,
  num_of_reviews: Int,
  price: String,
  hours: Seq[Seq[String]],
  MISC: Map[String, Seq[String]],
  state: Option[String],
  relative_results: Seq[String],
  url: String
)

metadataSchema: org.apache.spark.sql.types.StructType = StructType(StructField(name,StringType,true),StructField(address,StringType,true),StructField(gmap_id,StringType,false),StructField(description,StringType,true),StructField(latitude,DoubleType,false),StructField(longitude,DoubleType,false),StructField(category,ArrayType(StringType,true),true),StructField(avg_rating,DoubleType,false),StructField(num_of_reviews,IntegerType,false),StructField(price,StringType,false),StructField(hours,ArrayType(ArrayType(StringType,true),true),true),StructField(MISC,MapType(StringType,ArrayType(StringType,true),true),false),StructField(state,StringType,true),StructField(relative_results,ArrayType(StringType,true),true),StructField(url,StringType,false))
defined class Metadata


### Dataset load and parse

In [4]:
import java.nio.file.Paths
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.from_unixtime

val projectDir: String = Paths.get(System.getProperty("user.dir")).getParent.getParent.getParent.toString
val reviewsPath = s"$projectDir/dataset/sample-reviews.ndjson"
val metadataPath = s"$projectDir/dataset/metadata.ndjson"

val spark = SparkSession.builder()
  .appName("NDJSON Reader")
  .master("local[*]") // Needed in local mode
  .getOrCreate()

val reviewsDf = spark.read
  .schema(reviewSchema)
  .json(reviewsPath)
  .withColumn("pics", when (col("pics") isNull, array()) otherwise col("pics"))
  .withColumn("time", from_unixtime(col("time") / 1000).cast("timestamp"))
  .withColumn("resp", 
    when (
      col("resp") isNotNull, 
      struct(
        from_unixtime(col("resp.time") / 1000).cast("timestamp").alias("time"),
        col("resp.text").cast(StringType).alias("text")
      )
    ) otherwise lit(null)
  )
  .as[Review]

val metadataDf = spark.read
  .schema(metadataSchema)
  .json(metadataPath)
  .withColumn("category", when (col("category") isNull, array()) otherwise col("category"))
  .withColumn("hours", when (col("hours") isNull, array()) otherwise col("hours"))
  .withColumn("relative_results", when (col("relative_results") isNull, array()) otherwise col("relative_results"))
  .as[Metadata]

reviewsDf.printSchema()
metadataDf.printSchema()

// Unforturnately, it seems that Spark does not support case classes in RDDs. It throws ArrayStoreException
// when trying to collect the RDD... [see also [here](https://github.com/adtech-labs/spylon-kernel/issues/40)]
val reviewsRdd = reviewsDf.rdd
  .map(Review.unapply(_).get)
  .map { case review @ (_, _, _, _, _, _, resp, _) => review.copy(_7 = resp.map(Response.unapply(_).get)) }
val metaRdd = metadataDf.rdd.map(Metadata.unapply).map(_.get)

root
 |-- user_id: string (nullable = true)
 |-- name: string (nullable = true)
 |-- time: timestamp (nullable = true)
 |-- rating: double (nullable = true)
 |-- text: string (nullable = true)
 |-- pics: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- resp: struct (nullable = true)
 |    |-- time: timestamp (nullable = true)
 |    |-- text: string (nullable = true)
 |-- gmap_id: string (nullable = true)

root
 |-- name: string (nullable = true)
 |-- address: string (nullable = true)
 |-- gmap_id: string (nullable = true)
 |-- description: string (nullable = true)
 |-- latitude: double (nullable = true)
 |-- longitude: double (nullable = true)
 |-- category: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- avg_rating: double (nullable = true)
 |-- num_of_reviews: integer (nullable = true)
 |-- price: string (nullable = true)
 |-- hours: array (nullable = true)
 |    |-- element: array (containsNull = true)
 |    |    |-- element: 

import java.nio.file.Paths
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.from_unixtime
projectDir: String = /Users/lucatassi/Projects/big-data/big-data-project
reviewsPath: String = /Users/lucatassi/Projects/big-data/big-data-project/dataset/sample-reviews.ndjson
metadataPath: String = /Users/lucatassi/Projects/big-data/big-data-project/dataset/metadata.ndjson
spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@5e232ba9
reviewsDf: org.apache.spark.sql.Dataset[Review] = [user_id: string, name: string ... 6 more fields]
metadataDf: org.apache.spark.sql.Dataset[Metadata] = [name: string, address: string ... 13 more fields]
reviewsRdd: org.apache.spark.rdd.RDD[(Option[String], Option[String], java.sql.Timestamp, Option[Double], Optio...


L'obiettivo di questo job è capire, anno per anno, se una maggiore frequenza nella risposta alle recensioni ha un impatto sulla valutazione media ricevuta.
In particolare:
- Per ogni anno e business si calcola la media delle recensioni, il rate e il tempo medio di risposta;
- Sulla base del rate e del tempo medio di risposta viene calcolata un attributo aggiuntivo “response strategy” che categorizza il business in un particolare anno in 4 categorie (“Rapid and frequent”, “Slow but frequent”, “Occasional” o “Rare or none”);
- Aggregazione in base alla "response strategy", l'anno e lo stato per ottenere il rate medio e il numero di business all'interno della categoria.

---

The goal of this job is to understand, year by year, whether greater frequency in responding to reviews has an impact on the average rating received.

Specifically:

- For each year and business, the average rating, rate, and average response time are calculated;
- Based on the rate and average response time, an additional attribute "response strategy" is calculated that categorizes the business in a particular year into four categories ("Rapid and frequent," "Slow but frequent," "Occasional," or "Rare or none");
- Aggregation based on the "response strategy," year, and state to get the average rate and number of businesses within the category.

---

**Metadata**: (name, address, <ins>gmap_id</ins>, description, latitude, longitude, category, avg_rating, num_of_reviews, price, hours, misc, state, relative_results, url)

**Review**: (user_id, name, time, rating, text, pics, responses, <ins>gmap_id</ins>)

---

In [5]:
import java.util.concurrent.TimeUnit
import org.apache.spark.sql.SaveMode

import java.util.concurrent.TimeUnit
import org.apache.spark.sql.SaveMode


In [6]:
def responseStrategy(avgResponseRate: Double, avgResponseTime: Double): String =
  (avgResponseRate, avgResponseTime) match {
    case (rr, rt) if rr >= 0.5 && rt <= 4 * 24 => "Rapid and frequent"
    case (rr, rt) if rr >= 0.5 => "Slow but frequent"
    case (rr, _)  if rr >= 0.15 => "Occasional"
    case _ => "Rare or none"
  }

responseStrategy: (avgResponseRate: Double, avgResponseTime: Double)String


In [7]:

def toState(address: Option[String]): String = 
  address.flatMap { addr =>
    // This regex captures the state abbreviation between a comma and the ZIP code
    val StateRegex = """,\s*([A-Z]{2})\s+\d{5}""".r
    StateRegex.findFirstMatchIn(addr).map(_.group(1))
  }.getOrElse("Unknown")

toState: (address: Option[String])String


In [8]:
val businessesStates = metaRdd
  .map(b => b._3 -> toState(b._2))

businessesStates: org.apache.spark.rdd.RDD[(String, String)] = MapPartitionsRDD[16] at map at <console>:36


In [9]:
val reviewsInfo = reviewsRdd
  .filter(_._4.isDefined) // filter out reviews without a rating
  .map { case (_, _, time, rating, _, _, resp, id) => (time.toLocalDateTime.getYear, id) -> (time, rating.get, resp) }
  .aggregateByKey(
    (0.0, 0, 0L, 0) // (sum of ratings, num of responses, sum of response times (unix timestamp), num of reviews)
  )(
    (acc, v) => {
      val (sumRatings, numResponses, sumResponseTimes, totalReviews) = acc
      val (time, rating, response) = v
      (
        sumRatings + rating,
        numResponses + (if (response.isDefined) 1 else 0),
        sumResponseTimes + (if (response.isDefined) response.get._1.getTime - time.getTime else 0L),
        totalReviews + 1
      )
    },
    (r1, r2) => (r1._1 + r2._1, r1._2 + r2._2, r1._3 + r2._3, r1._4 + r2._4)
  )
  .mapValues { case (sumRatings, numResponses, sumResponseTimes, totalReviews) => 
    (
      sumRatings / totalReviews,
      numResponses.toDouble / totalReviews,
      if (numResponses > 0) TimeUnit.MILLISECONDS.toHours(sumResponseTimes / numResponses) else Double.PositiveInfinity
    )
  } // [((year, gmap_id), (avg_rating, response_rate, avg_response_time))*]
  .mapValues { case (avgRating, responseRate, avgResponseTime) => 
    (avgRating, responseRate, avgResponseTime, responseStrategy(responseRate, avgResponseTime)) 
  } // [((year, gmap_id), (avg_rating, response_rate, avg_response_time, response_strategy))*]

reviewsInfo: org.apache.spark.rdd.RDD[((Int, String), (Double, Double, Double, String))] = MapPartitionsRDD[21] at mapValues at <console>:60


In [10]:
val outcome = reviewsInfo
  .map { case ((year, id), (avgRating, _, _, responseStrategy)) => id -> (year, responseStrategy, avgRating) }
  .join(businessesStates) // [(gmap_id, ((year, response_strategy, avg_rating), state))*]
  .map { case (_, ((year, responseStrategy, avgRating), state)) => (year, state, responseStrategy) -> avgRating }
  .aggregateByKey((0.0, 0))((acc, v) => (acc._1 + v, acc._2 + 1), (r1, r2) => (r1._1 + r2._1, r1._2 + r2._2))
  .mapValues { case (sumRatings, totalBusinesses) => sumRatings / totalBusinesses }

outcome: org.apache.spark.rdd.RDD[((Int, String, String), Double)] = MapPartitionsRDD[28] at mapValues at <console>:41


In [11]:
val outputDirPath = s"$projectDir/output"
val outputPath = s"$outputDirPath/job1-output"

outputDirPath: String = /Users/lucatassi/Projects/big-data/big-data-project/output
outputPath: String = /Users/lucatassi/Projects/big-data/big-data-project/output/job1-output


In [12]:
outcome.map { case ((year, state, responseStrategy), avgRating) => (year, state, responseStrategy, avgRating) }
  .coalesce(1)
  .toDF("year", "state", "response_strategy", "avg_rating")
  .write.format("csv").option("header", "true")
  .mode(SaveMode.Overwrite)
  .save(s"file://$outputPath")

### Results

In [1]:
%%python
import glob
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

files = glob.glob("/Users/lucatassi/Projects/big-data/big-data-project/output/job1-output/*.csv")

if not files:
    raise FileNotFoundError("No CSV files found")

df = pd.read_csv(files[0])

# Set seaborn style
sns.set(style="whitegrid")

# --- 1. Faceted bar charts by state ---
g = sns.catplot(
    data=df,
    x="response_strategy",
    y="avg_rating",
    hue="year",
    col="state",
    kind="bar",
    col_wrap=4,
    height=4,
    sharey=False,
    palette="muted"
)
g.set_xticklabels(rotation=30)
g.fig.suptitle("Average Rating by Response Strategy, Year & State", y=1.03)
plt.show()

In [2]:
%%python

# --- 2. Heatmap: average rating by state and response_strategy ---
pivot_df = df.pivot_table(
    index="state",
    columns="response_strategy",
    values="avg_rating",
    aggfunc='mean'
)

plt.figure(figsize=(12,8))
sns.heatmap(
    pivot_df,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    center=df["avg_rating"].mean()
)
plt.title("Heatmap: Average Rating by State and Response Strategy")
plt.ylabel("State")
plt.xlabel("Response Strategy")
plt.xticks(rotation=30)
plt.show()

In [8]:
%%python

# --- 3. Boxplot: distribution of avg_rating by state and response_strategy ---
plt.figure(figsize=(16,6))
sns.boxplot(
    data=df,
    x="state",
    y="avg_rating",
    hue="response_strategy",
    palette="Set2"
)
plt.xticks(rotation=90)
plt.title("Boxplot of Average Ratings by State and Response Strategy")
plt.legend(title="Response Strategy", bbox_to_anchor=(1.05, 1), loc=2)
plt.tight_layout()
plt.show()

In [5]:
%%python
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

# --- 1. Choropleth ---
fig_choro = px.choropleth(
    df,
    locations="state",
    locationmode="USA-states",
    color="avg_rating",
    scope="usa",
    animation_frame="year",
    title="Choropleth: Avg Rating by State"
)
fig_choro.show()

# --- 2. Bar Chart ---
fig_bar = px.bar(
    df.groupby("state")["avg_rating"].mean().reset_index(),
    x="state",
    y="avg_rating",
    title="Average Rating by State"
)
fig_bar.show()

# --- 3. Line Chart ---
fig_line = px.line(
    df.groupby(["year", "state"])["avg_rating"].mean().reset_index(),
    x="year",
    y="avg_rating",
    color="state",
    title="Rating Trends Over Time"
)
fig_line.show()

# --- 4. Scatter Plot ---
fig_scatter = px.scatter(
    df,
    x="avg_rating",
    y=np.random.uniform(1, 100, len(df)),
    color="state",
    title="Scatter Plot Example"
)
fig_scatter.show()

# --- 5. Heatmap ---
pivot = df.pivot_table(
    index="state", columns="year", values="avg_rating", aggfunc="mean"
)
fig_heatmap = go.Figure(
    data=go.Heatmap(
        z=pivot.values,
        x=pivot.columns.astype(str),
        y=pivot.index,
        colorscale="Viridis"
    )
)
fig_heatmap.update_layout(title="Heatmap: Avg Ratings by Year and State")
fig_heatmap.show()

# --- 6. Pie Chart ---
fig_pie = px.pie(
    df,
    names="state",
    title="State Distribution in Data"
)
fig_pie.show()

fig_choro.write_html("choropleth.html")

print("✅ All charts generated successfully.")

{'application/vnd.plotly.v1+json': {'data': [{'coloraxis': 'coloraxis', 'geo': 'geo', 'hovertemplate': 'year=2021<br>state=%{location}<br>avg_rating=%{z}<extra></extra>', 'locationmode': 'USA-states', 'locations': ['NM', 'NH', 'TX', 'WA', 'NM', 'AL', 'Unknown', 'GA', 'AL', 'TX', 'Unknown', 'FL', 'GA', 'WA', 'MS', 'AL', 'MA', 'NM', 'Unknown', 'OR', 'WA', 'Unknown', 'TN', 'NH', 'ID', 'GA', 'MS', 'NM', 'MS', 'GA', 'NH', 'NH', 'AL', 'OR', 'OR', 'MS', 'WA', 'TN'], 'name': '', 'z': {'dtype': 'f8', 'bdata': '5ZpoSYeeEUCuR9eks8wRQKEaGwgAQxBA/KM18cVrEUAS6MFEEaAQQIutv2F83xBAR9RSqex4EkCrRC14T9oRQCgco+7BOhFAMzMzMzMzE0Ae4zEe47ESQHTRRRddNBFAVVVVVVVVDUCQztAouk0RQEr2pF1n6RBAT3lsb3onEUAAAAAAAAAQQEFUdBYrKBFAHgiKm3kEEkAAAAAAAAATQLSZGI4pExJAb/mWb/mWE0CrqqqqqqoSQIBwFGPbnRFAVlVVVVVVE0AAAAAAAAAUQPZ+uQNkbxBA9g+sYDj3EECuE1Mqp5YRQOM4juM4jhFAKwf/l4csEkDS9X3smb8RQHfFpXgj0BFAAAAAAAAAFEAWX/EVX/ESQNTmhJjj2BBAS470fj88EUDDMAzDMIwJQA=='}, 'type': 'choropleth'}], 'layout': {'template': {'data': {'histogram2dcontour': [{

{'application/vnd.plotly.v1+json': {'data': [{'hovertemplate': 'state=%{x}<br>avg_rating=%{y}<extra></extra>', 'legendgroup': '', 'marker': {'color': '#636efa', 'pattern': {'shape': ''}}, 'name': '', 'orientation': 'v', 'showlegend': False, 'textposition': 'auto', 'x': ['AL', 'FL', 'GA', 'ID', 'MA', 'MS', 'NH', 'NM', 'OR', 'TN', 'TX', 'Unknown', 'VT', 'WA'], 'xaxis': 'x', 'y': {'dtype': 'f8', 'bdata': 'p2gKUHzgEEBCFJDbEDESQIEnhv9fAxFAq6qqqqqqEkBLn/RJn3QNQAiozm3qwBBAtt5XWaUuEUBdQQptSK4QQKxTOmffaRBAruH9ADK/DkAU9ctaidQQQJlvDdTMThJAAAAAAAAAFECWsaxjAPAQQA=='}, 'yaxis': 'y', 'type': 'bar'}], 'layout': {'template': {'data': {'histogram2dcontour': [{'type': 'histogram2dcontour', 'colorbar': {'outlinewidth': 0, 'ticks': ''}, 'colorscale': [[0.0, '#0d0887'], [0.1111111111111111, '#46039f'], [0.2222222222222222, '#7201a8'], [0.3333333333333333, '#9c179e'], [0.4444444444444444, '#bd3786'], [0.5555555555555556, '#d8576b'], [0.6666666666666666, '#ed7953'], [0.7777777777777778, '#fb9f3a'], [0.8888888

{'application/vnd.plotly.v1+json': {'data': [{'hovertemplate': 'state=AL<br>year=%{x}<br>avg_rating=%{y}<extra></extra>', 'legendgroup': 'AL', 'line': {'color': '#636efa', 'dash': 'solid'}, 'marker': {'symbol': 'circle'}, 'mode': 'lines', 'name': 'AL', 'orientation': 'v', 'showlegend': True, 'x': {'dtype': 'i2', 'bdata': '3wfgB+EH4gfjB+QH5Qc='}, 'xaxis': 'x', 'y': {'dtype': 'f8', 'bdata': 'CUGzujsREEC6qzon4E4QQASoeXPv7RBAtbw1WioNEUDyOxlqsTYRQAYNdQgITRFAHkIdDndEEUA='}, 'yaxis': 'y', 'type': 'scatter'}, {'hovertemplate': 'state=FL<br>year=%{x}<br>avg_rating=%{y}<extra></extra>', 'legendgroup': 'FL', 'line': {'color': '#EF553B', 'dash': 'solid'}, 'marker': {'symbol': 'circle'}, 'mode': 'lines', 'name': 'FL', 'orientation': 'v', 'showlegend': True, 'x': {'dtype': 'i2', 'bdata': '3wfgB+EH4gfjB+QH5Qc='}, 'xaxis': 'x', 'y': {'dtype': 'f8', 'bdata': 'AAAAAAAAEkAAAAAAAKASQKKLLrrowhFArSMb3LvNEEA9z/M8zzMSQHh3d3d3dxNAdNFFF100EUA='}, 'yaxis': 'y', 'type': 'scatter'}, {'hovertemplate': 'state=GA<br>

{'application/vnd.plotly.v1+json': {'data': [{'hovertemplate': 'state=NM<br>avg_rating=%{x}<br>y=%{y}<extra></extra>', 'legendgroup': 'NM', 'marker': {'color': '#636efa', 'symbol': 'circle'}, 'mode': 'markers', 'name': 'NM', 'orientation': 'v', 'showlegend': True, 'x': {'dtype': 'f8', 'bdata': '5ZpoSYeeEUA1YR8CLiAQQDRslb0foRBANxRiDzPLEEAS6MFEEaAQQMHRHGKOIxBAaQOdNtBpDUCeH7Dn/FwQQHxnnrt1DBBA7yDcOkoBEUDSXQk0NJIRQAqmpvUT9xBAsF7BghW2EEBBVHQWKygRQN7d3d3d3RFAjPdm+MEBD0DsazquGD0RQPFmMtIO2RBA4y6PPYtKEEANwzAMwzAPQPYPrGA49xBAdyU3RG9XEEB8eQQ5JhMRQJXWs2QJqBFAD6W+03+FEEA55vAM4uMQQKEVXx8N0hBA/CuSEP38EEA='}, 'xaxis': 'x', 'y': {'dtype': 'f8', 'bdata': 'HPhSZTwIVkBEf9e1UVdXQFjexCattz9ADAp2McRdQUDA5wKqkrYnQG3HqaVG1ThAyZcoC7KXQUC0FJ1GqT5WQCK7L0T+5UhAcLshN+4cV0B4jZRUOsBWQGOshxMYk1RAf9FqpwwLUkBXJ3Gose1SQJA+axJDBy5AaTL1+OJxNkDRHW3ASilVQMXfEipsClNALr4eg6eeN0BeVLRsGEM8QO3rfBMqNlFAPlBlYYl9NkBTz2AUW9ZRQJEEoAu4305Aw45s3FJtO0A9P6z0ZS5FQLdJrtzq1kZA1wHkCuMgUEA='}, 'yaxis': 'y', 'type': 'scatter'}, {'hovertemplate':

{'application/vnd.plotly.v1+json': {'data': [{'colorscale': [[0.0, '#440154'], [0.1111111111111111, '#482878'], [0.2222222222222222, '#3e4989'], [0.3333333333333333, '#31688e'], [0.4444444444444444, '#26828e'], [0.5555555555555556, '#1f9e89'], [0.6666666666666666, '#35b779'], [0.7777777777777778, '#6ece58'], [0.8888888888888888, '#b5de2b'], [1.0, '#fde725']], 'x': ['2015', '2016', '2017', '2018', '2019', '2020', '2021'], 'y': ['AL', 'FL', 'GA', 'ID', 'MA', 'MS', 'NH', 'NM', 'OR', 'TN', 'TX', 'Unknown', 'VT', 'WA'], 'z': {'dtype': 'f8', 'bdata': 'CUGzujsREEC6qzon4E4QQASoeXPv7RBAtbw1WioNEUDyOxlqsTYRQAYNdQgITRFAHkIdDndEEUAAAAAAAAASQAAAAAAAoBJAoosuuujCEUCtIxvcu80QQD3P8zzPMxJAeHd3d3d3E0B00UUXXTQRQKuqqqqqqhJAj+M4juM4DUAJATzU+u8RQHVjUIxLnw1AJUNdFgXdEUDLnHeMJNERQA6KmcHMhBFAAAAAAAAA+H8AAAAAAAD4fwAAAAAAABJAAAAAAAAAE0AAAAAAAAASQAAAAAAAABNAVlVVVVVVE0AAAAAAAAD4fwAAAAAAAPA/AAAAAAAADUAzMzMzMzMRQAAAAAAAABJAq6qqqqqqEkAAAAAAAAAQQGzBRfpH4Q9A/PkaAn6PEEAqqNnmYpIQQLpJUuBbABFASHgJALYYEUBptyWxvCgRQPCbDYkV8hBA

{'application/vnd.plotly.v1+json': {'data': [{'domain': {'x': [0.0, 1.0], 'y': [0.0, 1.0]}, 'hovertemplate': 'state=%{label}<extra></extra>', 'labels': ['NM', 'MS', 'NH', 'TX', 'ID', 'NM', 'GA', 'NH', 'TX', 'WA', 'Unknown', 'FL', 'Unknown', 'GA', 'OR', 'MS', 'WA', 'Unknown', 'GA', 'WA', 'AL', 'Unknown', 'WA', 'NM', 'NM', 'AL', 'NH', 'MS', 'VT', 'AL', 'TN', 'GA', 'NH', 'FL', 'OR', 'WA', 'GA', 'GA', 'AL', 'TN', 'OR', 'TN', 'NM', 'TX', 'NM', 'WA', 'AL', 'Unknown', 'OR', 'NH', 'MS', 'AL', 'AL', 'Unknown', 'WA', 'Unknown', 'NH', 'MS', 'NH', 'TX', 'NM', 'GA', 'AL', 'NM', 'OR', 'TX', 'NM', 'TX', 'Unknown', 'WA', 'TX', 'TN', 'NH', 'TX', 'Unknown', 'FL', 'AL', 'FL', 'Unknown', 'Unknown', 'NH', 'TX', 'GA', 'MA', 'WA', 'WA', 'MS', 'NM', 'OR', 'AL', 'TX', 'TX', 'MS', 'ID', 'NM', 'MA', 'OR', 'WA', 'MS', 'MA', 'GA', 'WA', 'OR', 'NM', 'Unknown', 'WA', 'FL', 'WA', 'Unknown', 'GA', 'MS', 'AL', 'NM', 'TN', 'AL', 'AL', 'MA', 'OR', 'NM', 'OR', 'AL', 'AL', 'TN', 'TX', 'OR', 'TN', 'AL', 'OR', 'Unknown', 'AL

✅ All charts generated successfully.
