## TRDE703 Atelier Int√©gration des Donn√©es

In [1]:
import sys
import os
from pathlib import Path

current_dir = Path(os.getcwd())
project_root = current_dir.parent if current_dir.name == "etl" else current_dir

if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

print(f"‚úÖ Racine ajout√©e au path : {project_root}")

from pyspark.sql import SparkSession
from etl.shared.config import SPARK_CONFIG

%load_ext autoreload
%autoreload 2

builder = SparkSession.builder
for key, val in SPARK_CONFIG.items():
    builder = builder.config(key, val)

spark = builder.getOrCreate()

print(f"‚úÖ Session Spark cr√©√©e avec le JAR : {SPARK_CONFIG.get('spark.jars')}")

‚úÖ Racine ajout√©e au path : /Users/cedricsanchez/Master1/Cours/integration_donnees_TP


Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
26/01/20 09:36:14 WARN Utils: Your hostname, MacBook-Pro-de-Cedric.local, resolves to a loopback address: 127.0.0.1; using 10.101.116.72 instead (on interface en0)
26/01/20 09:36:14 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
26/01/20 09:36:14 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


‚úÖ Session Spark cr√©√©e avec le JAR : /Users/cedricsanchez/Master1/Cours/integration_donnees_TP/jars/mysql-connector-j-9.1.0.jar


In [2]:
json_filepath = str(project_root / "data" / "raw" / "openfoodfacts-products.jsonl")

print(f"üìÇ Fichier cible : {json_filepath}")

if os.path.exists(json_filepath):
    print("‚úÖ Le fichier existe bien.")
else:
    print("‚ùå Fichier introuvable. V√©rifie le dossier data/raw/")

üìÇ Fichier cible : /Users/cedricsanchez/Master1/Cours/integration_donnees_TP/data/raw/openfoodfacts-products.jsonl
‚úÖ Le fichier existe bien.


### üí° Un mot sur nos choix (et les consignes du TP)

Pourquoi s'emb√™ter √† √©crire ce sch√©ma manuellement ?

1.  **Respect de la consigne :** Le sujet est strict : *"Lecture JSON/CSV avec sch√©ma explicite (pas d'inf√©rence magique en prod)"*. Utiliser `inferSchema=True` nous ferait perdre des points.
2.  **Gestion de l'Historique (SCD2) :** Le sujet impose de g√©rer le *"SCD2 produit"*. Pour cela, nous avons imp√©rativement besoin du timestamp brut (`last_modified_t` en `LongType`) pour comparer les versions √† la seconde pr√®s.
3.  **Structure Imbriqu√©e :** Le format JSONL groupe les nutriments dans un objet. Notre sch√©ma refl√®te cette r√©alit√© (`StructType` imbriqu√©) pour √©viter de cr√©er 1000 colonnes plates inutiles.
4.  **S√©curit√© (`String`) :** Pour des champs instables comme `nova_group`, on lit en `String` pour √©viter que Spark ne rejette la ligne en cas de format inattendu.

In [3]:
from pyspark.sql.types import StructType, StructField, StringType, FloatType, IntegerType, LongType, ArrayType

def get_jsonl_schema():
    """
    Sch√©ma robuste pour l'ingestion JSONL.
    G√®re les types imbriqu√©s (nested) propres √† MongoDB/JSON.
    """

    nutriments_schema = StructType([
        StructField("energy-kcal_100g", FloatType(), True),
        StructField("sugars_100g", FloatType(), True),
        StructField("salt_100g", FloatType(), True),
        StructField("sodium_100g", FloatType(), True),
        StructField("fiber_100g", FloatType(), True),
        StructField("proteins_100g", FloatType(), True)
    ])

    return StructType([
        # --- Identifiants & M√©tadonn√©es ---
        StructField("code", StringType(), True),
        StructField("product_name", StringType(), True),
        StructField("last_modified_t", LongType(), True),
        StructField("created_t", LongType(), True),

        # --- Dimensions (Marques, Cat√©gories...) ---
        StructField("brands", StringType(), True),
        StructField("categories", StringType(), True),
        StructField("countries_tags", ArrayType(StringType()), True),

        # --- Qualit√© & Scores ---
        StructField("nutriscore_grade", StringType(), True),
        StructField("nova_group", IntegerType(), True),
        StructField("ecoscore_grade", StringType(), True),

        # --- Mesures (Imbriqu√©es) ---
        StructField("nutriments", nutriments_schema, True)
    ])

print("‚úÖ Sch√©ma JSONL d√©fini.")

‚úÖ Sch√©ma JSONL d√©fini.


In [4]:
# Adapte le nom du fichier si n√©cessaire
input_file = "openfoodfacts-products.jsonl"
raw_path = str(project_root / "data" / "raw" / input_file)

print(f"üìÇ Pr√©paration de la lecture : {raw_path}")

try:
    df_raw = spark.read \
        .schema(get_jsonl_schema()) \
        .json(raw_path)

    print("‚úÖ Lecture configur√©e (Lazy). Le chargement r√©el se fera apr√®s le sampling.")


    count = df_raw.count()
    print(f"üìä Nombre de produits ing√©r√©s : {count:,}")

    df_raw.printSchema()

except Exception as e:
    print(f"‚ùå Erreur de lecture : {e}")

üìÇ Pr√©paration de la lecture : /Users/cedricsanchez/Master1/Cours/integration_donnees_TP/data/raw/openfoodfacts-products.jsonl
‚úÖ Lecture configur√©e (Lazy). Le chargement r√©el se fera apr√®s le sampling.




üìä Nombre de produits ing√©r√©s : 4,247,844
root
 |-- code: string (nullable = true)
 |-- product_name: string (nullable = true)
 |-- last_modified_t: long (nullable = true)
 |-- created_t: long (nullable = true)
 |-- brands: string (nullable = true)
 |-- categories: string (nullable = true)
 |-- countries_tags: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- nutriscore_grade: string (nullable = true)
 |-- nova_group: integer (nullable = true)
 |-- ecoscore_grade: string (nullable = true)
 |-- nutriments: struct (nullable = true)
 |    |-- energy-kcal_100g: float (nullable = true)
 |    |-- sugars_100g: float (nullable = true)
 |    |-- salt_100g: float (nullable = true)
 |    |-- sodium_100g: float (nullable = true)
 |    |-- fiber_100g: float (nullable = true)
 |    |-- proteins_100g: float (nullable = true)



                                                                                

## üßπ √âtape 2 : Transformation "Silver" (Nettoyage & Typage)

Maintenant que les donn√©es brutes sont charg√©es, nous devons les rendre utilisables pour l'analyse et le SCD2.
Cette √©tape applique les r√®gles de qualit√© demand√©es :

1.  **Typage Temporel :** Conversion des timestamps UNIX (`Long`) en vraies dates (`Timestamp`) pour `last_modified_t` et `created_t`.
2.  **Nettoyage Textuel :** Suppression des espaces superflus (`trim`) sur les codes-barres et noms.
3.  **Extraction des Nutriments :** Aplatissement de la structure imbriqu√©e `nutriments` pour faciliter les requ√™tes SQL futures.
4.  **Gestion des Nulls :** Conversion s√©curis√©e de `nova_group` (texte vers entier) et filtrage des produits sans code-barre.

In [5]:
from pyspark.sql.functions import col, trim, from_unixtime, to_timestamp, when

print("‚è≥ D√©marrage du nettoyage Silver avec Sampling...")

# On ne garde que 10% des donn√©es (environ 400k lignes) pour sauver le disque dur.
# seed=42 permet d'avoir toujours les m√™mes 10% si on relances.
df_sampled = df_raw.sample(withReplacement=False, fraction=0.1, seed=42)

df_silver = df_sampled \
    .select(
        # --- 1. Nettoyage des Cl√©s & Textes ---
        trim(col("code")).alias("code"),
        trim(col("product_name")).alias("product_name"),

        # --- 2. Gestion Temporelle ---
        from_unixtime(col("last_modified_t")).cast("timestamp").alias("last_modified_ts"),
        from_unixtime(col("created_t")).cast("timestamp").alias("created_ts"),

        # --- 3. Normalisation des Dimensions ---
        col("countries_tags"),
        trim(col("brands")).alias("brands"),
        trim(col("categories")).alias("categories"),

        # --- 4. Qualit√© & Scores ---
        trim(col("nutriscore_grade")).alias("nutriscore_grade"),
        trim(col("ecoscore_grade")).alias("ecoscore_grade"),
        col("nova_group").cast("integer").alias("nova_group"),

        # --- 5. Nutriments ---
        col("nutriments.energy-kcal_100g").alias("energy_kcal_100g"),
        col("nutriments.sugars_100g").alias("sugars_100g"),
        col("nutriments.salt_100g").alias("salt_100g"),
        col("nutriments.proteins_100g").alias("proteins_100g")
    ) \
    .filter(col("code").isNotNull()) \
    .filter(col("code") != "")

# On met en cache ce petit √©chantillon
df_silver.cache()

count = df_silver.count()
print(f"‚úÖ Nettoyage termin√© sur l'√©chantillon. Produits restants : {count:,}")
print("(C'est normal d'en avoir moins, on a pris 10% volontairement !)")

display(df_silver.select("code", "last_modified_ts", "product_name").limit(5))

‚è≥ D√©marrage du nettoyage Silver avec Sampling...




‚úÖ Nettoyage termin√© sur l'√©chantillon. Produits restants : 425,588
(C'est normal d'en avoir moins, on a pris 10% volontairement !)


                                                                                

DataFrame[code: string, last_modified_ts: timestamp, product_name: string]

## üîê √âtape 3 : Fingerprinting (Pr√©paration SCD2)

Pour g√©rer l'historique (SCD2) efficacement, nous ne pouvons pas comparer toutes les colonnes √† chaque fois.
Nous allons g√©n√©rer un **Hash Technique (`row_hash`)** : une empreinte digitale unique bas√©e sur les colonnes m√©tier.

* **Strat√©gie :** On concat√®ne toutes les colonnes importantes (Nom, Marque, Nutriscore, Sucre...) et on applique un hachage SHA-256.
* **Int√©r√™t :** Si le hash change, cela signifie que le produit a √©t√© modifi√©. C'est ce qui d√©clenchera la cr√©ation d'une nouvelle version dans le Datamart.

In [6]:
from pyspark.sql.functions import sha2, concat_ws, col

print("‚è≥ Calcul du Hash (Fingerprint) pour chaque produit...")

columns_to_hash = [
    "product_name", "brands", "categories", "countries_tags",
    "nutriscore_grade", "nova_group", "ecoscore_grade",
    "energy_kcal_100g", "sugars_100g", "salt_100g", "proteins_100g"
]

df_hashed = df_silver.withColumn(
    "row_hash",
    sha2(concat_ws("||", *[col(c) for c in columns_to_hash]), 256)
)

print("‚úÖ Hashing termin√©.")
display(df_hashed.select("code", "product_name", "row_hash").limit(5))

‚è≥ Calcul du Hash (Fingerprint) pour chaque produit...
‚úÖ Hashing termin√©.


DataFrame[code: string, product_name: string, row_hash: string]

## üè≠ √âtape 4 : Initialisation du Datamart (DDL)

Avant de charger les donn√©es, nous devons cr√©er la structure des tables dans MySQL.
Nous utilisons une connexion Python directe (hors Spark) pour d√©finir pr√©cis√©ment :
1.  **Les Cl√©s Primaires (PK) :** `product_sk` (Auto-incr√©ment) pour identifier unique une *version* de produit.
2.  **Les Index :** Sur `code` et `row_hash` pour que les recherches (Join/Upsert) soient instantan√©es.
3.  **Les Colonnes SCD2 :** `effective_from` (d√©but), `effective_to` (fin), `is_current` (actif).

In [7]:
import mysql.connector

def init_datamart():
    print("‚è≥ Initialisation et Tuning MySQL...")

    conn = mysql.connector.connect(
        host="localhost",
        port=3306,
        user=MYSQL_CONFIG["user"],
        password=MYSQL_CONFIG["password"],
        database="openfoodfacts"
    )
    cursor = conn.cursor()

    # --- TUNING ---
    cursor.execute("SET GLOBAL max_allowed_packet=67108864")

    # --- NETTOYAGE ---
    cursor.execute("DROP TABLE IF EXISTS fact_nutrition_snapshot")
    cursor.execute("DROP TABLE IF EXISTS dim_product")

    # --- DIMENSION (Pas de changement) ---
    product_ddl = """
    CREATE TABLE dim_product (
        product_sk INT AUTO_INCREMENT PRIMARY KEY,
        code VARCHAR(255) NOT NULL,
        product_name TEXT,
        brands TEXT,
        categories TEXT,
        row_hash CHAR(64) NOT NULL,
        effective_from DATETIME,
        effective_to DATETIME,
        is_current BOOLEAN,
        INDEX idx_code (code),
        INDEX idx_hash (row_hash)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    """
    cursor.execute(product_ddl)
    print("   - Table 'dim_product' cr√©√©e.")

    fact_ddl = """
    CREATE TABLE fact_nutrition_snapshot (
        fact_sk INT AUTO_INCREMENT PRIMARY KEY,
        product_sk INT NOT NULL,
        date_sk INT NOT NULL,

        nutriscore_grade VARCHAR(50),
        ecoscore_grade VARCHAR(50),
        nova_group INT,
        energy_kcal_100g FLOAT,
        sugars_100g FLOAT,
        salt_100g FLOAT,
        proteins_100g FLOAT,

        FOREIGN KEY (product_sk) REFERENCES dim_product(product_sk)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    """
    cursor.execute(fact_ddl)
    print("   - Table 'fact_nutrition_snapshot' cr√©√©e (avec VARCHAR).")

    conn.close()
    print("‚úÖ Datamart pr√™t.")

try:
    init_datamart()
except Exception as e:
    print(f"‚ùå Erreur MySQL : {e}")

‚è≥ Initialisation et Tuning MySQL...
‚ùå Erreur MySQL : name 'MYSQL_CONFIG' is not defined


## üöö √âtape 5 : Chargement de la Dimension Produit (Initial Load)

Nous s√©parons les donn√©es en deux flux :
1.  **Dimension (`dim_product`) :** Contient les descriptions et l'historique.
2.  **Faits (`fact_nutrition_snapshot`) :** Contient les chiffres.

Ici, nous chargeons la dimension.
* **Transformation :** On ne garde que les colonnes descriptives.
* **Initialisation SCD2 :** Comme c'est le premier chargement, on fixe :
    * `effective_from` = La date de modification du produit (`last_modified_ts`).
    * `effective_to` = '9999-12-31' (Date infinie = produit actif).
    * `is_current` = True.
* **√âcriture JDBC :** On pousse vers MySQL en mode `append`.

In [8]:
from pyspark.sql.functions import lit
from etl.shared.config import MYSQL_CONFIG

print("‚è≥ Pr√©paration de la dimension Produit...")

# S√©lection finale
df_dim_product_init = df_hashed.select(
    col("code"),
    col("product_name"),
    col("brands"),
    col("categories"),
    col("row_hash"),
    col("last_modified_ts").alias("effective_from"),
    lit("9999-12-31 23:59:59").cast("timestamp").alias("effective_to"),
    lit(True).alias("is_current")
)

# Config JDBC optimis√©e
jdbc_url = MYSQL_CONFIG["url"]
jdbc_props = {
    "user": MYSQL_CONFIG["user"],
    "password": MYSQL_CONFIG["password"],
    "driver": MYSQL_CONFIG["driver"],

    # On r√©duit √† 1000 pour m√©nager le r√©seau et la m√©moire
    "batchsize": "1000"
}

print("üöÄ √âcriture dans MySQL (dim_product)...")

try:
    df_dim_product_init.write \
        .jdbc(url=jdbc_url, table="dim_product", mode="append", properties=jdbc_props)

    print("‚úÖ Chargement termin√© avec succ√®s !")

except Exception as e:
    print(f"‚ùå Erreur d'√©criture : {e}")

‚è≥ Pr√©paration de la dimension Produit...
üöÄ √âcriture dans MySQL (dim_product)...
‚ùå Erreur d'√©criture : An error occurred while calling o212.jdbc.
: com.mysql.cj.jdbc.exceptions.CommunicationsException: Communications link failure

The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.
	at com.mysql.cj.jdbc.exceptions.SQLError.createCommunicationsException(SQLError.java:165)
	at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:55)
	at com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:837)
	at com.mysql.cj.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:420)
	at com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:238)
	at com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:180)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider.getConnection(BasicConnectionProvider.scala:50)
	at

## üìä √âtape 6 : Chargement de la Table de Faits (Fact Table)

C'est l'√©tape finale. Nous devons charger les mesures (sucre, sel, nutriscore...) dans `fact_nutrition_snapshot`.
**Le d√©fi :** La table de faits a besoin de la cl√© √©trang√®re `product_sk`. Or, cette cl√© a √©t√© g√©n√©r√©e par MySQL (Auto-increment) √† l'√©tape pr√©c√©dente. Spark ne la connait pas.

**Strat√©gie :**
1.  **Lecture (Lookup) :** On lit la table `dim_product` depuis MySQL pour r√©cup√©rer le couple `(code, product_sk)`.
2.  **Jointure :** On joint ces IDs avec notre DataFrame Spark (`df_hashed`) sur le code-barre.
3.  **Calcul Date Key :** On transforme la date en entier `YYYYMMDD` (ex: `20230520`) pour la cl√© de temps `date_sk`.
4.  **√âcriture :** On ins√®re les lignes dans `fact_nutrition_snapshot`.

In [9]:
from pyspark.sql.functions import date_format

print("‚è≥ Chargement des Faits : R√©cup√©ration des IDs MySQL...")

# 1. On relit la dimension depuis MySQL pour avoir les product_sk g√©n√©r√©s
df_dim_mysql = spark.read \
    .format("jdbc") \
    .option("url", MYSQL_CONFIG["url"]) \
    .option("dbtable", "dim_product") \
    .option("user", MYSQL_CONFIG["user"]) \
    .option("password", MYSQL_CONFIG["password"]) \
    .option("driver", MYSQL_CONFIG["driver"]) \
    .load() \
    .select("product_sk", "code")

# 2. Jointure : Spark (Donn√©es) + MySQL (IDs)
df_facts = df_hashed.join(df_dim_mysql, on="code", how="inner")

# 3. Pr√©paration finale des colonnes de la Fact Table
df_facts_final = df_facts.select(
    col("product_sk"), # La cl√© √©trang√®re r√©cup√©r√©e de MySQL

    # Cr√©ation d'une cl√© de date simple (YYYYMMDD) bas√©e sur la date de modif
    date_format(col("last_modified_ts"), "yyyyMMdd").cast("integer").alias("date_sk"),

    # Les Mesures
    col("nutriscore_grade"),
    col("ecoscore_grade"),
    col("nova_group"),
    col("energy_kcal_100g"),
    col("sugars_100g"),
    col("salt_100g"),
    col("proteins_100g")
)

print(f"üì¶ Pr√™t √† charger {df_facts_final.count():,} lignes de faits.")

# 4. √âcriture dans MySQL
print("üöÄ √âcriture dans 'fact_nutrition_snapshot'...")
try:
    df_facts_final.write \
        .jdbc(
            url=MYSQL_CONFIG["url"],
            table="fact_nutrition_snapshot",
            mode="append",
            properties=jdbc_props
        )
    print("‚úÖ TERMINE ! Le Datamart est complet (Dimensions + Faits).")

except Exception as e:
    print(f"‚ùå Erreur √©criture Faits : {e}")

‚è≥ Chargement des Faits : R√©cup√©ration des IDs MySQL...


Py4JJavaError: An error occurred while calling o225.load.
: com.mysql.cj.jdbc.exceptions.CommunicationsException: Communications link failure

The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.
	at com.mysql.cj.jdbc.exceptions.SQLError.createCommunicationsException(SQLError.java:165)
	at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:55)
	at com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:837)
	at com.mysql.cj.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:420)
	at com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:238)
	at com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:180)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider.getConnection(BasicConnectionProvider.scala:50)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProviderBase.create(ConnectionProvider.scala:102)
	at org.apache.spark.sql.jdbc.JdbcDialect.$anonfun$createConnectionFactory$1(JdbcDialects.scala:233)
	at org.apache.spark.sql.jdbc.JdbcDialect.$anonfun$createConnectionFactory$1$adapted(JdbcDialects.scala:229)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.withConnection(JdbcUtils.scala:1318)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:80)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.getSchema(JDBCRelation.scala:247)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.$anonfun$createRelation$1(JdbcRelationProvider.scala:41)
	at org.apache.spark.sql.execution.metric.SQLMetrics$.withTimingNs(SQLMetrics.scala:234)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:41)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:364)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.org$apache$spark$sql$catalyst$analysis$ResolveDataSource$$loadV1BatchSource(ResolveDataSource.scala:143)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.$anonfun$applyOrElse$2(ResolveDataSource.scala:61)
	at scala.Option.getOrElse(Option.scala:201)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.applyOrElse(ResolveDataSource.scala:61)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.applyOrElse(ResolveDataSource.scala:45)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$3(AnalysisHelper.scala:139)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:107)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$1(AnalysisHelper.scala:139)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:416)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning(AnalysisHelper.scala:135)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning$(AnalysisHelper.scala:131)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUpWithPruning(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp(AnalysisHelper.scala:112)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp$(AnalysisHelper.scala:111)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.apply(ResolveDataSource.scala:45)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.apply(ResolveDataSource.scala:43)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:248)
	at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:183)
	at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:179)
	at scala.collection.immutable.List.foldLeft(List.scala:79)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:245)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:237)
	at scala.collection.immutable.List.foreach(List.scala:323)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:237)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:343)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:339)
	at org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:224)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:339)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:289)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:207)
	at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:207)
	at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.resolveInFixedPoint(HybridAnalyzer.scala:236)
	at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.$anonfun$apply$1(HybridAnalyzer.scala:91)
	at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.withTrackedAnalyzerBridgeState(HybridAnalyzer.scala:122)
	at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.apply(HybridAnalyzer.scala:84)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:322)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:423)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:322)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$2(QueryExecution.scala:139)
	at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:148)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:330)
	at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:717)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:330)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
	at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:329)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$1(QueryExecution.scala:139)
	at scala.util.Try$.apply(Try.scala:217)
	at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1392)
	at org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1453)
	at org.apache.spark.util.LazyTry.get(LazyTry.scala:58)
	at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:150)
	at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:90)
	at org.apache.spark.sql.classic.Dataset$.$anonfun$ofRows$1(Dataset.scala:114)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
	at org.apache.spark.sql.classic.Dataset$.ofRows(Dataset.scala:112)
	at org.apache.spark.sql.classic.DataFrameReader.load(DataFrameReader.scala:108)
	at org.apache.spark.sql.classic.DataFrameReader.load(DataFrameReader.scala:91)
	at org.apache.spark.sql.classic.DataFrameReader.load(DataFrameReader.scala:57)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:568)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:184)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:108)
	at java.base/java.lang.Thread.run(Thread.java:840)
	Suppressed: org.apache.spark.util.Utils$OriginalTryStackTraceException: Full stacktrace of original doTryWithCallerStacktrace caller
		at com.mysql.cj.jdbc.exceptions.SQLError.createCommunicationsException(SQLError.java:165)
		at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:55)
		at com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:837)
		at com.mysql.cj.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:420)
		at com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:238)
		at com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:180)
		at org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider.getConnection(BasicConnectionProvider.scala:50)
		at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProviderBase.create(ConnectionProvider.scala:102)
		at org.apache.spark.sql.jdbc.JdbcDialect.$anonfun$createConnectionFactory$1(JdbcDialects.scala:233)
		at org.apache.spark.sql.jdbc.JdbcDialect.$anonfun$createConnectionFactory$1$adapted(JdbcDialects.scala:229)
		at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.withConnection(JdbcUtils.scala:1318)
		at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:80)
		at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.getSchema(JDBCRelation.scala:247)
		at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.$anonfun$createRelation$1(JdbcRelationProvider.scala:41)
		at org.apache.spark.sql.execution.metric.SQLMetrics$.withTimingNs(SQLMetrics.scala:234)
		at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:41)
		at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:364)
		at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.org$apache$spark$sql$catalyst$analysis$ResolveDataSource$$loadV1BatchSource(ResolveDataSource.scala:143)
		at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.$anonfun$applyOrElse$2(ResolveDataSource.scala:61)
		at scala.Option.getOrElse(Option.scala:201)
		at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.applyOrElse(ResolveDataSource.scala:61)
		at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.applyOrElse(ResolveDataSource.scala:45)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$3(AnalysisHelper.scala:139)
		at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:107)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$1(AnalysisHelper.scala:139)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:416)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning(AnalysisHelper.scala:135)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning$(AnalysisHelper.scala:131)
		at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUpWithPruning(LogicalPlan.scala:37)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp(AnalysisHelper.scala:112)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp$(AnalysisHelper.scala:111)
		at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:37)
		at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.apply(ResolveDataSource.scala:45)
		at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.apply(ResolveDataSource.scala:43)
		at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:248)
		at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:183)
		at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:179)
		at scala.collection.immutable.List.foldLeft(List.scala:79)
		at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:245)
		at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:237)
		at scala.collection.immutable.List.foreach(List.scala:323)
		at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:237)
		at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:343)
		at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:339)
		at org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:224)
		at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:339)
		at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:289)
		at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:207)
		at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
		at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:207)
		at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.resolveInFixedPoint(HybridAnalyzer.scala:236)
		at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.$anonfun$apply$1(HybridAnalyzer.scala:91)
		at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.withTrackedAnalyzerBridgeState(HybridAnalyzer.scala:122)
		at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.apply(HybridAnalyzer.scala:84)
		at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:322)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:423)
		at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:322)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$2(QueryExecution.scala:139)
		at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:148)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:330)
		at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:717)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:330)
		at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
		at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:329)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$1(QueryExecution.scala:139)
		at scala.util.Try$.apply(Try.scala:217)
		at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1392)
		at org.apache.spark.util.LazyTry.tryT$lzycompute(LazyTry.scala:46)
		at org.apache.spark.util.LazyTry.tryT(LazyTry.scala:46)
		... 21 more
Caused by: com.mysql.cj.exceptions.CJCommunicationsException: Communications link failure

The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
	at com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:52)
	at com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:95)
	at com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:140)
	at com.mysql.cj.exceptions.ExceptionFactory.createCommunicationsException(ExceptionFactory.java:156)
	at com.mysql.cj.protocol.a.NativeSocketConnection.connect(NativeSocketConnection.java:79)
	at com.mysql.cj.NativeSession.connect(NativeSession.java:142)
	at com.mysql.cj.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:961)
	at com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:825)
	at com.mysql.cj.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:420)
	at com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:238)
	at com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:180)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider.getConnection(BasicConnectionProvider.scala:50)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProviderBase.create(ConnectionProvider.scala:102)
	at org.apache.spark.sql.jdbc.JdbcDialect.$anonfun$createConnectionFactory$1(JdbcDialects.scala:233)
	at org.apache.spark.sql.jdbc.JdbcDialect.$anonfun$createConnectionFactory$1$adapted(JdbcDialects.scala:229)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.withConnection(JdbcUtils.scala:1318)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:80)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.getSchema(JDBCRelation.scala:247)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.$anonfun$createRelation$1(JdbcRelationProvider.scala:41)
	at org.apache.spark.sql.execution.metric.SQLMetrics$.withTimingNs(SQLMetrics.scala:234)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:41)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:364)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.org$apache$spark$sql$catalyst$analysis$ResolveDataSource$$loadV1BatchSource(ResolveDataSource.scala:143)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.$anonfun$applyOrElse$2(ResolveDataSource.scala:61)
	at scala.Option.getOrElse(Option.scala:201)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.applyOrElse(ResolveDataSource.scala:61)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource$$anonfun$apply$1.applyOrElse(ResolveDataSource.scala:45)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$3(AnalysisHelper.scala:139)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:107)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUpWithPruning$1(AnalysisHelper.scala:139)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:416)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning(AnalysisHelper.scala:135)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUpWithPruning$(AnalysisHelper.scala:131)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUpWithPruning(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp(AnalysisHelper.scala:112)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp$(AnalysisHelper.scala:111)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.apply(ResolveDataSource.scala:45)
	at org.apache.spark.sql.catalyst.analysis.ResolveDataSource.apply(ResolveDataSource.scala:43)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:248)
	at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:183)
	at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:179)
	at scala.collection.immutable.List.foldLeft(List.scala:79)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:245)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:237)
	at scala.collection.immutable.List.foreach(List.scala:323)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:237)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:343)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:339)
	at org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:224)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:339)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:289)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:207)
	at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
	at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:207)
	at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.resolveInFixedPoint(HybridAnalyzer.scala:236)
	at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.$anonfun$apply$1(HybridAnalyzer.scala:91)
	at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.withTrackedAnalyzerBridgeState(HybridAnalyzer.scala:122)
	at org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.apply(HybridAnalyzer.scala:84)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:322)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:423)
	at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:322)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$2(QueryExecution.scala:139)
	at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:148)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:330)
	at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:717)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:330)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
	at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:329)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$1(QueryExecution.scala:139)
	at scala.util.Try$.apply(Try.scala:217)
	at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1392)
	at org.apache.spark.util.LazyTry.tryT$lzycompute(LazyTry.scala:46)
	at org.apache.spark.util.LazyTry.tryT(LazyTry.scala:46)
	... 21 more
Caused by: java.net.ConnectException: Connection refused
	at java.base/sun.nio.ch.Net.connect0(Native Method)
	at java.base/sun.nio.ch.Net.connect(Net.java:579)
	at java.base/sun.nio.ch.Net.connect(Net.java:568)
	at java.base/sun.nio.ch.NioSocketImpl.connect(NioSocketImpl.java:593)
	at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:327)
	at java.base/java.net.Socket.connect(Socket.java:633)
	at com.mysql.cj.protocol.StandardSocketFactory.connect(StandardSocketFactory.java:144)
	at com.mysql.cj.protocol.a.NativeSocketConnection.connect(NativeSocketConnection.java:53)
	... 90 more
