In [7]:
import os

import mysql.connector
from pyspark.sql import SparkSession

# Step 1: Create Spark Session
# The MySQL connector JAR location and the database credentials can be
# overridden through environment variables (MYSQL_JDBC_JAR, MYSQL_USER,
# MYSQL_PASSWORD). The defaults reproduce the previous hard-coded values so the
# notebook keeps working unchanged on the original machine.
# NOTE(review): the fallback password literal should be removed (and the
# password rotated) before this notebook is shared or committed.
MYSQL_JDBC_JAR = os.environ.get(
    "MYSQL_JDBC_JAR",
    "/Users/mugesh_krishna/Downloads/mysql-connector-j-9.3.0/mysql-connector-j-9.3.0.jar",
)

spark = (
    SparkSession.builder
    .appName("MySQL JDBC Example")
    .config("spark.jars", MYSQL_JDBC_JAR)
    .getOrCreate()
)

print("Spark Session created!")

# JDBC connection settings shared by every Spark read/write below.
jdbc_url = "jdbc:mysql://localhost:3306/samplebill"
properties = {
    "user": os.environ.get("MYSQL_USER", "root"),
    "password": os.environ.get("MYSQL_PASSWORD", "mugesh585"),
    "driver": "com.mysql.cj.jdbc.Driver",
}

# Step 2: Read billinfo table
# `df` is a lazy JDBC-backed DataFrame: each action on it (show, collect, join)
# queries MySQL again rather than reusing a snapshot.
df = spark.read.jdbc(url=jdbc_url, table="billinfo", properties=properties)
print("All rows from billinfo table:")
df.show()

# Step 3: Filter where ispackage = 'No' or 'no'
filtered_df = df.filter((df["ispackage"] == "No") | (df["ispackage"] == "no"))

# Get billids to update (collected to the driver as a plain Python list)
id_list = [row["billid"] for row in filtered_df.select("billid").collect()]
print("IDs with ispackage = 'No':", id_list)

# Step 4: Update those billids in MySQL to ispackage = 'Yes'
if id_list:
    # Pre-bind both handles so the `finally` clause is safe even when
    # connect() or cursor() raises; otherwise a NameError would mask the
    # real connection error.
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(
            host="localhost",
            # Reuse the JDBC credentials instead of repeating the literals.
            user=properties["user"],
            password=properties["password"],
            database="samplebill"
        )
        cursor = conn.cursor()

        # One %s placeholder per id keeps the statement parameterized.
        format_ids = ','.join(['%s'] * len(id_list))
        update_query = f"UPDATE billinfo SET ispackage = 'Yes' WHERE billid IN ({format_ids})"
        cursor.execute(update_query, id_list)
        conn.commit()

        print(f"Updated {cursor.rowcount} rows where ispackage was 'No'.")
    except Exception as e:
        # Best-effort step: report the failure and let the notebook continue.
        print("Error updating MySQL:", e)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
else:
    print("No records found where ispackage = 'No'.")




# Step 5: Replicate billinfo rows that are not yet present in billrepli.
repli_df = spark.read.jdbc(url=jdbc_url, table="billrepli", properties=properties)

# Only the keys are needed to decide which rows are missing.
repli_ids_df = repli_df.select("billid").distinct()

# left_anti keeps the billinfo rows whose billid has no match in billrepli.
new_records_df = df.join(repli_ids_df, on="billid", how="left_anti")

print("New records to replicate from billinfo to billrepli:")
new_records_df.show()

# Cache and count BEFORE writing. The DataFrame is lazy, so counting after the
# append would re-run the anti-join against the already-updated billrepli table
# and report the wrong number (typically 0).
new_records_to_insert = new_records_df.select(
    "billid", "name", "origin", "desti", "billdate", "deliverydate", "ispackage"
).cache()
new_record_count = new_records_to_insert.count()

# Skip the JDBC round-trip entirely when there is nothing to append.
if new_record_count:
    new_records_to_insert.write.jdbc(
        url=jdbc_url,
        table="billrepli",
        mode="append",
        properties=properties
    )
print(f"{new_record_count} new record(s) replicated to billrepli.")

# Release the cached partitions; the DataFrame itself stays usable.
new_records_to_insert.unpersist()


Spark Session created!
All rows from billinfo table:
+------+-------------+---------+---------+----------+------------+---------+
|billid|         name|   origin|    desti|  billdate|deliverydate|ispackage|
+------+-------------+---------+---------+----------+------------+---------+
|   101|     Gopinath|  Chennai|  Denmark|2025-06-03|  2025-07-10|      Yes|
|   102|     Karthick|   Mumbai|Singapore|2025-06-03|  2025-07-15|      Yes|
|   103|Krishna Kumar|    Dubai|  Hamburg|2025-06-03|  2025-08-05|      Yes|
|   104|      Gowshik|  Denmark|   Mexico|2025-06-03|  2025-09-20|      Yes|
|   105|       Dinesh|Singapore|    Dubai|2025-06-04|  2025-09-12|      Yes|
|   106|        Muthu|    China|   Mumbai|2025-06-04|  2025-09-20|       No|
+------+-------------+---------+---------+----------+------------+---------+

IDs with ispackage = 'No': [106]
Updated 1 rows where ispackage was 'No'.
New records to replicate from billinfo to billrepli:
+------+-----+------+------+----------+----------

25/06/04 13:37:47 WARN HeartbeatReceiver: Removing executor driver with no recent heartbeats: 991856 ms exceeds timeout 120000 ms
25/06/04 13:37:47 WARN SparkContext: Killing executors is not supported by current scheduler.
25/06/04 13:47:30 ERROR Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.SparkThreadUtils$.awaitResult(SparkThreadUtils.scala:53)
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:342)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:132)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$

In [6]:
import mysql.connector
from datetime import datetime


# Interactively insert one row into `billinfo`.
# Both handles are pre-bound so the `finally` clause cannot raise NameError
# when the connection attempt itself fails.
conn = None
cursor = None
try:
    # NOTE(review): credentials are hard-coded here; move them to environment
    # variables or getpass before sharing this notebook.
    conn = mysql.connector.connect(
        host="localhost",
        user="root",
        password="mugesh585",
        database="samplebill"
    )

    cursor = conn.cursor()
    print("Connected to MySQL database")

    # Getting inputs from user
    billid = int(input("Enter Bill ID (int): "))
    name = input("Enter Name: ")
    origin = input("Enter Origin: ")
    desti = input("Enter Destination: ")
    # Parse the dates up front so a malformed value is rejected with a clear
    # message instead of reaching the database; strptime raises ValueError.
    billdate = datetime.strptime(
        input("Enter Bill Date (YYYY-MM-DD): ").strip(), "%Y-%m-%d"
    ).date()
    deliverydate = datetime.strptime(
        input("Enter Delivery Date (YYYY-MM-DD): ").strip(), "%Y-%m-%d"
    ).date()
    ispackage = input("Is it a package? (Yes/No): ")

    # Parameterized statement: values are bound by the driver, never
    # interpolated into the SQL text.
    insert_query = """
    INSERT INTO billinfo (billid, name, origin, desti, billdate, deliverydate, ispackage)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
    """

    cursor.execute(insert_query, (billid, name, origin, desti, billdate, deliverydate, ispackage))
    conn.commit()

    print(" Data inserted successfully into 'billinfo' table.")

except ValueError as err:
    # Raised by int() for a non-numeric bill ID or by strptime() for a bad date.
    print(" Invalid input:", err)

except mysql.connector.Error as err:
    print(" Error:", err)

finally:
    if cursor is not None:
        cursor.close()
    if conn is not None and conn.is_connected():
        conn.close()
        print(" MySQL connection closed.")


Connected to MySQL database


Enter Bill ID (int):  106
Enter Name:  Muthu
Enter Origin:  China
Enter Destination:  Mumbai
Enter Bill Date (YYYY-MM-DD):  2025-06-04
Enter Delivery Date (YYYY-MM-DD):  2025-09-20
Is it a package? (Yes/No):  No


 Data inserted successfully into 'billinfo' table.
 MySQL connection closed.
