In [2]:
import json
from sqlalchemy import create_engine
import pandas as pd
from urllib.parse import quote

# Load MySQL config from JSON.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists. Consider making the location configurable.
CONFIG_FILEPATH = 'C:/Users/Ribish/Downloads/Karan/mysql.json'

# JSON text is UTF-8 by specification. Pin the codec instead of relying on the
# platform default so non-ASCII characters in the credentials are decoded
# correctly; "utf-8-sig" additionally tolerates a leading byte-order mark.
with open(CONFIG_FILEPATH, encoding="utf-8-sig") as config_file:
    mysql_config = json.load(config_file)

# Extract MySQL credentials. A missing key raises KeyError here, before any
# connection is attempted.
HOST = mysql_config["HOST"]
PORT = mysql_config["PORT"]
USER = mysql_config["USER"]
PASSWORD = mysql_config["PASSWORD"]
DATABASE = mysql_config["DATABASE"]

# Function to connect to MySQL
def connecting_to_mysql(ip_address, port, username, password, database_name):
    """Build a SQLAlchemy engine for a MySQL database via the PyMySQL driver.

    Args:
        ip_address: Host name or IP address of the MySQL server.
        port: TCP port of the MySQL server.
        username: Login name (str); percent-encoded before being placed in the URL.
        password: Login password (str); percent-encoded before being placed in the URL.
        database_name: Name of the database (schema) to select.

    Returns:
        The engine object returned by ``create_engine`` for the assembled
        ``mysql+pymysql://`` URL.
    """
    # Encode both credentials with no "safe" characters: '@', ':' and '/' are
    # URL delimiters, so leaving any of them raw in the user name or password
    # makes the connection string ambiguous or unparsable.
    encoded_username = quote(username, safe="")
    encoded_password = quote(password, safe="")
    db_connection_str = (
        f"mysql+pymysql://{encoded_username}:{encoded_password}"
        f"@{ip_address}:{port}/{database_name}"
    )
    return create_engine(db_connection_str)  # Return engine for running queries

# Connect to MySQL
engine = connecting_to_mysql(HOST, PORT, USER, PASSWORD, DATABASE)

# Day being compared. It is defined once so both queries always cover the same
# window. The window is half-open, [start, end): a row stamped e.g.
# 23:59:59.500 is included, whereas BETWEEN ... AND '... 23:59:59' drops it.
REPORT_DATE = "2025-02-26"
_window_start = pd.Timestamp(REPORT_DATE)
_window_end = _window_start + pd.Timedelta(days=1)
WINDOW_START = _window_start.strftime("%Y-%m-%d %H:%M:%S")
WINDOW_END = _window_end.strftime("%Y-%m-%d %H:%M:%S")

# Define Queries: same aggregation against two views; only the view name and
# the column suffix differ. The values substituted in are constants defined in
# this cell, not external input.
QUERY_TEMPLATE = """
SELECT Chassis_no, SUM(totalDistance) AS TotalDistance_{label}, SUM(totalEH) AS TotalEH_{label}
FROM vecvdb.{view}
WHERE firstPacketDateTime >= '{start}' AND firstPacketDateTime < '{end}'
AND lastPacketDateTime >= '{start}' AND lastPacketDateTime < '{end}'
GROUP BY Chassis_no;
"""

query1 = QUERY_TEMPLATE.format(
    label="Table1", view="uel_agg_daily_vw", start=WINDOW_START, end=WINDOW_END
)
query2 = QUERY_TEMPLATE.format(
    label="Table2", view="uel_quarter_hour_agg_drv_vw", start=WINDOW_START, end=WINDOW_END
)

# Fetch Data from MySQL
df1 = pd.read_sql(query1, engine)
df2 = pd.read_sql(query2, engine)

# Merge Data on Chassis_no. The outer join keeps chassis present in only one
# source; their missing side is NaN and therefore compares as a mismatch below.
merged_df = pd.merge(df1, df2, on="Chassis_no", how="outer")

# Compare Values
# NOTE(review): this is an exact equality check. If the summed columns are
# floating point, rounding differences between the two views can show up as
# mismatches; confirm the column types and consider a tolerance if so.
distance_matches = merged_df['TotalDistance_Table1'] == merged_df['TotalDistance_Table2']
eh_matches = merged_df['TotalEH_Table1'] == merged_df['TotalEH_Table2']
merged_df['Match_Status'] = distance_matches & eh_matches

# Convert Match_Status to Readable Format
merged_df['Match_Status_Text'] = merged_df['Match_Status'].map({True: "✅ Matched", False: "❌ Mismatch"})

# Separate Matched & Mismatched Data (the boolean helper column is not exported)
is_match = merged_df['Match_Status']
matched_df = merged_df[is_match].drop(columns=['Match_Status'])
mismatched_df = merged_df[~is_match].drop(columns=['Match_Status'])
all_data_df = merged_df.drop(columns=['Match_Status'])

# Save to Excel: one workbook, three sheets
output_file = "comparison2_result.xlsx"
with pd.ExcelWriter(output_file) as writer:
    all_data_df.to_excel(writer, sheet_name="All_Data", index=False)
    mismatched_df.to_excel(writer, sheet_name="Mismatches", index=False)
    matched_df.to_excel(writer, sheet_name="Matches", index=False)

print(f"Excel file '{output_file}' generated successfully with 3 sheets!")


Excel file 'comparison2_result.xlsx' generated successfully with 3 sheets!


In [None]:
import json
from sqlalchemy import create_engine
import pandas as pd
from urllib.parse import quote

# Load MySQL configuration from JSON.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists. Consider making the location configurable.
CONFIG_FILEPATH = 'C:/Users/Ribish/Downloads/Karan/mysql.json'

# JSON text is UTF-8 by specification. Pin the codec instead of relying on the
# platform default so non-ASCII characters in the credentials are decoded
# correctly; "utf-8-sig" additionally tolerates a leading byte-order mark.
with open(CONFIG_FILEPATH, encoding="utf-8-sig") as config_file:
    mysql_config = json.load(config_file)

# Extract MySQL credentials. A missing key raises KeyError here, before any
# connection is attempted.
HOST = mysql_config["HOST"]
PORT = mysql_config["PORT"]
USER = mysql_config["USER"]
PASSWORD = mysql_config["PASSWORD"]
DATABASE = mysql_config["DATABASE"]

# Function to connect to MySQL
def connecting_to_mysql(ip_address, port, username, password, database_name):
    """Build a SQLAlchemy engine for a MySQL database via the PyMySQL driver.

    Args:
        ip_address: Host name or IP address of the MySQL server.
        port: TCP port of the MySQL server.
        username: Login name (str); percent-encoded before being placed in the URL.
        password: Login password (str); percent-encoded before being placed in the URL.
        database_name: Name of the database (schema) to select.

    Returns:
        The engine object returned by ``create_engine`` for the assembled
        ``mysql+pymysql://`` URL.
    """
    # Encode both credentials with no "safe" characters: '@', ':' and '/' are
    # URL delimiters, so leaving any of them raw in the user name or password
    # makes the connection string ambiguous or unparsable.
    encoded_username = quote(username, safe="")
    encoded_password = quote(password, safe="")
    db_connection_str = (
        f"mysql+pymysql://{encoded_username}:{encoded_password}"
        f"@{ip_address}:{port}/{database_name}"
    )
    return create_engine(db_connection_str)  # Return engine for running queries

# Connect to MySQL
engine = connecting_to_mysql(HOST, PORT, USER, PASSWORD, DATABASE)

# Day being compared. It is defined once so both queries always cover the same
# window. The window is half-open, [start, end): a row stamped e.g.
# 23:59:59.500 is included, whereas BETWEEN ... AND '... 23:59:59' drops it.
REPORT_DATE = "2025-02-25"
_window_start = pd.Timestamp(REPORT_DATE)
_window_end = _window_start + pd.Timedelta(days=1)
WINDOW_START = _window_start.strftime("%Y-%m-%d %H:%M:%S")
WINDOW_END = _window_end.strftime("%Y-%m-%d %H:%M:%S")

# Define SQL queries for Daily and Quarter Hour data: same aggregation against
# two views; only the view name and the column suffix differ. The values
# substituted in are constants defined in this cell, not external input.
QUERY_TEMPLATE = """
SELECT Chassis_no, SUM(totalDistance) AS TotalDistance_{label}, SUM(totalEH) AS TotalEH_{label}
FROM vecvdb.{view}
WHERE firstPacketDateTime >= '{start}' AND firstPacketDateTime < '{end}'
  AND lastPacketDateTime >= '{start}' AND lastPacketDateTime < '{end}'
GROUP BY Chassis_no;
"""

query1 = QUERY_TEMPLATE.format(
    label="Table1", view="uel_agg_daily_vw", start=WINDOW_START, end=WINDOW_END
)
query2 = QUERY_TEMPLATE.format(
    label="Table2", view="uel_quarter_hour_agg_drv_vw", start=WINDOW_START, end=WINDOW_END
)

# Fetch data from MySQL into DataFrames
df1 = pd.read_sql(query1, engine)
df2 = pd.read_sql(query2, engine)

# Merge the two DataFrames on Chassis_no. The outer join keeps chassis present
# in only one source; their missing side is NaN and compares as a mismatch.
merged_df = pd.merge(df1, df2, on="Chassis_no", how="outer")

# Compare values between the two sources for each chassis.
# NOTE(review): this is an exact equality check. If the summed columns are
# floating point, rounding differences between the two views can show up as
# mismatches; confirm the column types and consider a tolerance if so.
distance_matches = merged_df['TotalDistance_Table1'] == merged_df['TotalDistance_Table2']
eh_matches = merged_df['TotalEH_Table1'] == merged_df['TotalEH_Table2']
merged_df['Match_Status'] = distance_matches & eh_matches

# Rename columns according to your specification
merged_df = merged_df.rename(columns={
    "TotalDistance_Table1": "TotalDistance_Daily",
    "TotalDistance_Table2": "TotalDistance_Quarter",
    "TotalEH_Table1": "TotalEH_Daily",
    "TotalEH_Table2": "TotalEH_Quarter Hour"
})

# Reorder columns as requested. The explicit .copy() makes the result an
# independent frame, so adding a column to it below does not trigger pandas'
# SettingWithCopyWarning.
merged_df = merged_df[[
    "Chassis_no",
    "TotalDistance_Daily",
    "TotalDistance_Quarter",
    "TotalEH_Daily",
    "TotalEH_Quarter Hour",
    "Match_Status"
]].copy()

# Create a human-readable column for match status (optional)
merged_df['Match_Status_Text'] = merged_df['Match_Status'].map({True: "✅ Matched", False: "❌ Mismatch"})

# Prepare separate DataFrames for export:
# All data (matched and mismatched rows, boolean flag included)
all_data_df = merged_df.copy()

# Match_Status is a boolean column, so it can be used directly as a row mask.
is_match = merged_df['Match_Status']

# Only mismatches (the boolean flag is dropped; the text column remains)
mismatched_df = merged_df[~is_match].drop(columns=['Match_Status'])

# Only matches
matched_df = merged_df[is_match].drop(columns=['Match_Status'])

# Save the results to an Excel file with three sheets
output_file = "comparison_result.xlsx"
with pd.ExcelWriter(output_file) as writer:
    all_data_df.to_excel(writer, sheet_name="All_Data", index=False)
    mismatched_df.to_excel(writer, sheet_name="Mismatches", index=False)
    matched_df.to_excel(writer, sheet_name="Matches", index=False)

print(f"Excel file '{output_file}' generated successfully with 3 sheets!")
