In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [3]:
import configparser
# Define DB credentials
# Read database credentials from db.properties
import mysql.connector
# Load the MySQL connection settings from the local properties file.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means db.properties was not
# loaded. Fail here with a clear message instead of a later NoSectionError.
if not config.read('db.properties'):
    raise FileNotFoundError("db.properties not found or unreadable; cannot load DB credentials.")

# Only the host/user/password keys of the [mysql] section are consumed.
db_config = {
    'host': config.get('mysql', 'host'),
    'user': config.get('mysql', 'user'),
    'password': config.get('mysql', 'password')
}

In [4]:
# Open a connection to the 'velocity_vehicles' schema using the credentials
# held in db_config, then create the cursor shared by the query cells.
conn = mysql.connector.connect(
    database='velocity_vehicles',
    **{key: db_config[key] for key in ('host', 'user', 'password')}
)
cursor = conn.cursor()

In [5]:
# List every column of the 'cars' table next to its SQL data type.
# The first two fields of each SHOW COLUMNS row are the name and the type.
cursor.execute("SHOW COLUMNS FROM cars")
for field_name, sql_type, *_ in cursor.fetchall():
    print(f"{field_name}: {sql_type}")

car_id: varchar(255)
brand_id: varchar(255)
dealer_username: varchar(100)
model: varchar(100)
variant: varchar(100)
price_inr: decimal(15,2)
engine(CC): int
cylinders: int
fuel_capacity: int
fuel_type: varchar(100)
body_type: varchar(100)
mileage: float
gears: int
power_steering: varchar(100)
keyless_entry: tinyint(1)
power(HP): float
torque(NM): float
seating_capacity: int
transmission_type: varchar(100)
warranty: int
sun_visor: tinyint(1)
ventilation_system: tinyint(1)
abs: tinyint(1)
airbags: tinyint(1)
colour: varchar(100)
status: varchar(20)


In [None]:
# Fetch every car joined with its brand name (in random order; the query has no
# brand filter and no LIMIT) and print each row as a Java `new Car(...)` call.
query = """
SELECT c.Car_id, b.car_brand, c.model, c.variant, c.dealer_username, c.Price_INR, c.`Engine(CC)`, c.Cylinders, c.Fuel_capacity, c.Fuel_type, c.Body_type, c.Mileage, c.Gears, c.Power_steering, c.Keyless_entry, c.`Power(HP)`, c.`Torque(NM)`, c.Seating_capacity, c.Transmission_type, c.warranty, c.Sun_visor, c.Ventilation_system, c.ABS, c.Airbags, c.Colour, c.Status
FROM cars c
JOIN car_brands b ON c.Brand_ID = b.brand_id
ORDER BY RAND()
"""
cursor.execute(query)
rows = cursor.fetchall()

# Only the tinyint(1) flag columns are booleans. Comparing every value with
# 1 / 0 would also turn genuine numbers (e.g. a warranty of 1 or 0) into
# true / false, so the conversion is restricted to these columns.
BOOLEAN_COLUMNS = {"keyless_entry", "sun_visor", "ventilation_system", "abs", "airbags"}
# cursor.description holds one tuple per result column; its first item is the
# column name. Lower-cased so the lookup does not depend on the casing returned.
column_names = [description[0].lower() for description in cursor.description]

for row in rows:
    formatted_items = []
    for column_name, item in zip(column_names, row):
        if column_name in BOOLEAN_COLUMNS and item is not None:
            formatted_items.append("true" if item else "false")
        elif isinstance(item, str):
            # Strings become double-quoted literals.
            formatted_items.append(f'"{item}"')
        else:
            formatted_items.append(str(item))
    formatted = ', '.join(formatted_items)
    # formatted_items[0] is the quoted car id, reused as the image lookup key.
    print("new Car("+formatted+", carImageService.getCarImageSrc("+formatted_items[0]+")),")

In [6]:
# List every column of the 'car_brands' table next to its SQL data type.
# The first two fields of each SHOW COLUMNS row are the name and the type.
cursor.execute("SHOW COLUMNS FROM car_brands")
for field_name, sql_type, *_ in cursor.fetchall():
    print(f"{field_name}: {sql_type}")

car_brand: varchar(50)
brand_id: varchar(255)


In [None]:
# Fetch the id, brand, model and variant of every car (in random order; the
# query has no brand filter and no LIMIT) and load them into a DataFrame.
query = """
SELECT c.Car_id, b.car_brand, c.model, c.variant
FROM cars c
JOIN car_brands b ON c.Brand_ID = b.brand_id
ORDER BY RAND()
"""
cursor.execute(query)
# `rows` stays a notebook-level name: the preview cell further down slices it.
rows = cursor.fetchall()

# Column labels follow the SELECT order: Car_id, car_brand, model, variant.
df_cars = pd.DataFrame(rows, columns=['car_id', 'brand', 'model', 'variant'])
df_cars.head()

Unnamed: 0,car_id,brand,model,variant
0,7-SEECD6C,Bmw,7-Series,730Ld Design Pure Excellence Signature
1,AMA2E1405,Honda,Amaze,E Mt Petrol
2,CIAEB8722,Maruti Suzuki,Ciaz,1.5L Zeta
3,TUC7D2599,Hyundai,Tucson,2Wd Mt Diesel
4,VENCD8EAA,Hyundai,Venue,1.0 Turbo Gdi Mt Sx(O)


In [25]:
import re
import os
import pandas as pd # Import pandas for DataFrame handling

def parse_old_car_data(car_data_string):
    """
    Extract the identifying fields of every legacy `new Car(...)` call.

    The text is scanned for constructor calls that match the fixed
    27-argument layout encoded in the regex below (26 constructor arguments
    followed by a `carImageService.getCarImageSrc("...")` call). Calls that
    do not match that layout are ignored.

    Returns a list with one dict per match, holding:
      - "old_car_id": 1st argument
      - "car_name": 2nd argument
      - "model_version": 3rd argument
      - "model_variant": 4th argument (may be empty)
      - "full_match": "<car_name> <model_variant>", the key used for matching
      - "original_line": the exact matched text, kept for later replacement
      - "image_old_id_from_src": the ID passed to getCarImageSrc
    """
    # The pattern is tied to the exact argument count and types of the constructor.
    car_pattern = re.compile(r'new Car\("([^"]+)",\s*"([^"]+)",\s*"([^"]+)",\s*"([^"]*)",\s*"([^"]+)",\s*([\d.]+),\s*(\d+),\s*(\d+),\s*(\d+),\s*"([^"]+)",\s*"([^"]+)",\s*([\d.]+),\s*(\d+),\s*"([^"]+)",\s*(true|false),\s*([\d.]+),\s*([\d.]+),\s*(\d+),\s*"([^"]+)",\s*(\d+),\s*(true|false),\s*(true|false),\s*(true|false),\s*(true|false),\s*"([^"]+)",\s*"([^"]+)",\s*carImageService\.getCarImageSrc\("([^"]+)"\)\)')

    def to_record(found):
        # Only the first four captures and the last one (group 27) are needed.
        car_id, brand, model, variant = found.group(1, 2, 3, 4)
        return {
            "old_car_id": car_id,
            "car_name": brand,
            "model_version": model,
            "model_variant": variant,
            "full_match": f"{brand} {variant}",
            "original_line": found.group(0),
            "image_old_id_from_src": found.group(27),
        }

    return [to_record(found) for found in car_pattern.finditer(car_data_string)]

def get_new_car_data(df_cars):
    """
    Convert the cars DataFrame into a list of per-car dicts.

    `df_cars` must contain the columns 'car_id', 'brand' and 'variant'.
    'brand' is renamed to 'car_name' and 'variant' to 'model_variant', the
    keys read by generate_updated_car_definitions and rename_car_images.
    Any other columns are passed through unchanged.

    Returns one dict per row (`to_dict(orient='records')`).
    Raises ValueError if a required column is missing.
    The input DataFrame is not modified.
    """
    required_cols = ['car_id', 'brand', 'variant']
    if not all(col in df_cars.columns for col in required_cols):
        raise ValueError(f"df_cars DataFrame must contain {required_cols} columns.")

    # rename() returns a new DataFrame, so no defensive copy is needed, and the
    # renamed columns are guaranteed present once the check above has passed.
    renamed = df_cars.rename(columns={'brand': 'car_name', 'variant': 'model_variant'})
    return renamed.to_dict(orient='records')

def generate_updated_car_definitions(old_car_data, new_car_data):
    """
    Produce the Car definitions with old IDs swapped for the matching new IDs.

    old_car_data: dicts with the keys "full_match", "old_car_id",
        "original_line", "image_old_id_from_src", "car_name" and
        "model_variant".
    new_car_data: dicts with the keys "car_name", "model_variant" and "car_id".

    A car is matched on the string "<car_name> <model_variant>". For a match,
    the first constructor argument and the getCarImageSrc argument are both
    replaced with the new ID. Without a match a warning is printed and the
    original definition is kept.

    Returns the list of definitions in the same order as old_car_data.
    """
    # Lookup "CarName ModelVariant" -> new car_id. If several new cars share
    # the same key, the last one in new_car_data wins.
    new_car_map = {f"{car['car_name']} {car['model_variant']}": car['car_id'] for car in new_car_data}

    updated_definitions = []
    for old_car in old_car_data:
        full_match_key = old_car["full_match"]
        if full_match_key not in new_car_map:
            print(f"Warning: No new ID found for {old_car['car_name']} {old_car['model_variant']}. Keeping old definition.")
            updated_definitions.append(old_car["original_line"])
            continue

        new_car_id = new_car_map[full_match_key]
        new_prefix = f'new Car("{new_car_id}"'

        # Replace only the first constructor argument. The replacement is given
        # as a callable so the new ID is inserted literally: a plain string
        # would have its backslashes interpreted as escapes/group references.
        updated_line = re.sub(
            r'new Car\("' + re.escape(old_car["old_car_id"]) + r'"',
            lambda _match: new_prefix,
            old_car["original_line"],
            count=1
        )

        # Then replace the ID passed to carImageService.getCarImageSrc.
        updated_line = updated_line.replace(
            f'carImageService.getCarImageSrc("{old_car["image_old_id_from_src"]}")',
            f'carImageService.getCarImageSrc("{new_car_id}")'
        )
        updated_definitions.append(updated_line)
    return updated_definitions

def rename_car_images(old_car_data, new_car_data, image_directory):
    """
    Rename car image files from their old ID to the matched new car ID.

    old_car_data: dicts with the keys "full_match", "image_old_id_from_src",
        "car_name" and "model_variant".
    new_car_data: dicts with the keys "car_name", "model_variant" and "car_id".
    image_directory: directory that holds the image files.

    A file belongs to a car when its name without the extension equals the
    old image ID exactly (e.g. 'OLD_ID.jpg', 'OLD_ID.png'); every such file is
    renamed to '<new_id><ext>'. An existing target file is never overwritten.
    Progress and problems are reported with print(); nothing is returned.
    """
    # Lookup "CarName ModelVariant" -> new car_id. If several new cars share
    # the same key, the last one in new_car_data wins.
    new_car_map = {f"{car['car_name']} {car['model_variant']}": car['car_id'] for car in new_car_data}

    for old_car in old_car_data:
        full_match_key = old_car["full_match"]
        if full_match_key not in new_car_map:
            print(f"No new ID for {old_car['car_name']} {old_car['model_variant']}. Skipping image rename for {old_car['image_old_id_from_src']}")
            continue

        old_image_id = old_car["image_old_id_from_src"]
        new_image_id = new_car_map[full_match_key]

        # Snapshot the matching names before renaming anything, so the
        # directory is not modified while it is being iterated, and close the
        # scandir iterator promptly. The stem must match exactly: a prefix test
        # would let ID 'M4' pick up 'M448BF09.jpg'.
        with os.scandir(image_directory) as entries:
            matching_names = sorted(
                entry.name
                for entry in entries
                if entry.is_file() and os.path.splitext(entry.name)[0] == old_image_id
            )

        if not matching_names:
            print(f"No image found for old ID '{old_image_id}' in '{image_directory}'.")
            continue

        for file_name in matching_names:
            ext = os.path.splitext(file_name)[1]
            new_name = f"{new_image_id}{ext}"
            if new_name == file_name:
                print(f"'{file_name}' already uses the new ID.")
                continue

            old_path = os.path.join(image_directory, file_name)
            new_path = os.path.join(image_directory, new_name)
            # os.rename may silently replace an existing file on some
            # platforms; refuse instead of destroying another car's image.
            if os.path.exists(new_path):
                print(f"Error renaming {file_name} to {new_name}: target already exists")
                continue

            try:
                os.rename(old_path, new_path)
                print(f"Renamed '{file_name}' to '{new_name}'")
            except OSError as e:
                print(f"Error renaming {file_name} to {new_name}: {e}")


if __name__ == "__main__":
    # Read the old Car definitions from a text file.
    try:
        with open("old_car_definitions.txt", "r", encoding="utf-8") as f:
            old_car_definitions_string = f.read()
    except FileNotFoundError:
        # There is no built-in fallback data. Mark the input as missing so the
        # steps below are skipped instead of failing with a NameError (or, in
        # a notebook, silently reusing a stale value from an earlier run).
        old_car_definitions_string = None
        print("old_car_definitions.txt not found. Nothing to migrate.")

    # Directory where the car images are stored.
    # IMPORTANT: Change this to your actual image directory!
    IMAGE_DIRECTORY = "car_img" # Example: "C:/Users/YourUser/Desktop/car_images" or "./images"

    # NOTE: `df_cars` is not defined in this cell; it must already exist
    # (it is built from the database in a preceding notebook cell).

    if old_car_definitions_string is not None:
        # --- Step 1: Parse old car data ---
        old_cars = parse_old_car_data(old_car_definitions_string)
        print("--- Parsed Old Car Data ---")
        for car in old_cars:
            print(f"Old ID: {car['old_car_id']}, Car Name: {car['car_name']}, Model Variant: {car['model_variant']}")

        # --- Step 2: Get new car data ---
        # Convert the existing df_cars DataFrame into a list of dicts.
        new_cars = get_new_car_data(df_cars)
        print("\n--- New Car Data (from your 'database') ---")
        for car in new_cars:
            print(f"New ID: {car['car_id']}, Car Name: {car['car_name']}, Model Variant: {car['model_variant']}")

        # --- Step 3: Generate updated Car object definitions ---
        updated_car_definitions = generate_updated_car_definitions(old_cars, new_cars)
        print("\n--- Updated Car Definitions ---")
        # The definitions are only printed; write them to a file here if a
        # persistent copy is needed.
        for definition in updated_car_definitions:
            print(definition)

        # --- Step 4: Rename car image files ---
        # Testing aid: if the image directory does not exist yet, it is created
        # and filled with dummy .jpg/.png files named after the old image IDs.
        if not os.path.exists(IMAGE_DIRECTORY):
            os.makedirs(IMAGE_DIRECTORY)
            print(f"\nCreated directory: {IMAGE_DIRECTORY} for testing image renaming.")
            for car in old_cars:
                dummy_image_path_jpg = os.path.join(IMAGE_DIRECTORY, f"{car['image_old_id_from_src']}.jpg")
                dummy_image_path_png = os.path.join(IMAGE_DIRECTORY, f"{car['image_old_id_from_src']}.png")
                if not os.path.exists(dummy_image_path_jpg):
                    with open(dummy_image_path_jpg, 'w') as f:
                        f.write("dummy content")
                    print(f"Created dummy image: {dummy_image_path_jpg}")
                if not os.path.exists(dummy_image_path_png):
                    with open(dummy_image_path_png, 'w') as f:
                        f.write("dummy content")
                    print(f"Created dummy image: {dummy_image_path_png}")
        print("\n--- Renaming Car Images ---")
        rename_car_images(old_cars, new_cars, IMAGE_DIRECTORY)
        print("\n--- Image renaming process complete. ---")

--- Parsed Old Car Data ---
Old ID: ASPB4AFE6, Car Name: Ford, Model Variant: 1.5 Tdci Blu
Old ID: KUV9B596A, Car Name: Mahindra, Model Variant: K4+ 6Str
Old ID: INN3FACDA, Car Name: Toyota, Model Variant: 2.4 Vx 8 Str
Old ID: CRE87FB16, Car Name: Hyundai, Model Variant: 1.6 Vtvt Sx
Old ID: KUV14AC69, Car Name: Mahindra, Model Variant: K2 D 5Str Taxi
Old ID: AUR23B2F5, Car Name: Hyundai, Model Variant: Sx 1.2 Petrol
Old ID: BOL36C317, Car Name: Mahindra, Model Variant: Lx
Old ID: XUV496500, Car Name: Mahindra, Model Variant: W9 At
Old ID: GRA14E083, Car Name: Hyundai, Model Variant: Era T Crdi
Old ID: GRAAA9EAD, Car Name: Hyundai, Model Variant: Sportz Amt 1.2 Vtvt
Old ID: M448BF09, Car Name: Bmw, Model Variant: Coupe
Old ID: 7-SF1F2DA, Car Name: Bmw, Model Variant: 730Ld Design Pure Excellence Signature
Old ID: X40DE6F1, Car Name: Bmw, Model Variant: Xdrive30I M Sport X
Old ID: X3237D1E, Car Name: Bmw, Model Variant: Xdrive20D Xline
Old ID: 6-SDDB69A, Car Name: Bmw, Model Variant: 620

In [12]:
# Preview the first five fetched rows as "<brand> <model> <variant>"
# (positions 1-3 of each row; position 0 is the car id).
for row in rows[:5]:
    print(row[1], row[2], row[3])

Hyundai Aura S 1.2 Amt Petrol
Mercedes-Benz Mercedes-Benz G-Class Amg G 63
Tata Nexon Xza Plus (O) Diesel
Hyundai Elite I20 Sportz Plus Crdi
Bmw 6-Series 620D Gt Luxury Line
