In [None]:
    from bs4 import BeautifulSoup  # Import BeautifulSoup for parsing HTML
    import requests  # Import requests for making HTTP requests
    import pandas as pd  # Import pandas for data manipulation and structured output
    import json # Import json for parsing the JSON data embedded in the HTML

    def scrape_car_details(url, timeout=10):
        """
        Scrape car variant details from a car collection page.

        The page is fetched over HTTP, the ``<script id="__NEXT_DATA__">`` tag
        is located with BeautifulSoup, and the JSON it contains is flattened
        into one row per car variant.

        Args:
            url (str): The URL of the car collection page.
            timeout (float | tuple): Seconds to wait for the server, passed
                straight to ``requests.get``. Without a timeout a stalled
                connection would block this call indefinitely.

        Returns:
            list: A list of dictionaries, one per car variant, with the keys
                "Car Make", "Car Model", "Price", "Fuel Type",
                "Transmission Type", "Available Color Variants Count",
                "Available Colors (Detailed)" and "Location Details".
                An empty list is returned if the request fails, the status
                code is not 200, the script tag is missing, or any error is
                raised while parsing (errors are printed, never propagated).
        """
        try:
            # A timeout error is a RequestException subclass, so it is reported
            # by the first handler below like any other network failure.
            response = requests.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Failed to access the website. Status code: {response.status_code}")
                return []
            print("Successfully accessed the website.")

            # 'html.parser' is the parser bundled with Python; no extra dependency.
            soup = BeautifulSoup(response.content, 'html.parser')
            next_data_script = soup.find('script', id='__NEXT_DATA__')

            if not next_data_script:
                # The embedded JSON is the only source used for the rows, so
                # there is nothing to fall back on when it is absent.
                print("Could not find __NEXT_DATA__ script tag. Data extraction may be incomplete.")
                return []

            json_data = json.loads(next_data_script.string)
            return _parse_listing(json_data)

        # Network-level failures (DNS, connection, timeout, ...).
        except requests.exceptions.RequestException as e:
            print(f"An error occurred during the request: {e}")
            return []
        # The script tag was present but did not contain valid JSON.
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from __NEXT_DATA__ script tag: {e}")
            return []
        # Last-resort boundary: this function promises a list, never an exception.
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return []

    def _describe_colors(colors_json):
        """Return one display string per entry of a variant's 'colors' array."""
        descriptions = []
        for color_item in colors_json:
            color_details = color_item.get('color') or {}
            brand_color = color_details.get('brand_color', 'Unknown Color')
            hex_codes = color_details.get('hex_codes') or []

            if hex_codes:
                # Format as "Color Name (Hex Code)"; str() guards against
                # non-string entries, which str.join would reject.
                descriptions.append(f"{brand_color} ({', '.join(map(str, hex_codes))})")
            else:
                descriptions.append(brand_color)  # No hex code available
        return descriptions

    def _parse_listing(json_data):
        """Flatten the parsed __NEXT_DATA__ payload into one dict per car variant.

        Nested containers are read with ``.get(key) or <empty>`` rather than
        ``.get(key, <empty>)``: the default only applies to a *missing* key,
        so an explicit JSON ``null`` would otherwise raise AttributeError and
        make the caller discard every row instead of marking one field "N/A".
        """
        # Path used by the page: props -> pageProps -> listingData -> result
        page_props = (json_data.get('props') or {}).get('pageProps') or {}
        listing_results = (page_props.get('listingData') or {}).get('result') or []

        # The selected city applies to every row on the page.
        global_city = (page_props.get('cities') or {}).get('selectedCity') or 'N/A'
        location_details = f"in {global_city}" if global_city != 'N/A' else 'N/A'

        car_data = []
        for car_model_json in listing_results:
            car_make = car_model_json.get('brand_name', 'N/A')
            car_model_base_name = car_model_json.get('model_name', 'N/A')

            for variant_json in car_model_json.get('variants') or []:
                variant_name = variant_json.get('variant_name', 'N/A')
                full_car_model = f"{car_model_base_name} {variant_name}".strip()

                # Convert the numeric price to Lakhs (1 Lakh = 100,000).
                # bool is a subclass of int, so it is excluded explicitly.
                raw_price = (variant_json.get('price') or {}).get('final_price')
                is_number = isinstance(raw_price, (int, float)) and not isinstance(raw_price, bool)
                price = f"₹{raw_price / 100000:.2f} L" if is_number else "N/A"

                basic_features = variant_json.get('basic_feature') or {}
                fuel_type = basic_features.get('fuel_type', 'N/A')
                transmission_type = basic_features.get('transmission', 'N/A')

                colors_json = variant_json.get('colors') or []
                available_colors_list = _describe_colors(colors_json)

                num_colors_text = f"Available in {len(colors_json)} colors" if colors_json else "N/A"
                all_colors_detailed = "; ".join(available_colors_list) if available_colors_list else "N/A"

                car_data.append({
                    "Car Make": car_make,
                    "Car Model": full_car_model,
                    "Price": price,
                    "Fuel Type": fuel_type,
                    "Transmission Type": transmission_type,
                    "Available Color Variants Count": num_colors_text,
                    "Available Colors (Detailed)": all_colors_detailed,
                    "Location Details": location_details,
                })
        return car_data

    # Entry point: scrape the Tata collection page, then show the rows and
    # persist them to CSV. Runs only when this code is executed directly.
    if __name__ == "__main__":
        website_url = 'https://ackodrive.com/collection/tata-cars/'
        scraped_cars = scrape_car_details(website_url)

        if not scraped_cars:
            # Nothing came back (request failure, missing JSON, or empty listing).
            print("No car data was scraped.")
        else:
            # A DataFrame gives aligned console output and a direct CSV export.
            df = pd.DataFrame(scraped_cars)

            # Lift pandas' display limits so no column or row is elided when printing.
            for option_name, option_value in (
                ('display.max_columns', None),
                ('display.width', 1000),
                ('display.max_rows', None),
            ):
                pd.set_option(option_name, option_value)

            print("\n--- Scraped Car Data ---")
            print(df)

            # index=False keeps the positional DataFrame index out of the file.
            df.to_csv("scraped_tata_cars_with_colors.csv", index=False)
            print("\nData saved to scraped_tata_cars_with_colors.csv")



Successfully accessed the website.

--- Scraped Car Data ---
    Car Make                                          Car Model     Price Fuel Type Transmission Type Available Color Variants Count                        Available Colors (Detailed) Location Details
0       Tata                                         Punch Pure   ₹6.41 L    Petrol            Manual          Available in 2 colors      Orcus White (#f1f6f9); Daytona Grey (#7c858c)              N/A
1       Tata                                      Punch Pure(O)   ₹7.49 L    Petrol            Manual          Available in 2 colors      Daytona Grey (#7c858c); Orcus White (#f1f6f9)              N/A
2       Tata                                    Punch Adventure   ₹7.87 L    Petrol            Manual          Available in 4 colors  Orcus White (#f1f6f9); Daytona Grey (#7c858c);...              N/A
3       Tata                                     Punch Pure CNG   ₹7.92 L       CNG            Manual          Available in 2 colors   