In [16]:
import time
import json
import random
import html
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from webdriver_manager.microsoft import EdgeChromiumDriverManager
import traceback
import logging
import re  # Make sure to include this import
from word2number import w2n
from datetime import datetime
import logging
from datetime import datetime

logging.basicConfig(level=logging.INFO)

def process_review_count(text):
    text = text.strip().replace(',', '')
    if 'K+' in text:
        return str(int(float(text.replace('(', '').replace(')', '').replace('K+', '').strip()) * 1000))
    return text

def setup_driver():
    options = webdriver.EdgeOptions()
    options.add_argument('--no-sandbox')
    try:
        driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=options)
    except Exception as e:
        print(e)
        raise Exception("Failed to install Edge Chromium driver.")
    return driver



def scrape_extra_parameters(url: str, driver: webdriver.Edge) -> dict:
    try:
        driver.get(url)
        try:
            WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div[data-hook='review']")))

        except TimeoutException:
            print(f"TimeoutException: Could not find reviews for {url}")
            return {}
        
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Extract the general reviews
        reviews_tags = soup.find_all('div', attrs={'data-hook': 'review'})

        result = {}
        for i, review_tag in enumerate(reviews_tags[:5]):
            result[f'Customer_{i + 1}_ID'] = review_tag.attrs.get('id', 'None')
            
            # Extract the Star Rating
            star_rating_tag = review_tag.select_one('i[data-hook="review-star-rating"] span.a-icon-alt')
            star_rating = float(star_rating_tag.text.split()[0]) if star_rating_tag else 0.0
            result[f'Customer_{i+1}_Star_Rating'] = star_rating
            
            # Extract the Comment Title
            comment_title_tag = review_tag.select_one('a[data-hook="review-title"]')
            # Inside the for loop, after extracting the comment title:
            if comment_title_tag:
                actual_comment_title = comment_title_tag.text.strip()
            else:
                # Handle alternate structure
                comment_title_tag = review_tag.select_one('span.cr-original-review-content')
                actual_comment_title = comment_title_tag.text.strip() if comment_title_tag else 'NaN'

            # Remove the pattern "k out of 5 stars\n" from the comment
            actual_comment_title = re.sub(r'\d+(\.\d+)? out of 5 stars\n', '', actual_comment_title)

            result[f'Customer_{i+1}_Comment'] = actual_comment_title

            # Extract the Number of people who found the review helpful
            helpful_vote_tag = review_tag.select_one('span[data-hook="helpful-vote-statement"]')
            helpful_count = w2n.word_to_num(helpful_vote_tag.text.split()[0]) if helpful_vote_tag else 0
            result[f'Customer_{i+1}_buying_influence'] = helpful_count
        
            # Extract all elements matching the criteria
            critical_review_tags = soup.select('div[id^="viewpoint-"]')
            # critical_review_tags = soup.select('div.a-column.a-span6.view-point-review.critical-review.a-span-last')
            if len(critical_review_tags) > 1:
                # If there is more than one matching element, select the second one
                critical_review_tag = critical_review_tags[1]
                critical_review_tag_pack = critical_review_tag.get('id', 'None').replace('viewpoint-', '')
                result['Customer_id_Critical_Review'] = critical_review_tag_pack
                
                # Extract Customer_Name
                customer_name_tags = soup.select('span.a-profile-name')
                result['Customer_Name'] = customer_name_tags[1].text if len(customer_name_tags) > 1 else 'None'
                    
                # Extract Customer_Review_Comment                
                review_comment_tag = critical_review_tag.find('div', class_='a-row a-spacing-top-mini')
                result['Customer_Review_Comment'] = review_comment_tag.text.strip() if review_comment_tag else 'None'
                
                # Extract Customer_Review_Title
                review_title_tag = critical_review_tag.select_one('span[data-hook="review-title"]')
                result['Customer_Review_Title'] = review_title_tag.text if review_title_tag else 'None'

                # Extract the post time
                critical_review_tags_date = critical_review_tag.select('div.a-expander-content.a-expander-partial-collapse-content span.a-size-base.a-color-secondary.review-date')
                if critical_review_tags_date:
                    post_time_text = critical_review_tags_date[0].text.strip()
                    match = re.search(r'on (.+)$', post_time_text)
                    if match:
                        date_string = match.group(1)
                        try:
                            post_date = datetime.strptime(date_string, '%B %d, %Y')
                            # Convert the datetime object to a string in ISO format
                            result['Post_Date'] = post_date.isoformat()                            
                        except ValueError as ve:
                            print(f"Error parsing date string {date_string}: {ve}")
                            result['Post_Date'] = '-'
                    else:
                        print("Date not found in text:", post_time_text)
                        result['Post_Date'] = '-'
                else:
                    print("Date tag not found")
                    result['Post_Date'] = None

                # Use soup.select() to find all matching elements
                critical_review_tags_ = soup.select('div.a-column.a-span6.view-point-review.critical-review.a-span-last div.a-row.a-spacing-top-small span.a-size-small.a-color-tertiary span.review-votes')

                # Check if any elements were found
                if critical_review_tags_:
                    # Take the first found element (if there are multiple) and directly extract the text
                    helpful_text = critical_review_tags_[0].text.strip()
                    print("Helpful Text:", helpful_text)  # Debugging line
                    
                    # Check if the text starts with a digit and extract the first contiguous digit sequence
                    match = re.match(r'\d+', helpful_text)
                    if match:
                        helpful_count = int(match.group())
                    else:
                        # If the text doesn't start with a digit, try converting the first word to a number
                        helpful_count = w2n.word_to_num(helpful_text.split()[0])
                else:
                    print("Tag not found")  # Debugging line
                    helpful_count = 0

                result['Customers_Influenced'] = helpful_count


            else:
                # Handle the case where there is only one or no matching element
                result['Customer_id_Critical_Review'] = 'None'
                result['Customer_Name'] = 'None'
                result['Customer_Review_Comment'] = 'None'
                result['Customer_Review_Title'] = 'None'
                result['Customers_Influenced'] = 0

        return result
    except Exception as e:
        print(f"Error scraping extra parameters for {url}: {e}")
        traceback.print_exc()
    return {}


def scrape_amazon(categories):
    driver = setup_driver()
    all_products = []
    seen_products = set()

    for category, base_url in categories.items():
        products = []

        for page in range(1, 2):
            url = f"{base_url}&page={page}"

            try:
                driver.get(url)
                WebDriverWait(driver, 25).until(EC.presence_of_element_located((By.CSS_SELECTOR, "[data-asin]")))
            except TimeoutException:
                print(f"Timed out waiting for elements on page {page} of category {category}.")
                continue

            time.sleep(random.uniform(3.0, 6.0))
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            for product in soup.find_all('div', attrs={"data-asin": True}):
                product_dict = {}

                product_dict['Product_ID'] = product.attrs.get('data-asin', None)

                item_name = product.find('span', class_='a-text-normal')
                if item_name:
                    product_dict['product'] = item_name.text.strip()

                product_price = product.find('span', class_='a-offscreen')
                if product_price:
                    product_price = product_price.text.strip().replace("$", "").replace(",", "").strip()
                    product_dict['price'] = product_price

                rating_spans = product.find_all('span', attrs={"aria-label": True})
                for rating_span in rating_spans:
                    aria_label_value = rating_span.attrs["aria-label"]
                    if "stars" in aria_label_value:
                        product_dict['ratings'] = aria_label_value.split(" ")[0]
                    else:
                        if 'K+' in aria_label_value:
                            product_dict['review_responders'] = aria_label_value
                        else:
                            try:
                                int_value = int(aria_label_value)
                                product_dict['review_responders'] = aria_label_value
                            except ValueError:
                                pass

                item_reviews = product.find('span', class_='a-size-base s-underline-text')
                if item_reviews:
                    reviews_text = item_reviews.text.strip()
                    reviews_count = process_review_count(reviews_text)
                    product_dict['reviews'] = reviews_count


                # Extract ASIN
                product_dict['Product_ID'] = product.attrs.get('data-asin', None)

                # Construct the review URL using ASIN
                if product_dict['Product_ID']:
                    asin = product_dict['Product_ID']
                    product_dict['url'] = f"https://www.amazon.com/product-reviews/{asin}/ref=cm_cr_dp_d_show_all_top?ie=UTF8&reviewerType=all_reviews"
                    
                else:
                    product_dict['url'] = "None"


                product_dict['category'] = category

                if 'Product_ID' in product_dict and product_dict['Product_ID']:
                # Create a unique identifier for the product
                    identifier = product_dict['Product_ID']

                    if identifier not in seen_products:
                        seen_products.add(identifier) #
                        if product_dict.get('url'):
                            extra_params = scrape_extra_parameters(product_dict['url'], driver)
                            product_dict.update(extra_params)
                        products.append(product_dict) #
            all_products.extend(products)
    driver.quit()
    return json.dumps(all_products)


if __name__ == '__main__':
    categories = {
        'Smartphones': 'https://www.amazon.com/s?k=smartphone&ref=nb_sb_noss',
        # 'Laptops': 'https://www.amazon.com/s?k=Laptops&ref=nb_sb_noss',
        # 'video_games': 'https://www.amazon.com/s?k=video_games&ref=nb_sb_noss',
        # 'Dresses':'https://www.amazon.com/s?k=Dresses&ref=nb_sb_noss',
        # 'Shoes':'https://www.amazon.com/s?k=Shoes&ref=nb_sb_noss',
        # 'Accessories':'https://www.amazon.com/s?k=accessories+for+clothes&ref=nb_sb_noss',
    }

    all_products = []
    try:
        all_products = json.loads(scrape_amazon(categories))
    except Exception as e:
        print(f"Error occurred during scraping: {e}")
    finally:
        with open('amazon_data_ext.json', 'w') as file:
            json.dump(all_products, file)


INFO:WDM:Get LATEST edgedriver version for Edge 117.0.2045
INFO:WDM:Get LATEST edgedriver version for Edge 117.0.2045
INFO:WDM:There is no [win64] edgedriver "117.0.2045.43" for browser edge "117.0.2045" in cache
INFO:WDM:Get LATEST edgedriver version for Edge 117.0.2045
INFO:WDM:About to download new driver from https://msedgedriver.azureedge.net/117.0.2045.43/edgedriver_win64.zip
INFO:WDM:Driver downloading response is 200
INFO:WDM:Get LATEST edgedriver version for Edge 117.0.2045
INFO:WDM:Driver has been saved in cache [C:\Users\Kasim\.wdm\drivers\edgedriver\win64\117.0.2045.43]


Helpful Text: 82 people found this helpful
Helpful Text: 82 people found this helpful
Helpful Text: 82 people found this helpful
Helpful Text: 82 people found this helpful
Helpful Text: 82 people found this helpful
Helpful Text: 5 people found this helpful
Helpful Text: 5 people found this helpful
Helpful Text: 5 people found this helpful
Helpful Text: 5 people found this helpful
Helpful Text: 5 people found this helpful
Helpful Text: 2 people found this helpful
Helpful Text: 2 people found this helpful
Helpful Text: 2 people found this helpful
Helpful Text: 2 people found this helpful
Helpful Text: 2 people found this helpful
TimeoutException: Could not find reviews for https://www.amazon.com/product-reviews/B0C37QXBH3/ref=cm_cr_dp_d_show_all_top?ie=UTF8&reviewerType=all_reviews
Helpful Text: 5 people found this helpful
Helpful Text: 5 people found this helpful
Helpful Text: 5 people found this helpful
Helpful Text: 5 people found this helpful
Helpful Text: 5 people found this helpful

In [59]:
import pandas as pd
import psycopg2
import numpy as np
import re
import logging
from datetime import datetime

logging.basicConfig(level=logging.INFO)

# Load the JSON data into a pandas DataFrame
df = pd.read_json('amazon_data_ext.json')


# Replace NaN values in specified string columns with "None"
string_columns = [
    'Customer_1_ID',
    'Customer_1_Comment',
    'Customer_id_Critical_Review',
    'Customer_Name',
    'Customer_Review_Comment',
    'Customer_Review_Title',
    'Customer_2_ID'
]
df[string_columns] = df[string_columns].fillna("None")

# Replace NaN values in specified integer columns with 0
integer_columns = [
    'Customer_1_Star_Rating',
    'Customer_1_buying_influence',
    'Customers_Influenced'
]
df[integer_columns] = df[integer_columns].fillna(0)

# Check if the 'Customers_Influenced' column is in the DataFrame
if 'Customers_Influenced' not in df.columns:
    print("Column 'Customers_Influenced' not found in the DataFrame. Please check the column name in the JSON file.")

# Convert 'Post_Date' to datetime object and handle any conversion errors by setting them to NaT
df['Post_Date'] = pd.to_datetime(df['Post_Date'], errors='coerce', format='%Y-%m-%dT%H:%M:%S')

# Replace NaT values with a date within bounds or use NaT
df['Post_Date'].fillna(pd.NaT, inplace=True)

# Convert 'Post_Date' to 'yyyy-mm-dd' string format for PostgreSQL
df['Post_Date'] = df['Post_Date'].apply(lambda x: x.strftime('%Y-%m-%d') if pd.notna(x) else '1677-09-21')



# Replace NaN values with 'None' in the specified columns
df['Customer_id_Critical_Review'] = df['Customer_id_Critical_Review'].replace({'"NaN"': 'None', 'NaN': 'None'})
df['Customer_Name'] = df['Customer_Name'].replace({'"NaN"': 'None', 'NaN': 'None'})
df['Customer_Review_Comment'] = df['Customer_Review_Comment'].replace({'"NaN"': 'None', 'NaN': 'None'})
df['Customer_Review_Title'] = df['Customer_Review_Title'].replace({'"NaN"': 'None', 'NaN': 'None'})
df['Customers_Influenced'] = df['Customers_Influenced'].replace({'"NaN"': 'None', 'NaN': 'None'})

# Define the columns to be updated
columns_to_update = [f'Customer_{i}_Comment' for i in range(1, 6)]
for column in columns_to_update:
    df[column] = df[column].replace({'"NaN"': 'None', 'NaN': 'None'})

columns_to_update = [f'Customer_{i}_ID' for i in range(1, 6)]
for column in columns_to_update:
    df[column] = df[column].replace({'"NaN"': 'None', 'NaN': 'None'})

# Drop all items with review having zero values
df.drop(df.index[df['Customer_1_ID'] == 'None'], inplace=True)

# Define the columns to be updated
new_columns = ['Customer_id_Critical_Review', 'Customer_Name', 'Customer_Review_Comment', 'Customer_Review_Title', 'Customers_Influenced']
for column in new_columns:
    if column == 'Customers_Influenced': df[column] = df[column].replace({'"NaN"': 'None', 'NaN': 'None'})

# Drop the 'review_responders' column if it exists
if 'review_responders' in df.columns:
    df.drop(columns=['review_responders'], inplace=True)

# Handle other columns similarly
df['price'] = df['price'].apply(lambda x: float(x) if pd.notnull(x) else None)
df['ratings'] = df['ratings'].apply(lambda x: float(x) if pd.notnull(x) else None)
df['reviews'] = df['reviews'].fillna(0).astype(int)

# Drop rows where the 'price' column is NaN
df.dropna(subset=['price'], inplace=True)

# Drop all items with review having zero values or Customer_1_ID as 'None'
df.drop(df.index[(df['reviews'] == 0) | (df['Customer_1_ID'] == 'None')], inplace=True)

################################
# df.info()
# print(df.tail())
################################
# Connect to PostgreSQL
conn = psycopg2.connect(
    host="localhost",
    database="postgres",
    user="postgres",
    password="demopass",
    client_encoding='utf8'
)

cur = conn.cursor()

# Modify the CREATE TABLE query to include additional columns
create_table_query = """
DROP TABLE IF EXISTS amazon_data_ext;
CREATE TABLE IF NOT EXISTS amazon_data_ext (
    product_id TEXT NOT NULL,
    product TEXT NOT NULL,
    star_ratings NUMERIC NULL,
    price_dollars NUMERIC NULL,
    total_ratings INTEGER NOT NULL,
    category TEXT NOT NULL,
    url TEXT NOT NULL,
    Customer_id_Critical_Review TEXT,
    Customer_Name TEXT,
    Post_Date DATE,
    Customer_Review_Comment TEXT,
    Customer_Review_Title TEXT,
    Customers_Influenced INTEGER,
    """ + ",\n    ".join([f"Customer_{i}_ID TEXT, Customer_{i}_Star_Rating NUMERIC, Customer_{i}_Comment TEXT, Customer_{i}_buying_influence INTEGER" for i in range(1, 6)]) + """
)
"""
cur.execute(create_table_query)
conn.commit()

def clean_format_data(row):
    # Extract the values directly, as they are already cleaned
    ratings = row['ratings']
    price = row['price']
    reviews = row['reviews']
    product_id = row['Product_ID']
    product = psycopg2.extensions.adapt(str(row['product']).encode('utf-8', 'replace')).getquoted().decode('utf-8')[1:-1]
    category = psycopg2.extensions.adapt(row['category'].encode('utf-8', 'replace')).getquoted().decode('utf-8')[1:-1]
    url = row['url']
      
    critical_review_id = row['Customer_id_Critical_Review'] if row['Customer_id_Critical_Review'] != 'None' else None
    customer_name = row['Customer_Name'] if row['Customer_Name'] != 'None' else None
    customer_review_comment = row['Customer_Review_Comment'] if row['Customer_Review_Comment'] != 'None' else None
    customer_review_title = row['Customer_Review_Title'] if row['Customer_Review_Title'] != 'None' else None
    customers_influenced = row['Customers_Influenced'] if row['Customers_Influenced'] != 'None' else None  # Correctly handle NaN values
    
    # Handle additional customer information
    customer_data = []
    for i in range(1, 6):
        customer_id = row[f'Customer_{i}_ID'] if row[f'Customer_{i}_ID'] != 'None' else None
        star_rating = row[f'Customer_{i}_Star_Rating'] if pd.notna(row[f'Customer_{i}_Star_Rating']) else None
        comment = psycopg2.extensions.adapt(str(row[f'Customer_{i}_Comment']).encode('utf-8', 'replace')).getquoted().decode('utf-8')[1:-1]
        buying_influence = row[f'Customer_{i}_buying_influence'] if pd.notna(row[f'Customer_{i}_buying_influence']) else None

        customer_data.extend([customer_id, star_rating, comment, buying_influence])

    # Validate and format the 'Post_Date' before returning

    # Validate and format the 'Post_Date' before returning
    post_date = row['Post_Date']
    if post_date is not None and isinstance(post_date, str):
        try:
            # Check if post_date is already in 'YYYY-MM-DD' format
            if re.match(r'\d{4}-\d{2}-\d{2}', post_date):
                datetime.strptime(post_date, '%Y-%m-%d')  # Validate the format
            else:
                # Try to parse the post_date to a datetime object with a different format
                parsed_date = datetime.strptime(post_date, '%Y-%m-%dT%H:%M:%S')
                # Format it back to a string in the desired format
                post_date = parsed_date.strftime('%Y-%m-%d')
        except ValueError as ve:
            logging.error(f"Invalid date format for Post_Date: {post_date}. Setting it to default value.")
            post_date = "0001-01-01"  # Default value for invalid date format
    else:
        logging.error(f"Post_Date is not a string or is None: {post_date}. Setting it to default value.")
        post_date = "0001-01-01"  # Default value for None or non-string values
    
    # Replace other None values with appropriate default values
    critical_review_id = critical_review_id if critical_review_id is not None else "Unavailable"
    customer_name = customer_name if customer_name is not None else "Unavailable"
    customer_review_comment = customer_review_comment if customer_review_comment is not None else "Unavailable"
    customer_review_title = customer_review_title if customer_review_title is not None else "Unavailable"
    customers_influenced = customers_influenced if customers_influenced is not None else 0  # Default numeric value
    
    # Handle additional customer information
    customer_data = []
    for i in range(1, 6):
        customer_id = row[f'Customer_{i}_ID'] if row[f'Customer_{i}_ID'] != 'None' else "Unavailable"
        star_rating = row[f'Customer_{i}_Star_Rating'] if pd.notna(row[f'Customer_{i}_Star_Rating']) else 0.0
        comment = psycopg2.extensions.adapt(str(row[f'Customer_{i}_Comment']).encode('utf-8', 'replace')).getquoted().decode('utf-8')[1:-1]
        comment = comment if comment != 'None' else "Unavailable"
        buying_influence = row[f'Customer_{i}_buying_influence'] if pd.notna(row[f'Customer_{i}_buying_influence']) else 0
        
        customer_data.extend([customer_id, star_rating, comment, buying_influence])

    logging.info(f"Formatted Post_Date: {post_date}")  # Log the formatted post_date
    
    return product_id, product, price, ratings, reviews, category, url, critical_review_id, customer_name, post_date, customer_review_comment, customer_review_title, customers_influenced, *customer_data


insert_query = """
INSERT INTO amazon_data_ext (
    product_id, product, price_dollars, star_ratings, total_ratings, category, url,
    Customer_id_Critical_Review, Customer_Name, Post_Date, Customer_Review_Comment, Customer_Review_Title, Customers_Influenced,
    """ + ", ".join([f"Customer_{i}_ID, Customer_{i}_Star_Rating, Customer_{i}_Comment, Customer_{i}_buying_influence" for i in range(1, 6)]) + """
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, """ + ", ".join(["%s"] * 20) + ")"


# Insert the data from the pandas DataFrame into the PostgreSQL table
for index, row in df.iterrows():
    try:
        cur.execute(insert_query, clean_format_data(row))
    except Exception as e:
        logging.error(f"Error inserting row at index {index}: {e}")
        logging.error(f"Row data: {row}")  # Log the entire row data
        conn.rollback()



conn.commit()
cur.close()
conn.close()

# Rename the columns in the DataFrame
df.rename(columns={'ratings': 'star_ratings', 'reviews': 'total_ratings', 'price': 'price_dollars'}, inplace=True)

# Define the base order of columns
column_order = [
    "Product_ID", "product", "price_dollars", "star_ratings", 
    "total_ratings", "url", "category", "Customer_id_Critical_Review", 
    "Customer_Name", "Customer_Review_Comment", "Customer_Review_Title", 
    "Post_Date", "Customers_Influenced"
]

# Add the customer related columns to the order list
for i in range(1, 6):
    column_order.extend([
        f"Customer_{i}_ID", f"Customer_{i}_Star_Rating", 
        f"Customer_{i}_Comment", f"Customer_{i}_buying_influence"
    ])

# Reorder the columns
df = df.reindex(columns=column_order)

df.info()
# Save the DataFrame to a CSV file with updated column names
df.to_csv('amazon_data_ext.csv', index=False, encoding='utf-8')

INFO:root:Formatted Post_Date: 2023-06-30
INFO:root:Formatted Post_Date: 2023-03-07
INFO:root:Formatted Post_Date: 2023-08-17
INFO:root:Formatted Post_Date: 2023-04-24
INFO:root:Formatted Post_Date: 2023-05-04
INFO:root:Formatted Post_Date: 2023-07-13
INFO:root:Formatted Post_Date: 2020-11-18
INFO:root:Formatted Post_Date: 2023-09-22
INFO:root:Formatted Post_Date: 2023-08-04
INFO:root:Formatted Post_Date: 2022-08-04
INFO:root:Formatted Post_Date: 2023-09-09
INFO:root:Formatted Post_Date: 2023-09-14
INFO:root:Formatted Post_Date: 2023-08-23
INFO:root:Formatted Post_Date: 2023-08-23
INFO:root:Formatted Post_Date: 2023-09-23
INFO:root:Formatted Post_Date: 1677-09-21
INFO:root:Formatted Post_Date: 2023-09-22
INFO:root:Formatted Post_Date: 2022-11-16
INFO:root:Formatted Post_Date: 2023-09-05
INFO:root:Formatted Post_Date: 2023-09-07
INFO:root:Formatted Post_Date: 2023-09-14
INFO:root:Formatted Post_Date: 2023-08-26
INFO:root:Formatted Post_Date: 2023-08-21
INFO:root:Formatted Post_Date: 202

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 0 to 25
Data columns (total 33 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Product_ID                   24 non-null     object 
 1   product                      24 non-null     object 
 2   price_dollars                24 non-null     float64
 3   star_ratings                 24 non-null     float64
 4   total_ratings                24 non-null     int32  
 5   url                          24 non-null     object 
 6   category                     24 non-null     object 
 7   Customer_id_Critical_Review  24 non-null     object 
 8   Customer_Name                24 non-null     object 
 9   Customer_Review_Comment      24 non-null     object 
 10  Customer_Review_Title        24 non-null     object 
 11  Post_Date                    24 non-null     object 
 12  Customers_Influenced         24 non-null     float64
 13  Customer_1_ID              