In [37]:
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, Float, DateTime, ForeignKey, text
from snowflake.sqlalchemy import URL
import os
from dotenv import load_dotenv
import logging
import sys

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Load environment variables
load_dotenv()

# Get Snowflake connection details from environment variables
def get_snowflake_engine():
    logger.info("Attempting to create Snowflake engine.")
    try:
        engine = create_engine(URL(
            user=os.getenv("MY_SNOWFLAKE_USER"),
            password=os.getenv("MY_SNOWFLAKE_PASSWORD"),
            account=os.getenv("MY_SNOWFLAKE_ACCOUNT"),
            database=os.getenv("SNOWFLAKE_DATABASE"),
            schema=os.getenv("SNOWFLAKE_SCHEMA"),
            warehouse=os.getenv("SNOWFLAKE_WAREHOUSE"),
            role=os.getenv("SNOWFLAKE_ROLE"),
        ))
        logger.info("Snowflake engine created successfully.")
        return engine
    except Exception as e:
        logger.error("Error creating Snowflake engine: %s", e)
        sys.exit(1)

def __run_query(query, engine = get_snowflake_engine(), update = False):
    connection = engine.connect()
    logging.info("Connected to Snowflake")
    
    try:
        # connection.execute(text("USE ASG_4_P2.PUBLIC;"))
        # logging.info("Switched to ASG_4_P2.PUBLIC")
        
        # Make parameterized query such that no DELETE / UPDATE queries can be run
        query = text(query)
        logger.info(f"Executing query: {query}")
        result = connection.execute(query).fetchall()
        if update:
            connection.commit()
            logging.info("Transaction committed.")
        try:
            result_df = pd.DataFrame(result)
            logging.info("Query executed successfully and results fetched")
            return result_df
        except Exception as e:
            logging.error(f"Error converting query results to DataFrame: {e}")
            return None
    except Exception as e:
        logging.error(f"Error executing query: {e}")
        return None
    finally:
        connection.close()
        logging.info("Connection closed")

def update_churn_flag(churned_cust_ids, non_churned_customer_ids):
    update_query_churn = f"""
    UPDATE "Customers"
    SET churn_flag = TRUE
    WHERE customer_id IN ({', '.join(f"'{customer_id}'" for customer_id in churned_cust_ids)});
    """
    __run_query(update_query_churn, update=True)
    
    update_query_non_churn = f"""
    UPDATE "Customers"
    SET churn_flag = TRUE
    WHERE customer_id IN ({', '.join(f"'{customer_id}'" for customer_id in non_churned_customer_ids)});
    """
    __run_query(update_query_non_churn, update=True)

    

2024-08-12 22:51:25,355 - INFO - Attempting to create Snowflake engine.
2024-08-12 22:51:25,360 - INFO - Snowflake engine created successfully.


In [44]:

customers = __run_query('SELECT * FROM "Customers";')
orders = __run_query('SELECT * FROM "Orders";')

2024-08-04 11:36:20,290 - INFO - Snowflake Connector for Python Version: 3.12.0, Python Version: 3.10.12, Platform: Linux-5.15.153.1-microsoft-standard-WSL2-x86_64-with-glibc2.35
2024-08-04 11:36:20,301 - INFO - Connecting to GLOBAL Snowflake domain
2024-08-04 11:36:20,303 - INFO - This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
2024-08-04 11:36:21,079 - INFO - Number of results in first chunk: 1
2024-08-04 11:36:21,255 - INFO - Number of results in first chunk: 1
2024-08-04 11:36:21,256 - INFO - Connected to Snowflake
2024-08-04 11:36:21,579 - INFO - Number of results in first chunk: 0
2024-08-04 11:36:24,162 - INFO - Query executed successfully and results fetched
2024-08-04 11:36:24,339 - INFO - Number of results in first chunk: 1
2024-08-04 11:36:24,339 - INFO - Connection closed
2024-08-04 11:

# Calculate Churn

In [4]:
def calculate_churn_rate(orders, customers, churn_period_days=200):
    # Convert order_purchase_timestamp to datetime
    orders['order_purchase_timestamp'] = pd.to_datetime(orders['order_purchase_timestamp'])

    # Determine the reference date (e.g., the latest order date in the dataset)
    reference_date = orders['order_purchase_timestamp'].max()

    # Calculate the last purchase date for each customer
    last_purchase_date = orders.groupby('customer_id')['order_purchase_timestamp'].max().reset_index()

    # Identify churned customers: those who haven't purchased within the churn period
    churned_customers = last_purchase_date[
        last_purchase_date['order_purchase_timestamp'] < reference_date - pd.Timedelta(days=churn_period_days)
    ]

    # Identify non-churned customers: those who have purchased within the churn period
    non_churned_customers = last_purchase_date[
        last_purchase_date['order_purchase_timestamp'] >= reference_date - pd.Timedelta(days=churn_period_days)
    ]

    # Convert customer IDs to lists
    churned_customer_ids = churned_customers['customer_id'].tolist()
    non_churned_customer_ids = non_churned_customers['customer_id'].tolist()

    # Count the total number of unique customers
    total_customers = customers['customer_id'].nunique()

    # Count the number of churned customers
    num_churned_customers = len(churned_customer_ids)

    # Calculate churn rate
    churn_rate = (num_churned_customers / total_customers) * 100

    # Output the results
    logger.info(f"Total Customers: {total_customers}")
    logger.info(f"Churned Customers: {num_churned_customers}")
    logger.info(f"Churn Rate: {churn_rate:.2f}%")

    return churned_customer_ids, non_churned_customer_ids

In [50]:
churned_cust_ids, non_churned_customer_ids = calculate_churn_rate(orders, customers)

2024-08-04 11:55:48,648 - INFO - Total Customers: 99441
2024-08-04 11:55:48,649 - INFO - Churned Customers: 66569
2024-08-04 11:55:48,649 - INFO - Churn Rate: 66.94%


In [53]:
update_churn_flag(churned_cust_ids, non_churned_customer_ids)

2024-08-04 12:01:49,235 - INFO - Snowflake Connector for Python Version: 3.12.0, Python Version: 3.10.12, Platform: Linux-5.15.153.1-microsoft-standard-WSL2-x86_64-with-glibc2.35
2024-08-04 12:01:49,236 - INFO - Connecting to GLOBAL Snowflake domain
2024-08-04 12:01:49,237 - INFO - This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
2024-08-04 12:01:49,883 - INFO - Number of results in first chunk: 1
2024-08-04 12:01:50,025 - INFO - Number of results in first chunk: 1
2024-08-04 12:01:50,026 - INFO - Connected to Snowflake
2024-08-04 12:02:03,469 - INFO - Number of results in first chunk: 1
2024-08-04 12:02:03,470 - INFO - Transaction committed.
2024-08-04 12:02:03,471 - INFO - Query executed successfully and results fetched
2024-08-04 12:02:03,683 - INFO - Number of results in first chunk: 1
2024-08-0

# Get Churn Status by cust_id

In [11]:
#Query to get the churn flag for a customer
def get_churn_flag(customer_id):
    sql_query = f'''select churn_flag from "Customers" where customer_id = '{customer_id}';'''
    return __run_query(sql_query)

#Query to get the min and max price of a product
def __get_min_max_price(product_id):
    sql_query = f'''SELECT 
        MAX(price) AS max_price,
        MIN(price) AS min_price
    FROM 
        "Order_Items"
    GROUP BY 
        product_id
    having product_id = '{product_id}';'''
    return __run_query(sql_query)

#Function to get the price for product according to churn flag
def get_price(product_id, churn_flag):
    min_max_price = __get_min_max_price(product_id)
    if churn_flag:
        return min_max_price['min_price'][0]
    else:
        return min_max_price['max_price'][0]



In [13]:
customer_id = "00012a2ce6f8dcda20d059ce98491703"
print(get_price("4244733e06e7ecb4970a6e2683c13e61", get_churn_flag(customer_id)['churn_flag'][0]))

2024-08-06 00:01:23,386 - INFO - Connected to Snowflake
2024-08-06 00:01:23,388 - INFO - Executing query: select churn_flag from "Customers" where customer_id = '00012a2ce6f8dcda20d059ce98491703';
2024-08-06 00:01:23,740 - INFO - Number of results in first chunk: 1
2024-08-06 00:01:23,742 - INFO - Query executed successfully and results fetched
2024-08-06 00:01:23,834 - INFO - Number of results in first chunk: 1
2024-08-06 00:01:23,835 - INFO - Connection closed
2024-08-06 00:01:23,836 - INFO - Connected to Snowflake
2024-08-06 00:01:23,836 - INFO - Executing query: SELECT 
        MAX(price) AS max_price,
        MIN(price) AS min_price
    FROM 
        "Order_Items"
    GROUP BY 
        product_id
    having product_id = '4244733e06e7ecb4970a6e2683c13e61';
2024-08-06 00:01:23,941 - INFO - Number of results in first chunk: 1
2024-08-06 00:01:23,943 - INFO - Query executed successfully and results fetched
2024-08-06 00:01:24,032 - INFO - Number of results in first chunk: 1
2024-08-06

55.9


## Update the products table

In [64]:
import os
from dotenv import load_dotenv
import requests
import base64
import json

# Load environment variables from .env file
load_dotenv()

# Get the environment variables
CLIENT_ID = os.getenv('EBAY_APP_ID')
CLIENT_SECRET = os.getenv('EBAY_CERT_ID')
OAUTH_URL = 'https://api.ebay.com/identity/v1/oauth2/token'

# Encode the client ID and client secret
credentials = base64.b64encode(f'{CLIENT_ID}:{CLIENT_SECRET}'.encode('utf-8')).decode('utf-8')

headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'Authorization': f'Basic {credentials}'
}

data = {
    'grant_type': 'client_credentials',
    'scope': 'https://api.ebay.com/oauth/api_scope'
}

response = requests.post(OAUTH_URL, headers=headers, data=data)
if response.status_code == 200:
    access_token = response.json()['access_token']
    # print(f'Access Token: {access_token}')
else:
    print(f'Error: {response.status_code}')
    print(response.json())



In [71]:
import requests
import json

def search_ebay_items(params, access_token=access_token):
    endpoint = 'https://api.ebay.com/buy/browse/v1/item_summary/search'
    
    # Set up the request headers
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json',
    }
    
    # Make the API request
    response = requests.get(endpoint, headers=headers, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        items = data.get('itemSummaries', [])

        q_param = params.get('q', 'results').replace(' ', '_')
        file_name = f'data_ebay/ebay_browse_search_results_{q_param}_2.json'
        
        # Print and save the response
        # with open(file_name, 'w') as file:
        #     json.dump(items, file, indent=4)
        #     print(f'Saved {len(items)} items to {file_name}')
        # print(len(items))
        return items
    else:
        print(f'Error: {response.status_code}')
        print(response.json())

def get_category_id_by_name(category_name, access_token=access_token):
    # Define the endpoint and headers
    url = 'https://api.ebay.com/commerce/taxonomy/v1/category_tree/0/get_category_suggestions'
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json',
    }
    
    # Define the query parameters
    params = {
        'q': category_name  # The name of the category you want to search for
    }
    
    # Make the GET request
    response = requests.get(url, headers=headers, params=params)
    
    # Check the response status and parse the response
    if response.status_code == 200:
        data = response.json()
        # formatted_json = json.dumps(data, indent=4)
        # print(formatted_json)
        categories = data.get('categorySuggestions')
        
        # Print out category names and IDs
        for category in categories:
            print(f"Category Name: {category['category']['categoryName']}, Category ID: {category['category']['categoryId']}")
        return categories
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None
    
def call_ebay_api_by_price_count(q, min_price, max_price, count):
    params = {
        'q': q,
        
        'limit': count
    }
    return search_ebay_items(params)

In [56]:
x = call_ebay_api_by_price_count('laptop', 100, 1000, 10)
x

10


[{'itemId': 'v1|156164986922|0',
  'title': 'Lenovo Ideapad 1 15Iau7 15.6" Laptop Intel Core i5-1235U 8GB RAM 512GB SSD W11H',
  'leafCategoryIds': ['177'],
  'categories': [{'categoryId': '177',
    'categoryName': 'PC Laptops & Netbooks'},
   {'categoryId': '58058', 'categoryName': 'Computers/Tablets & Networking'},
   {'categoryId': '175672', 'categoryName': 'Laptops & Netbooks'}],
  'image': {'imageUrl': 'https://i.ebayimg.com/images/g/Qe0AAOSwKkFmGVlB/s-l225.jpg'},
  'price': {'value': '289.99', 'currency': 'USD'},
  'itemHref': 'https://api.ebay.com/buy/browse/v1/item/v1%7C156164986922%7C0',
  'seller': {'username': 'buyrefurbishedus',
   'feedbackPercentage': '97.7',
   'feedbackScore': 13568},
  'marketingPrice': {'originalPrice': {'value': '434.99', 'currency': 'USD'},
   'discountPercentage': '33',
   'discountAmount': {'value': '145.00', 'currency': 'USD'},
   'priceTreatment': 'LIST_PRICE'},
  'condition': 'Certified - Refurbished',
  'conditionId': '2000',
  'thumbnailImag

In [41]:
categories = __run_query('SELECT DISTINCT PRODUCT_CATEGORY_NAME_ENGLISH  FROM "Product_Category_Translation";')['product_category_name_english'].tolist()

2024-08-13 17:16:32,234 - INFO - Connected to Snowflake


2024-08-13 17:16:32,251 - INFO - Executing query: SELECT DISTINCT PRODUCT_CATEGORY_NAME_ENGLISH  FROM "Product_Category_Translation";
2024-08-13 17:16:32,962 - ERROR - Error executing query: (snowflake.connector.errors.ProgrammingError) 390114 (08001): None: Authentication token has expired.  The user must authenticate again.
[SQL: SELECT DISTINCT PRODUCT_CATEGORY_NAME_ENGLISH  FROM "Product_Category_Translation";]
(Background on this error at: https://sqlalche.me/e/20/f405)


ProgrammingError: (snowflake.connector.errors.ProgrammingError) 390114 (08001): None: Authentication token has expired.  The user must authenticate again.
(Background on this error at: https://sqlalche.me/e/20/f405)

In [35]:
query = '''SELECT 
    product_id,
    MAX(price) AS max_price,
    MIN(price) AS min_price
FROM 
    "Order_Items"
GROUP BY 
    product_id;'''

products_by_price = __run_query(query)

2024-08-12 04:39:47,212 - INFO - Connected to Snowflake
2024-08-12 04:39:47,214 - INFO - Executing query: SELECT 
    product_id,
    MAX(price) AS max_price,
    MIN(price) AS min_price
FROM 
    "Order_Items"
GROUP BY 
    product_id;
2024-08-12 04:39:48,079 - INFO - Number of results in first chunk: 2082
2024-08-12 04:39:48,552 - INFO - Query executed successfully and results fetched
2024-08-12 04:39:48,628 - INFO - Number of results in first chunk: 1
2024-08-12 04:39:48,629 - INFO - Connection closed


In [34]:
print(call_ebay_api_by_price(i, 70, 80))


Saved 1 items to data_ebay/ebay_browse_search_results_construction_tools_construction_2.json
1
None


In [19]:
categories[1]

'computers_accessories'

In [60]:
query = '''select 
    * 
from 
    top_products_with_price_summary
where 
    product_id in ( 
    select 
        product_id 
    from
        top_products_with_price_summary
    );'''

joined_df_english_category = __run_query(query)


2024-08-13 20:29:11,556 - INFO - Snowflake Connector for Python Version: 3.12.0, Python Version: 3.10.12, Platform: Linux-5.15.153.1-microsoft-standard-WSL2-x86_64-with-glibc2.35
2024-08-13 20:29:11,560 - INFO - Connecting to GLOBAL Snowflake domain
2024-08-13 20:29:11,561 - INFO - This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
2024-08-13 20:29:12,525 - INFO - Connected to Snowflake
2024-08-13 20:29:12,527 - INFO - Executing query: select 
    * 
from 
    top_products_with_price_summary
where 
    product_id in ( 
    select 
        product_id 
    from
        top_products_with_price_summary
    );
2024-08-13 20:29:13,688 - INFO - Number of results in first chunk: 979
2024-08-13 20:29:13,700 - INFO - Query executed successfully and results fetched
2024-08-13 20:29:13,820 - INFO - Number of resu

In [39]:
joined_df_english_category.head()

Unnamed: 0,product_id,min_price,max_price,product_category_name_english
0,00066f42aeeb9f3007548bb9d3f33c38,101.65,101.65,perfumery
1,00088930e925c41fd95ebfe695fd2655,129.9,129.9,auto
2,0009406fd7479715e4bef61dd91f2462,229.0,229.0,bed_bath_table
3,000b8f95fcb9e0096488278317764d19,58.9,58.9,housewares
4,000d9be29b5207b54e86aa1b1ac54872,199.0,199.0,watches_gifts


In [42]:
category_counts = joined_df_english_category['product_category_name_english'].value_counts().to_dict()
category_counts

{'bed_bath_table': 3029,
 'sports_leisure': 2867,
 'furniture_decor': 2657,
 'health_beauty': 2444,
 'housewares': 2335,
 'auto': 1900,
 'computers_accessories': 1639,
 'toys': 1411,
 'watches_gifts': 1329,
 'telephony': 1134,
 'baby': 919,
 'perfumery': 868,
 'stationery': 849,
 'fashion_bags_accessories': 849,
 'cool_stuff': 789,
 'garden_tools': 753,
 'pet_shop': 719,
 'electronics': 517,
 'construction_tools_construction': 400,
 'home_appliances': 370,
 'luggage_accessories': 349,
 'consoles_games': 317,
 'office_furniture': 309,
 'musical_instruments': 289,
 'small_appliances': 231,
 'home_construction': 225,
 'books_general_interest': 216,
 'fashion_shoes': 173,
 'furniture_living_room': 156,
 'air_conditioning': 124,
 'books_technical': 123,
 'fixed_telephony': 116,
 'home_confort': 111,
 'market_place': 104,
 'food_drink': 104,
 'fashion_male_clothing': 95,
 'kitchen_dining_laundry_garden_furniture': 94,
 'signaling_and_security': 93,
 'construction_tools_safety': 91,
 'home_ap

## Without Price matching 

In [77]:
import pandas as pd
from tqdm import tqdm

# List to hold the results
results = []

# Set to track used eBay item IDs to ensure uniqueness
used_ebay_item_ids = set()

# Loop through the entire DataFrame with a progress bar
for index, row in tqdm(joined_df_english_category.iterrows(), total=len(joined_df_english_category)):
    category = row['product_category_name_english']
    count = category_counts.get(category, 20)  # Use the count from the category_counts dictionary
    
    # Ensure count does not exceed 200
    if count > 200:
        count = 200
    
    # Call the eBay API
    response = call_ebay_api_by_price_count(category, row['min_price'], row['max_price'], count)
    
    # Process the response, which is a list of dictionaries
    selected_item = None
    if response:  # Ensure there are items in the response
        for item in response:
            ebay_item_id = item.get('itemId', None)
            if ebay_item_id and ebay_item_id not in used_ebay_item_ids:
                # Found a unique product
                used_ebay_item_ids.add(ebay_item_id)
                selected_item = item
                break

    # If a unique product was found, extract title and image URL
    if selected_item:
        title = selected_item.get('title', 'No title')
        image_url = selected_item.get('image', {}).get('imageUrl', 'No image URL')
        ebay_item_id = selected_item.get('itemId', 'No itemId')
        web_url = selected_item.get('itemWebUrl', 'No web URL')
        ebay_price = selected_item.get('price', {}).get('value', 'No price')
    else:
        title = 'No title'
        image_url = 'No image URL'
        ebay_item_id = 'No itemId'
        web_url = 'No web URL'
        ebay_price = 'No price'
    
    # Append the extracted data to the results
    results.append({
        'product_id': row['product_id'],
        'min_price': row['min_price'],
        'max_price': row['max_price'],
        'product_category_name_english': category,
        'title': title,
        'image_url': image_url,
        'ebay_item_id': ebay_item_id,
        'web_url': web_url,
        'ebay_price' : ebay_price
    })

# Convert the results list into a DataFrame
df_results = pd.DataFrame(results)

# Merge the new results DataFrame with the original joined_df_english_category on shared columns
final_df = pd.concat([joined_df_english_category.reset_index(drop=True), df_results[['title', 'image_url', 'ebay_item_id']]], axis=1)

# Display the final DataFrame
final_df


  6%|▌         | 57/979 [00:50<12:13,  1.26it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  6%|▌         | 58/979 [00:50<09:36,  1.60it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  6%|▌         | 59/979 [00:50<07:58,  1.92it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  6%|▌         | 60/979 [00:50<06:32,  2.34it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  6%|▌         | 61/979 [00:51<06:00,  2.55it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  6%|▋         | 63/979 [00:51<04:46,  3.20it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}
Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 64/979 [00:51<04:29,  3.39it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 65/979 [00:52<04:17,  3.55it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 66/979 [00:52<04:25,  3.44it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 67/979 [00:52<04:03,  3.74it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 68/979 [00:52<03:46,  4.02it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 69/979 [00:53<03:39,  4.14it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 70/979 [00:53<03:35,  4.21it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 71/979 [00:53<03:36,  4.19it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 72/979 [00:53<03:48,  3.97it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  7%|▋         | 73/979 [00:54<03:46,  4.01it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 74/979 [00:54<03:38,  4.15it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 75/979 [00:54<03:33,  4.23it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 76/979 [00:54<03:29,  4.32it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 78/979 [00:55<03:19,  4.52it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}
Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 79/979 [00:55<03:19,  4.51it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 80/979 [00:55<03:21,  4.46it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 81/979 [00:55<03:32,  4.22it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 82/979 [00:56<03:25,  4.37it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  8%|▊         | 83/979 [00:56<03:23,  4.40it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  9%|▊         | 84/979 [00:56<03:22,  4.42it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  9%|▊         | 85/979 [00:56<03:25,  4.35it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  9%|▉         | 86/979 [00:57<03:26,  4.31it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  9%|▉         | 87/979 [00:57<03:24,  4.37it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  9%|▉         | 88/979 [00:57<03:17,  4.50it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  9%|▉         | 89/979 [00:57<03:32,  4.18it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  9%|▉         | 91/979 [00:58<03:22,  4.38it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}
Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


  9%|▉         | 93/979 [00:58<03:11,  4.63it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}
Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|▉         | 94/979 [00:58<03:05,  4.77it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|▉         | 95/979 [00:59<07:28,  1.97it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|▉         | 96/979 [01:00<06:19,  2.33it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|▉         | 97/979 [01:00<05:30,  2.67it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|█         | 98/979 [01:00<05:13,  2.81it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|█         | 99/979 [01:00<04:38,  3.16it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|█         | 100/979 [01:01<04:16,  3.43it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|█         | 101/979 [01:01<04:02,  3.63it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 10%|█         | 102/979 [01:01<04:08,  3.53it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█         | 103/979 [01:01<03:52,  3.77it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█         | 104/979 [01:02<03:40,  3.97it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█         | 105/979 [01:02<03:51,  3.78it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█         | 106/979 [01:02<03:39,  3.97it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█         | 107/979 [01:03<04:08,  3.52it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█         | 108/979 [01:03<03:46,  3.85it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█         | 109/979 [01:03<03:41,  3.93it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█         | 110/979 [01:03<04:17,  3.38it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█▏        | 111/979 [01:04<03:58,  3.64it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 11%|█▏        | 112/979 [01:04<03:55,  3.69it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 113/979 [01:04<03:44,  3.85it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 114/979 [01:04<03:44,  3.85it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 116/979 [01:05<03:25,  4.20it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}
Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 117/979 [01:05<03:17,  4.36it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 118/979 [01:05<03:19,  4.31it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 119/979 [01:06<03:27,  4.15it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 120/979 [01:06<03:28,  4.12it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 121/979 [01:06<03:19,  4.31it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}


 12%|█▏        | 122/979 [01:06<07:50,  1.82it/s]

Error: 429
{'errors': [{'errorId': 2001, 'domain': 'ACCESS', 'category': 'REQUEST', 'message': 'Too many requests.', 'longMessage': 'The request limit has been reached for the resource.'}]}





KeyboardInterrupt: 

## With Price Matching

In [76]:
import pandas as pd
from tqdm import tqdm

# List to hold the results
results = []

# Set to track used eBay item IDs to ensure uniqueness
used_ebay_item_ids = set()

# Loop through the entire DataFrame with a progress bar
for index, row in tqdm(joined_df_english_category.iterrows(), total=len(joined_df_english_category)):
    category = row['product_category_name_english']
    count = category_counts.get(category, 20)  # Use the count from the category_counts dictionary
    
    # Ensure count does not exceed 200
    # if count > 200:
    #     count = 200

    count = 200
    
    # Call the eBay API
    response = call_ebay_api_by_price_count(category, row['min_price'], row['max_price'], count)

    min_price = row['min_price'] * 0.8
    max_price = row['max_price'] * 1.2
    
    # Process the response, which is a list of dictionaries
    selected_item = None
    if response:  # Ensure there are items in the response
        for item in response:
            ebay_item_id = item.get('itemId', None)
            item_price_str = item.get('price', {}).get('value', None)
            
            # Convert item_price_str to float if it's not None
            try:
                item_price = float(item_price_str) if item_price_str is not None else None
            except ValueError:
                item_price = None
            
            # Check if the item has a price and it is within the min and max price range
            if (ebay_item_id and ebay_item_id not in used_ebay_item_ids and 
                item_price is not None and min_price <= item_price <= max_price):
                # Found a unique product within the price range
                used_ebay_item_ids.add(ebay_item_id)
                selected_item = item
                break

    # If a unique product was found, extract title and image URL
    if selected_item:
        title = selected_item.get('title', 'No title')
        image_url = selected_item.get('image', {}).get('imageUrl', 'No image URL')
        ebay_item_id = selected_item.get('itemId', 'No itemId')
        web_url = selected_item.get('itemWebUrl', 'No web URL')
    else:
        title = 'No title'
        image_url = 'No image URL'
        ebay_item_id = 'No itemId'
        web_url = 'No web URL'
    
    # Append the extracted data to the results
    results.append({
        'product_id': row['product_id'],
        'min_price': row['min_price'],
        'max_price': row['max_price'],
        'product_category_name_english': category,
        'title': title,
        'image_url': image_url,
        'ebay_item_id': ebay_item_id,
        'web_url': web_url
    })

# Convert the results list into a DataFrame
df_results = pd.DataFrame(results)

# Merge the new results DataFrame with the original joined_df_english_category on shared columns
final_df = pd.concat([joined_df_english_category.reset_index(drop=True), df_results[['title', 'image_url', 'ebay_item_id']]], axis=1)

# Display the final DataFrame
final_df


100%|██████████| 979/979 [14:07<00:00,  1.15it/s]


Unnamed: 0,product_id,cnt,min_price,max_price,product_category_name_english,title,image_url,ebay_item_id
0,5f504b3a1c75b73d6151be81eb05bdc9,63,572.00,642.30,cool_stuff,No title,No image URL,No itemId
1,4473f3e5c65952b074ef987fa5c24662,17,27.99,29.99,telephony,Grandstream GS-HT802 2 Port Analog Telephone A...,https://i.ebayimg.com/images/g/cEQAAOSwKIlgze4...,v1|255022550928|0
2,f4d705aa95ccca448e5b0deb6e5290ba,27,24.90,24.90,bed_bath_table,No title,No image URL,No itemId
3,bbaef2eadf31fe3ea6702077398be06c,66,56.99,56.99,perfumery,The Crown Perfumery Crown Bouquet Eau De Parfu...,https://i.ebayimg.com/images/g/fNkAAOSwm3hlasP...,v1|375095251685|0
4,0e40b1ed4cfd3da1962ec91913e54ba8,15,13.65,13.65,electronics,MINI GAME Anniversary Edition ENTERTAINMENT SY...,https://i.ebayimg.com/images/g/iF8AAOSwZM5msFt...,v1|126610057291|0
...,...,...,...,...,...,...,...,...
974,2002fc3ec2d8a30a7588628fe7aff958,17,30.00,30.00,electronics,HDMI 4K TV Retro Game Stick Console Built-in 6...,https://i.ebayimg.com/images/g/D5wAAOSwei9kSOX...,v1|385572899658|653561916097
975,d3e1006ba3735c0d44160026b6e0ced3,28,108.90,109.90,auto,No title,No image URL,No itemId
976,fb7a100ec8c7b34f60cec22b1a9a10e0,36,49.99,49.99,toys,No title,No image URL,No itemId
977,71a5f1c2a5fd9889ef26b5ac22aec9c6,27,19.90,24.90,furniture_decor,"Vintage Solid Brass Ornate Rose Decal Wall,Doo...",https://i.ebayimg.com/images/g/a6MAAOSw2hdcxbL...,v1|184391345936|0


In [75]:
final_df.to_csv('final_df_without_proce_match.csv', index=False)