In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [12]:
# Load the dataset
data = pd.read_csv("amazon_data_latest.csv")
data.head(3)

Unnamed: 0,Uniq Id,Product Name,Brand Name,Category,Product Description,Selling Price,Product Specification,Technical Details,Shipping Weight,Image,Product Url,Is Amazon Seller,Product Description.1
0,A1,Mobile Phone,Apple,Electronic device,cutting-edge technology,$237.68,Shipping Weight: 10.7 pounds (View shipping ra...,,10.7 pounds,https://images-na.ssl-images-amazon.com/images...,https://www.amazon.com/DB-Longboards-CoreFlex-...,Y,Adidas products offer best performance
1,A2,Laptop,Samsung,,,$99.95,Product Dimensions: 14.7 x 11.1 x 10.2...,The snap circuits mini kits classpack provides...,4 pounds,https://images-na.ssl-images-amazon.com/images...,https://www.amazon.com/Electronic-Circuits-Cla...,Y,Best gaming experience
2,A3,Jacket,Puma,Clothing,high quality material,$34.99,ProductDimensions:10.3x3.4x0.8inches|ItemWeigh...,show up to 2 reviews by default No longer are ...,12.8 ounces,https://images-na.ssl-images-amazon.com/images...,https://www.amazon.com/3Doodler-Plastic-Innova...,Y,Best gaming experience


In [13]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Uniq Id                100 non-null    object
 1   Product Name           100 non-null    object
 2   Brand Name             100 non-null    object
 3   Category               74 non-null     object
 4   Product Description    56 non-null     object
 5   Selling Price          100 non-null    object
 6   Product Specification  90 non-null     object
 7   Technical Details      89 non-null     object
 8   Shipping Weight        88 non-null     object
 9   Image                  100 non-null    object
 10  Product Url            100 non-null    object
 11  Is Amazon Seller       100 non-null    object
 12  Product Description.1  96 non-null     object
dtypes: object(13)
memory usage: 10.3+ KB


In [14]:
data.isnull().sum()

Uniq Id                   0
Product Name              0
Brand Name                0
Category                 26
Product Description      44
Selling Price             0
Product Specification    10
Technical Details        11
Shipping Weight          12
Image                     0
Product Url               0
Is Amazon Seller          0
Product Description.1     4
dtype: int64

In [15]:
# Define the scoring parameters and their weights
scoring_parameters = {
    'compliance': {
        'label_compliance': 0.3,
        'display_compliance': 0.3,
    },
    'correctness': {
        'authenticity': 0.4,
        'branding': 0.6,
    },
    'completeness': {
        'attributes': 0.7,
    },
}

In [16]:
def assess_compliance(row):
    # Check if the product labels and display comply with the applicable laws
    # Return a score between 0 and 1
    
    # Assume we have a list of required labels and displays
    required_labels = ['Selling Price', 'Brand Name', 'Product Name']
    required_displays = ['Image', 'Product Description']
    
    # Calculate the compliance score
    compliance_score = 0
    
    # Check if the required labels are present
    for label in required_labels:
        if label in row:
            compliance_score += 1 / len(required_labels)
    
    # Check if the required displays are present
    for display in required_displays:
        if display in row:
            compliance_score += 1 / len(required_displays)
    
    return int(compliance_score * 10) / 10

In [17]:
def assess_correctness(row):
    # Check if the product is authentic and uses appropriate branding
    # Return a score between 0 and 1
    
    # Assume we have a list of authentic brands
    authentic_brands = ['Apple', 'Samsung', 'Nike','Bosch','Johnson & Johnson','Carrefour','LEGO','Adidas','Johnson & Johnson','MAC cosmetics']
    
    # Calculate the correctness score
    correctness_score = 0
    
    # Check if the product is authentic
    if row['Brand Name'] in authentic_brands:
        correctness_score += 1 / 2
    
    # Check if the product uses appropriate branding
    if row['Brand Name'] in row['Product Name']:
        correctness_score += 1 / 2
    
    return int(correctness_score * 10) / 10

In [18]:
def assess_completeness(row):
    # Check if the catalogue provides the minimum attributes required for the buyer to understand the product features clearly
    # Return a score between 0 and 1
    
    # Assume we have a list of required attributes
    required_attributes = ['Selling Price', 'Brand Name', 'Product Name', 'Image', 'Product Description']
    
    # Calculate the completeness score
    completeness_score = 0
    
    # Check if the required attributes are present
    for attribute in required_attributes:
        if attribute in row:
            completeness_score += 1 / len(required_attributes)
    
    return int(completeness_score * 10) / 10

In [19]:
# Function to compute the objective store for a merchant catalog
def compute_objective_store(row):
    compliance_score = assess_compliance(row)
    correctness_score = assess_correctness(row)
    completeness_score = assess_completeness(row)
    
    objective_store = (
        compliance_score * scoring_parameters['compliance']['label_compliance'] +
        correctness_score * scoring_parameters['correctness']['authenticity'] +
        completeness_score * scoring_parameters['completeness']['attributes']
    )
    
    return objective_store

In [21]:
# Compute the objective store for each product in the dataset
data['objective_store'] = data.apply(compute_objective_store, axis=1)

In [22]:
# Save the updated dataset to a new CSV file
data.to_csv('amazon_product_dataset_with_objective_store.csv', index=False)

In [23]:
# Load the dataset
data = pd.read_csv('amazon_product_dataset_with_objective_store.csv')
data.head(3)

Unnamed: 0,Uniq Id,Product Name,Brand Name,Category,Product Description,Selling Price,Product Specification,Technical Details,Shipping Weight,Image,Product Url,Is Amazon Seller,Product Description.1,objective_store
0,A1,Mobile Phone,Apple,Electronic device,cutting-edge technology,$237.68,Shipping Weight: 10.7 pounds (View shipping ra...,,10.7 pounds,https://images-na.ssl-images-amazon.com/images...,https://www.amazon.com/DB-Longboards-CoreFlex-...,Y,Adidas products offer best performance,1.5
1,A2,Laptop,Samsung,,,$99.95,Product Dimensions: 14.7 x 11.1 x 10.2...,The snap circuits mini kits classpack provides...,4 pounds,https://images-na.ssl-images-amazon.com/images...,https://www.amazon.com/Electronic-Circuits-Cla...,Y,Best gaming experience,1.5
2,A3,Jacket,Puma,Clothing,high quality material,$34.99,ProductDimensions:10.3x3.4x0.8inches|ItemWeigh...,show up to 2 reviews by default No longer are ...,12.8 ounces,https://images-na.ssl-images-amazon.com/images...,https://www.amazon.com/3Doodler-Plastic-Innova...,Y,Best gaming experience,1.3
