In [1]:
import requests
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Validator for bare (no leading '#') six-digit hex color codes
def is_valid_hex(hex_value):
    """Return True when ``hex_value`` has exactly six characters, all hex digits.

    Both upper- and lower-case digits are accepted. A leading '#' is not
    stripped here, so callers must remove it before validating.
    """
    hex_digits = '0123456789abcdefABCDEF'

    # Reject anything that is not exactly six characters long
    if len(hex_value) != 6:
        return False

    # Bail out on the first character that is not a hex digit
    for char in hex_value:
        if char not in hex_digits:
            return False
    return True

# Step 1: Fetch Data from API
url = "https://makeup-api.herokuapp.com/api/v1/products.json"
# The timeout stops the cell from hanging forever on an unresponsive host, and
# raise_for_status() reports HTTP failures here rather than as a JSON decode error.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Step 2: Prepare the Data
# One entry per (product, color) pair; the four lists stay index-aligned.
product_names = []
hex_colors = []
rgb_colors = []
brands = []

for product in data:
    # `.get(key, default)` only covers a missing key. JSON null decodes to None,
    # so `or` is used to make the fallbacks apply to null values as well.
    product_name = product.get('name') or 'Unknown Product'
    brand_name = product.get('brand') or 'Unknown Brand'
    for color in product.get('product_colors') or []:
        hex_value = (color.get('hex_value') or '').strip('#')
        if is_valid_hex(hex_value):  # Skip malformed or non-6-digit values
            product_names.append(product_name)
            hex_colors.append(hex_value)
            brands.append(brand_name)
            # Convert "rrggbb" to an (r, g, b) tuple of ints in 0..255
            rgb_colors.append(tuple(int(hex_value[i:i+2], 16) for i in (0, 2, 4)))

# Convert lists to DataFrame
df = pd.DataFrame({
    'Product Name': product_names,
    'Hex Color': hex_colors,
    'RGB Color': rgb_colors,
    'Brand': brands
})

# Step 3: Implement KNN
# Convert RGB list to a numpy array with one row per product color
rgb_array = np.array(rgb_colors)

if len(rgb_array) == 0:
    raise ValueError("No valid product colors were found; cannot build the KNN index")

# Initialize the Nearest Neighbors model.
# The default n_neighbors must not exceed the number of fitted samples, otherwise
# any kneighbors() call that relies on the default fails.
knn = NearestNeighbors(n_neighbors=min(10, len(rgb_array)), metric='euclidean')
knn.fit(rgb_array)

# Function to recommend products based on input hex colors (multiple)
def recommend_products(hex_colors_list, num_recommendations=10):
    """Recommend the catalogue colors closest to any of the given hex colors.

    Each input color is matched against the fitted module-level ``knn`` model
    using Euclidean distance in RGB space. The candidates for all inputs are
    pooled and the ``num_recommendations`` smallest distances win, so a single
    product color can appear more than once if it is close to several inputs.

    Args:
        hex_colors_list: iterable of 6-digit hex color strings. A leading '#'
            and surrounding whitespace are tolerated.
        num_recommendations: maximum number of rows to return.

    Returns:
        DataFrame with the columns of the module-level ``df`` plus
        'Input Hex Color' (the input value, as given, that produced the match),
        ordered from closest to farthest, with a fresh 0..n-1 index.

    Raises:
        ValueError: if an input is not a valid 6-digit hex color.
    """
    sources = list(hex_colors_list)

    # Validate every input before querying so a bad value fails with a clear message
    queries = []
    for input_hex_color in sources:
        cleaned = input_hex_color.strip().lstrip('#')
        if not is_valid_hex(cleaned):
            raise ValueError("Invalid hex color: {!r}".format(input_hex_color))
        queries.append(tuple(int(cleaned[i:i + 2], 16) for i in (0, 2, 4)))

    # A row in the overall top-N is necessarily in the top-N of its own query,
    # so there is no need to rank the entire catalogue for every input color.
    per_query = min(num_recommendations, len(rgb_array))

    if not queries or per_query <= 0:
        empty = df.iloc[0:0].copy()
        empty['Input Hex Color'] = []
        return empty.reset_index(drop=True)

    # One batched call: both results have shape (len(queries), per_query)
    distances, indices = knn.kneighbors(queries, n_neighbors=per_query)
    flat_distances = distances.ravel()
    flat_indices = indices.ravel()

    # Stable sort keeps tie order deterministic between runs
    closest = np.argsort(flat_distances, kind='stable')[:num_recommendations]

    # Copy so that adding a column does not touch the shared catalogue frame
    recommendations = df.iloc[flat_indices[closest]].copy()
    # Flattened position p belongs to query number p // per_query
    recommendations['Input Hex Color'] = [sources[pos // per_query] for pos in closest]

    return recommendations.reset_index(drop=True)

# Demo: query the recommender with several shades at once
input_hex_colors = ["bf5a7f", "f4a460", "4682b4"]  # sample query colors
recommended_products = recommend_products(input_hex_colors)

# Show a header naming the queried colors, followed by the matched products
print(
    "Recommended Products for Hex Colors {}:".format(", ".join(input_hex_colors)),
    recommended_products[['Product Name', 'Hex Color', 'Brand']],
    sep="\n",
)


Recommended Products for Hex Colors bf5a7f, f4a460, 4682b4:
                                        Product Name Hex Color  \
0                                 B Glossy Lip Gloss    BF5D7F   
1                            Duo Chromatic Lip Gloss    EFA367   
2               Suncoat Girl Water-Based Nail Polish    b95b77   
3                  Marcelle Rouge Xpression Lipstick    C75878   
4                                   Jumbo Eye Pencil    F59E57   
5  Clinique Pop&trade; Lip Shadow Cushion Matte L...    BB5974   
6                                        Cloud Paint    B65F79   
7                                      DIORIFIC KHÔL    C95B86   
8                                   Jumbo Eye Pencil    F7AF65   
9        Marcelle Forever Sharp Waterproof Lip Liner    CC5A7C   

                    Brand  
0  sally b's skin yummies  
1                     nyx  
2                 suncoat  
3                marcelle  
4                     nyx  
5                clinique  
6                

In [3]:
import requests
from PIL import Image
from io import BytesIO
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed

# Format an (r, g, b) sequence as a '#rrggbb' string
def rgb_to_hex(rgb):
    """Return the lower-case '#rrggbb' hex string for integer RGB channels.

    Each of the first three values is rendered as a zero-padded two-digit
    hexadecimal number; any further values in ``rgb`` are ignored.
    """
    hex_template = '#{:02x}{:02x}{:02x}'
    return hex_template.format(*rgb)

# Function to get a representative (average) color from an image URL using PIL
def get_dominant_color(image_url):
    """Download an image and return its average color as a '#rrggbb' string.

    Despite the name, the value is the per-channel mean of the image after it
    has been downsampled to 10x10 pixels, not the most frequent color.

    Args:
        image_url: URL of the image to fetch (5 second request timeout).

    Returns:
        The hex color string, or None when the download, decoding or averaging
        fails for any reason, or when the image has a zero dimension.
    """
    try:
        response = requests.get(image_url, timeout=5)  # Short timeout so dead links fail fast
        response.raise_for_status()  # Treat HTTP error statuses as failures

        # The context manager closes the decoded source image once the RGB
        # conversion has produced its own in-memory image.
        with Image.open(BytesIO(response.content)) as source_img:
            img = source_img.convert('RGB')  # Ensure every pixel is an (r, g, b) triple

        # Check if image is empty
        if img.size[0] == 0 or img.size[1] == 0:
            return None

        img = img.resize((10, 10))  # 100 pixels are enough for an average
        pixels = list(img.getdata())
        # zip(*pixels) regroups the pixels into one sequence per channel
        dominant_color = tuple(sum(col) // len(pixels) for col in zip(*pixels))
        return rgb_to_hex(dominant_color)
    except Exception:
        # Deliberate best-effort: one unreachable or corrupt image must not abort the batch
        return None

# Function to process each row
def process_row(row, name_col='name', img_col='img'):
    """Return ``(name, color)`` for the first image URL in the row that yields a color.

    Args:
        row: mapping-like record (for example a pandas Series) holding the
            product name and a comma-separated string of image URLs.
        name_col: key of the product name field.
        img_col: key of the image URL field.

    Returns:
        A ``(name, hex_color)`` tuple. ``hex_color`` is None when no URL
        produced a color or the image field is not a string.

    Raises:
        KeyError: if ``img_col`` or ``name_col`` is absent from ``row``.
    """
    # Read both fields up front so a missing column fails before any download starts
    raw_urls = row[img_col]
    name = row[name_col]

    # Missing values (e.g. NaN) have no URLs to try
    if not isinstance(raw_urls, str):
        return name, None

    for url in (part.strip() for part in raw_urls.split(',')):
        if not url:
            continue  # Skip blanks left by empty cells or stray/trailing commas
        color = get_dominant_color(url)
        if color:
            return name, color
    return name, None

# Load the dataset
file_path = '/Users/nattybatty/Downloads/Data - Copy.csv'
# NOTE(review): this rebinds `df`, which recommend_products() also reads as a
# global; re-run the recommender cell before calling it again after this cell.
df = pd.read_csv(file_path)

# Fail fast, with the available column names, when the CSV does not have the
# columns process_row() reads; otherwise the error only surfaces from inside a
# worker thread after downloads have already started.
required_columns = ['name', 'img']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        "Input CSV is missing required column(s) {}; available columns: {}".format(
            missing_columns, list(df.columns)
        )
    )

# Filter out rows where the 'img' column is missing
df_filtered = df.dropna(subset=['img'])

# Process the DataFrame in parallel.
# executor.map returns results in input order, so the output file is
# reproducible from run to run regardless of which download finishes first.
with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust number of workers based on your system
    results = list(executor.map(process_row, (row for _, row in df_filtered.iterrows())))

# Create a new DataFrame with the results
df_final = pd.DataFrame(results, columns=['name', 'dominant_color'])

# Drop rows where the color could not be extracted
df_final = df_final.dropna(subset=['dominant_color'])

# Save the resulting DataFrame to a new CSV file
output_path = '/Users/nattybatty/Downloads/Processed_Data.csv'
df_final.to_csv(output_path, index=False)

# Display the first few rows of the resulting DataFrame
print(df_final.head())


KeyError: 'name'