Let's start by dividing the task into the components that need to be implemented:

1. Data Preprocessing 
2. Text-Based Product Recommendation
3. Image-Based Product Recommendation
4. Integration - Combine recommendation systems and get a list of final recommendations
5. Evaluation - Fine-tuning the parameters

In [11]:
import pandas as pd
from urllib.request import urlopen
from PIL import Image
from io import BytesIO
import os
import requests

# Load the dataset
# The CSV path is relative, so the file is expected in the current working
# directory. The resulting DataFrame is bound to `dataset`, which the
# following cells read and pass to preprocessing().
dataset = pd.read_csv('28k_apparel_data.csv')

def preprocessing(dataset):
    """Report missing values and fill missing ``brand`` entries with the mode.

    Prints a table of per-column null counts (only columns that contain
    nulls), replaces missing values in the ``brand`` column with that column's
    most frequent value, and then prints whether any nulls remain anywhere in
    the frame.

    Args:
        dataset: DataFrame that contains a ``brand`` column. It is modified
            in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Check for missing values
    null_counts = dataset.isnull().sum().sort_values(ascending=False)
    null_values = null_counts.to_frame(name='Null Values')
    missing_values = null_values[null_values['Null Values'] > 0]
    print(missing_values)

    # Assign the filled column back to the frame rather than calling
    # fillna(..., inplace=True) on the selected column: the chained in-place
    # form acts on an intermediate object, is deprecated by pandas, and leaves
    # the frame unchanged when Copy-on-Write is active.
    brand_mode = dataset['brand'].mode()
    if not brand_mode.empty:
        # mode() is empty when the column has no non-null values; in that case
        # there is nothing to fill with, and the report below says so.
        dataset['brand'] = dataset['brand'].fillna(brand_mode.iloc[0])

    null_values_after_fill = dataset.isnull().sum().sort_values(ascending=False)
    missing_values_after_fill = null_values_after_fill[null_values_after_fill > 0]
    if missing_values_after_fill.empty:
        print("All missing values filled successfully.")
    else:
        print("Some missing values could not be filled.")

    return dataset


# Run the preprocessing step on the loaded data. preprocessing() operates on
# `dataset` directly and returns that same object, so the result does not need
# to be reassigned; as the last expression of the cell it is also displayed.
preprocessing(dataset)

       Null Values
brand           50
All missing values filled successfully.


Unnamed: 0.1,Unnamed: 0,asin,brand,color,medium_image_url,product_type_name,title,formatted_price
0,4,B004GSI2OS,FeatherLite,Onyx Black/ Stone,https://images-na.ssl-images-amazon.com/images...,SHIRT,Featherlite Ladies' Long Sleeve Stain Resistan...,$26.26
1,6,B012YX2ZPI,HX-Kingdom Fashion T-shirts,White,https://images-na.ssl-images-amazon.com/images...,SHIRT,Women's Unique 100% Cotton T - Special Olympic...,$9.99
2,15,B003BSRPB0,FeatherLite,White,https://images-na.ssl-images-amazon.com/images...,SHIRT,FeatherLite Ladies' Moisture Free Mesh Sport S...,$20.54
3,27,B014ICEJ1Q,FNC7C,Purple,https://images-na.ssl-images-amazon.com/images...,SHIRT,Supernatural Chibis Sam Dean And Castiel O Nec...,$7.39
4,43,B0079BMKDS,FeatherLite,White,https://images-na.ssl-images-amazon.com/images...,APPAREL,Featherlite Ladies' Silky Smooth Pique (White)...,$13.53
...,...,...,...,...,...,...,...,...
17588,183081,B01MRV2IFS,YueLian,Black,https://images-na.ssl-images-amazon.com/images...,SHIRT,YueLian Women's Chiffon Short Sleeves Sun Prot...,$19.25
17589,183092,B01LY4QWLF,Vintage America,White,https://images-na.ssl-images-amazon.com/images...,SHIRT,Vintage America Women's Large Lace Up Collared...,$23.24
17590,183096,B07167SCNH,Tart Collections,Black,https://images-na.ssl-images-amazon.com/images...,SHIRT,"Tart Womens Collections Ann Wrap Top, Xs, Black",$29.99
17591,183101,B07575N2WX,Soprano,Gray,https://images-na.ssl-images-amazon.com/images...,SHIRT,Soprano Womens Small Tie-Fringe Slub-Knit Tank...,$22.83


In [3]:
# Folder (relative to the current working directory) that receives the
# downloaded product images.
image_directory = 'images/'
# exist_ok=True means no error is raised if the folder already exists, so this
# cell can be re-run safely.
os.makedirs(image_directory, exist_ok=True)

def download_and_save_image(url, filename, timeout=10):
    """Download ``url`` and write the response body to ``filename``.

    This is a best-effort helper for bulk downloads: every failure is reported
    with ``print`` and never raised, so one bad URL does not abort the caller's
    loop.

    Args:
        url: Address of the image to fetch.
        filename: Path of the file to write; it is opened in binary write
            mode, so an existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Without a timeout a stalled connection would block the
            download loop indefinitely.

    Returns:
        None.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            with open(filename, 'wb') as f:
                f.write(response.content)
            # Report success only after the file has been written and closed.
            print(f"Image saved: {filename}")
        else:
            print(f"Failed to download image from {url}. Status code: {response.status_code}")
    except Exception as e:
        # Deliberately broad: network errors, timeouts and file-system errors
        # are all reported the same way and the caller moves on.
        print(f"Error downloading image from {url}: {e}")

# Fetch the image for every product, in dataset order. Each file is named
# after the product's ASIN and stored inside `image_directory`.
for image_url, asin in zip(dataset['medium_image_url'], dataset['asin']):
    image_name = f"{asin}.jpg"
    image_path = os.path.join(image_directory, image_name)
    download_and_save_image(image_url, image_path)

Image saved: images/B004GSI2OS.jpg
Image saved: images/B012YX2ZPI.jpg
Image saved: images/B003BSRPB0.jpg
Image saved: images/B014ICEJ1Q.jpg
Image saved: images/B0079BMKDS.jpg
Image saved: images/B01NACPBG2.jpg
Image saved: images/B00480IRZS.jpg
Image saved: images/B014ICB9A0.jpg
Image saved: images/B011YPK0MW.jpg
Image saved: images/B0079QXJ3S.jpg
Image saved: images/B00R7DO9ZA.jpg
Image saved: images/B00480IS52.jpg
Image saved: images/B00BXOQ3X0.jpg
Image saved: images/B011JQWCCM.jpg
Image saved: images/B01NAZ3L3C.jpg
Image saved: images/B01I5GRO18.jpg
Image saved: images/B002ZZT446.jpg
Image saved: images/B015K88D78.jpg
Image saved: images/B07453B7L3.jpg
Image saved: images/B003BSPZ5I.jpg
Image saved: images/B01MS8J0HE.jpg
Image saved: images/B073WKCX36.jpg
Image saved: images/B014ICD9YO.jpg
Image saved: images/B01MAWUZB7.jpg
Image saved: images/B004TUJ6MA.jpg
Image saved: images/B01EZ6XKR4.jpg
Image saved: images/B014SSOEM4.jpg
Image saved: images/B074SGWFBN.jpg
Image saved: images/