In [79]:
import numpy as np
import pandas as pd
import re
from contextlib import contextmanager
import time
import concurrent.futures
import requests
from PIL import Image
from io import BytesIO
from keras.applications import InceptionV3
from tensorflow.keras.preprocessing import image
from keras.applications.inception_v3 import preprocess_input
import os

In [8]:
import chardet

# Read the first few bytes of the file to detect encoding
with open('books-1.csv', 'rb') as file:
    raw_data = file.read(10000)  # Read first 10,000 bytes or so
    result = chardet.detect(raw_data)
    print(result)

{'encoding': 'ISO-8859-1', 'confidence': 0.73, 'language': ''}


In [3]:
# Regex pattern: match semicolons not preceded by '&amp'
pattern = r'\";\"'


# Custom function to handle bad lines
def log_bad_lines(bad_line):
    print(f"Bad line: {bad_line}")
    return None  # Return None to skip the line

df_books = pd.read_csv('books-1.csv', delimiter=pattern, engine='python', encoding='ISO-8859-1', on_bad_lines=log_bad_lines)

In [51]:
len(df_books)

271379

In [56]:
df_books.head()

Unnamed: 0,"""ISBN",Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,"Image-URL-L"""
0,"""0195153448",Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,"""0002005018",Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,"""0060973129",Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,"""0374157065",Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,"""0393045218",The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [57]:
df_books.tail()

Unnamed: 0,"""ISBN",Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,"Image-URL-L"""
271374,"""0440400988",There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...
271375,"""0525447644",From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271376,"""006008667X",Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271377,"""0192126040",Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...
271378,"""0767409752",A Guided Tour of Rene Descartes' Meditations o...,Christopher Biffle,2000,McGraw-Hill Humanities/Social Sciences/Languages,http://images.amazon.com/images/P/0767409752.0...,http://images.amazon.com/images/P/0767409752.0...,http://images.amazon.com/images/P/0767409752.0...


In [94]:
df_books.loc[1,'Image-URL-M']

'http://images.amazon.com/images/P/0002005018.01.MZZZZZZZ.jpg'

In [42]:
# Load a pre-trained model
model = InceptionV3(weights='imagenet', include_top=False, pooling='avg')

In [67]:
# Context manager to suppress TensorFlow output


def extract_features_with_progress(df, img_column):
    # Initialize counter
    counter = 0
    start_time = time.time()

    # Function to extract features and track progress
    def extract_features(img_url):
        nonlocal counter, start_time
        try:
            # Set custom headers
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
    
            # Download the image from the URL with custom headers
            response = requests.get(img_url, headers=headers)
            
            # Check if the response is successful
            if response.status_code != 200:
                print(f"Failed to retrieve image from {img_url}, status code: {response.status_code}")
                return None
    
            # Check if the content type is an image
            if 'image' not in response.headers['Content-Type']:
                print(f"URL {img_url} does not point to an image. Content-Type: {response.headers['Content-Type']}")
                return None

            # Open the image
            img = Image.open(BytesIO(response.content)).convert('RGB').resize((75, 75))  # Resize image to match InceptionV3 input
            # Load and preprocess image
            img_array = np.expand_dims(np.array(img), axis=0)
            img_array = preprocess_input(img_array)


            features = model.predict(img_array,verbose=0)
            
            # Increment the counter
            counter += 1
            
            # Display progress every 1000 rows
            if counter % 100 == 0:
                elapsed_time = time.time() - start_time
                print(f"Processed {counter} rows. Time taken for last 1000 rows: {elapsed_time:.2f} seconds")
                # Reset the timer
                start_time = time.time()
            
            return features.flatten()
        except Exception as e:
            print(f"Error processing image at URL {img_url}: {e}")
            return None

    # Apply the function to the specified image column
    df['image_features'] = df[img_column].apply(extract_features)

    print("Feature extraction complete.")
    return df

In [None]:
# Extract features for each book cover image
a = extract_features("https://images.amazon.com/images/P/0195153448.01.LZZZZZZZ.jpg")

In [82]:
print(a)

[0.1583257  0.06574039 0.10741256 ... 0.14937358 0.2787666  0.5564468 ]


In [84]:
a.shape

(2048,)

In [87]:
# Extract features for each book cover image
b = extract_features("https://images.amazon.com/images/P/0195153448.01.MZZZZZZZ.jpg")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


In [88]:
def cosine_similarity(a, b):
    # Convert tuples to numpy arrays
    a = np.array(a)
    b = np.array(b)
    
    # Calculate the dot product
    dot_product = np.dot(a, b)
    
    # Calculate the magnitudes
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    
    # Calculate cosine similarity
    if norm_a == 0 or norm_b == 0:
        return 0.0  # Avoid division by zero
    else:
        return dot_product / (norm_a * norm_b)

In [97]:
print(cosine_similarity(b,c))

0.37555373


In [96]:
c = extract_features("http://images.amazon.com/images/P/0002005018.01.MZZZZZZZ.jpg")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


In [7]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 9371782593621818880
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14232322048
locality {
  bus_id: 1
  links {
  }
}
incarnation: 289145369720946211
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3080 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6"
xla_global_id: 416903419
]


In [16]:
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    print("GPUs Available:")
    for gpu in gpus:
        print(gpu)
else:
    print("No GPUs Available.")

GPUs Available:
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [68]:
df_books = extract_features_with_progress(df_books, 'Image-URL-M')

Processed 100 rows. Time taken for last 1000 rows: 39.47 seconds
Processed 200 rows. Time taken for last 1000 rows: 33.85 seconds
Processed 300 rows. Time taken for last 1000 rows: 27.90 seconds
Processed 400 rows. Time taken for last 1000 rows: 75.81 seconds
Processed 500 rows. Time taken for last 1000 rows: 98.84 seconds
Processed 600 rows. Time taken for last 1000 rows: 88.06 seconds
Processed 700 rows. Time taken for last 1000 rows: 94.44 seconds
Processed 800 rows. Time taken for last 1000 rows: 92.00 seconds
Processed 900 rows. Time taken for last 1000 rows: 93.84 seconds
Processed 1000 rows. Time taken for last 1000 rows: 38.70 seconds
Processed 1100 rows. Time taken for last 1000 rows: 31.55 seconds
Processed 1200 rows. Time taken for last 1000 rows: 45.13 seconds
Processed 1300 rows. Time taken for last 1000 rows: 40.80 seconds
Processed 1400 rows. Time taken for last 1000 rows: 45.40 seconds
Processed 1500 rows. Time taken for last 1000 rows: 37.51 seconds
Processed 1600 rows

KeyboardInterrupt: 

In [48]:
original_log_level = tf.get_logger().level
print(original_log_level)

20


In [52]:
 os.environ['TF_CPP_MIN_LOG_LEVEL']

'0'

In [70]:
import subprocess
import tensorflow as tf

# Get GPU details using nvidia-smi through a subprocess call
def get_gpu_memory():
    result = subprocess.run(['nvidia-smi', '--query-gpu=memory.total,memory.free,memory.used', '--format=csv,nounits,noheader'], stdout=subprocess.PIPE)
    gpu_memory = result.stdout.decode('utf-8').strip().split("\n")
    
    for i, mem_info in enumerate(gpu_memory):
        total, free, used = mem_info.split(',')
        print(f"GPU {i}:")
        print(f"  Total Memory: {total} MB")
        print(f"  Free Memory: {free} MB")
        print(f"  Used Memory: {used} MB")

# Ensure TensorFlow can detect GPUs
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    print("TensorFlow detected the following GPUs:")
    for gpu in gpus:
        print(gpu)
    # Use nvidia-smi to get GPU memory details
    get_gpu_memory()
else:
    print("No GPU detected.")


TensorFlow detected the following GPUs:
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
GPU 0:
  Total Memory: 16384 MB
  Free Memory:  1616 MB
  Used Memory:  14560 MB


In [74]:
# Adjust this based on GPU memory
BATCH_SIZE = 64

# Pre-trained model (InceptionV3 in this case)
model = tf.keras.applications.InceptionV3(weights='imagenet', include_top=False, input_shape=(75, 75, 3))

def extract_features_with_timing(df, img_column):

    def extract_features_batch(batch_urls):
        batch_images = []
        valid_urls = []


        for img_url in batch_urls:
            try:

                # Set custom headers for request
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
    
                response = requests.get(img_url, headers=headers)
                if response.status_code != 200:
                    print(f"Failed to retrieve image from {img_url}, status code: {response.status_code}")
                    continue
                if 'image' not in response.headers['Content-Type']:
                    print(f"URL {img_url} does not point to an image. Content-Type: {response.headers['Content-Type']}")
                    continue

                # Open the image and preprocess
                img = Image.open(BytesIO(response.content)).convert('RGB').resize((75, 75))
                img_array = preprocess_input(np.expand_dims(np.array(img), axis=0))
                batch_images.append(img_array)
                valid_urls.append(img_url)
            except Exception as e:
                print(f"Error processing image at URL {img_url}: {e}")
                continue

        # After error came as the first images were not downloaded due to network issue
        if not batch_images:
            return [None] * len(batch_urls), download_times, feature_extraction_times

        batch_images = np.vstack(batch_images)
        
        try:
            # Run prediction for the batch
            features_batch = model.predict(batch_images, verbose=0)
        except Exception as e:
            print(f"Error during batch prediction: {e}")
            return [None] * len(batch_urls)

        # Return features and the timing details
        return [features.flatten() for features in features_batch]

    # Process images in batches and track time
    total_download_time = 0
    total_feature_extraction_time = 0

    for i in range(0, len(df), BATCH_SIZE):
        batch_urls = df[img_column].iloc[i:i + BATCH_SIZE].tolist()
        features, download_times, feature_extraction_times = extract_features_batch(batch_urls)

        # Update total times
        total_download_time += sum(download_times)
        total_feature_extraction_time += sum(feature_extraction_times)

    # Print total time
    print(f"Total download time for all batches: {total_download_time:.2f} seconds")
    print(f"Total feature extraction time for all batches: {total_feature_extraction_time:.2f} seconds")

    return df

In [84]:
df = df_books.loc[0:6399,:]

In [78]:
# Call the function and time the processing
extract_features_with_timing(df, 'Image-URL-M')

Error processing image at URL http://images.amazon.com/images/P/0553492411.01.MZZZZZZZ.jpg: ('Connection aborted.', ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))
Error processing image at URL http://images.amazon.com/images/P/0590448315.01.MZZZZZZZ.jpg: ('Connection aborted.', ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))
Error processing image at URL http://images.amazon.com/images/P/0553295691.01.MZZZZZZZ.jpg: HTTPConnectionPool(host='images.amazon.com', port=80): Max retries exceeded with url: /images/P/0553295691.01.MZZZZZZZ.jpg (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001BB4791D510>: Failed to establish a new connection: [WinError 10065] A socket operation was attempted to an unreachable host'))
Error processing image at URL http://images.amazon.com/images/P/0553562754.01.MZZZZZZZ

Unnamed: 0,"""ISBN",Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,"Image-URL-L"""
0,"""0195153448",Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,"""0002005018",Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,"""0060973129",Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,"""0374157065",Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,"""0393045218",The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
6395,"""0006479383",There Will Be Wolves,Karleen Bradford,1994,Harpercollins Canada,http://images.amazon.com/images/P/0006479383.0...,http://images.amazon.com/images/P/0006479383.0...,http://images.amazon.com/images/P/0006479383.0...
6396,"""0330375253",Bridget Jones's Diary,Helen Fielding,2001,Picador,http://images.amazon.com/images/P/0330375253.0...,http://images.amazon.com/images/P/0330375253.0...,http://images.amazon.com/images/P/0330375253.0...
6397,"""0373834993",Somebody'S Dad,Judith Arnold,2002,Harlequin,http://images.amazon.com/images/P/0373834993.0...,http://images.amazon.com/images/P/0373834993.0...,http://images.amazon.com/images/P/0373834993.0...
6398,"""055337933X",Fierce Invalids Home from Hot Climates,TOM ROBBINS,2001,Bantam,http://images.amazon.com/images/P/055337933X.0...,http://images.amazon.com/images/P/055337933X.0...,http://images.amazon.com/images/P/055337933X.0...


In [88]:
import concurrent.futures
import requests
from PIL import Image
from io import BytesIO
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import preprocess_input

# Batch size for faster processing
BATCH_SIZE = 64

# Pre-trained model (InceptionV3 in this case)
model = tf.keras.applications.InceptionV3(weights='imagenet', include_top=False, input_shape=(75, 75, 3))

# Function to download a single image from a URL
def fetch_image(img_url):
    try:
        # Set custom headers for the request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(img_url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to retrieve image from {img_url}, status code: {response.status_code}")
            return None

        if 'image' not in response.headers['Content-Type']:
            print(f"URL {img_url} does not point to an image. Content-Type: {response.headers['Content-Type']}")
            return None

        # Open the image and preprocess it
        img = Image.open(BytesIO(response.content)).conv75, 75))ert('RGB').resize((
        return img

    except Exception as e:
        print(f"Error processing image at URL {img_url}: {e}")
        return None

# Function to process batches of URLs
def extract_features_batch(batch_urls):
    start_fetch_time = time.time()  # Start time for fetching images

    # Use ThreadPoolExecutor to parallelize image downloading
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        batch_images = list(executor.map(fetch_image, batch_urls))
    
    # Filter out failed downloads
    batch_images = [img for img in batch_images if img is not None]
    
    # If no images were successfully downloaded, return None
    if not batch_images:
        return [None] * len(batch_urls)

    # Stack images into a single batch for model prediction
    batch_images = np.vstack(batch_images)

    fetch_time = time.time() - start_fetch_time  # Time taken for fetching images
    print(f"Time taken to fetch images: {fetch_time:.2f} seconds")

    start_feature_time = time.time()  # Start time for feature extraction
    
    # Run feature extraction using the pre-trained model
    try:
        features_batch = model.predict(batch_images, verbose=0)
    except Exception as e:
        print(f"Error during batch prediction: {e}")
        return [None] * len(batch_urls)

    feature_time = time.time() - start_feature_time  # Time taken for feature extraction
    print(f"Time taken for feature extraction: {feature_time:.2f} seconds")

    # Return extracted features
    return [features.flatten() for features in features_batch]

# Ensure you are working with a copy of the DataFrame, not a slice
df = df.copy()

# Apply the groupby and feature extraction without chaining
extracted_features = df['Image-URL-M'].groupby(np.arange(len(df)) // BATCH_SIZE).apply(
    lambda x: extract_features_batch(x.tolist())
)

# Explode the result
extracted_features = extracted_features.explode().reset_index(drop=True)

# Assign back to the DataFrame using .loc to avoid the SettingWithCopyWarning
df.loc[:, 'image_features'] = extracted_features


Time taken to fetch images: 0.34 seconds
Time taken for feature extraction: 1.19 seconds
Time taken to fetch images: 0.22 seconds
Time taken for feature extraction: 0.08 seconds
Time taken to fetch images: 0.22 seconds
Time taken for feature extraction: 0.07 seconds
Time taken to fetch images: 0.21 seconds
Time taken for feature extraction: 0.06 seconds
Time taken to fetch images: 0.58 seconds
Time taken for feature extraction: 0.09 seconds
Time taken to fetch images: 0.45 seconds
Time taken for feature extraction: 0.13 seconds
Time taken to fetch images: 0.46 seconds
Time taken for feature extraction: 0.13 seconds
Time taken to fetch images: 0.64 seconds
Time taken for feature extraction: 0.12 seconds
Time taken to fetch images: 0.47 seconds
Time taken for feature extraction: 0.13 seconds
Time taken to fetch images: 0.45 seconds
Time taken for feature extraction: 0.15 seconds
Time taken to fetch images: 0.46 seconds
Time taken for feature extraction: 0.13 seconds
Time taken to fetch i

In [86]:
df.head()

Unnamed: 0,"""ISBN",Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,"Image-URL-L""",image_features
0,"""0195153448",Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,"[0.0, 0.0, 3.263706, 0.0, 0.0, 0.0, 0.0, 0.173..."
1,"""0002005018",Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"[1.743868, 1.4206514, 1.855142, 0.3013416, 0.0..."
2,"""0060973129",Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,"[0.0, 0.0, 0.0, 0.0, 5.3369713, 2.1563406, 0.0..."
3,"""0374157065",Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,"[0.0, 0.3148033, 0.0, 0.0, 0.3396586, 0.0, 0.0..."
4,"""0393045218",The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,"[3.3515277, 1.4376166, 0.0, 0.0, 4.3395886, 0...."


In [89]:
df[df["Image-URL-M"]=="http://images.amazon.com/images/P/0440419468.01.MZZZZZZZ.jpg"]

Unnamed: 0,"""ISBN",Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,"Image-URL-L""",image_features
3724,"""0440419468",Holes,LOUIS SACHAR,2003,Yearling,http://images.amazon.com/images/P/0440419468.0...,http://images.amazon.com/images/P/0440419468.0...,http://images.amazon.com/images/P/0440419468.0...,"[0.0, 0.0, 0.45375916, 0.0, 2.6519742, 0.0, 1...."


In [90]:
df.tail()

Unnamed: 0,"""ISBN",Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,"Image-URL-L""",image_features
6395,"""0006479383",There Will Be Wolves,Karleen Bradford,1994,Harpercollins Canada,http://images.amazon.com/images/P/0006479383.0...,http://images.amazon.com/images/P/0006479383.0...,http://images.amazon.com/images/P/0006479383.0...,"[0.33765462, 4.2126675, 1.971797, 2.9752412, 0..."
6396,"""0330375253",Bridget Jones's Diary,Helen Fielding,2001,Picador,http://images.amazon.com/images/P/0330375253.0...,http://images.amazon.com/images/P/0330375253.0...,http://images.amazon.com/images/P/0330375253.0...,"[0.25537303, 0.0, 0.7917996, 0.0, 1.3358392, 0..."
6397,"""0373834993",Somebody'S Dad,Judith Arnold,2002,Harlequin,http://images.amazon.com/images/P/0373834993.0...,http://images.amazon.com/images/P/0373834993.0...,http://images.amazon.com/images/P/0373834993.0...,"[0.0, 0.7393055, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
6398,"""055337933X",Fierce Invalids Home from Hot Climates,TOM ROBBINS,2001,Bantam,http://images.amazon.com/images/P/055337933X.0...,http://images.amazon.com/images/P/055337933X.0...,http://images.amazon.com/images/P/055337933X.0...,"[0.0, 0.0, 0.0, 0.0, 5.3369713, 2.1563406, 0.0..."
6399,"""0002245663",Forms of Devotion: Stories and Pictures,Diane Schoemperlen,1998,Harpercollins Canada,http://images.amazon.com/images/P/0002245663.0...,http://images.amazon.com/images/P/0002245663.0...,http://images.amazon.com/images/P/0002245663.0...,
