In [None]:
## Install Dependencies
!pip install faiss-cpu tensorflow opencv-python pandas tqdm requests

In [None]:
## Imports & Setup
import os
import pandas as pd
import numpy as np
import faiss
import requests
from tqdm import tqdm
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing import image

In [None]:
## Upload your `501-Bottle-Dataset.csv`
from google.colab import files
# Opens the Colab file picker and blocks until the upload finishes.
# The keys of the returned mapping are used by the next cell as the CSV path.
uploaded = files.upload()  # click “Choose Files” and pick your CSV

In [None]:
## Load the Dataset
# A cancelled upload leaves `uploaded` empty; fail with a readable message
# instead of the bare StopIteration that next(iter(...)) would raise.
if not uploaded:
    raise ValueError("No file was uploaded - re-run the upload cell and choose your CSV.")

# The first uploaded filename is treated as the dataset CSV.
csv_path = next(iter(uploaded))
bottle_df = pd.read_csv(csv_path)
bottle_df.head()

In [None]:
## Download Bottle Images (robust to missing URLs)
import os
from urllib.parse import urlparse

import pandas as pd
import requests
from tqdm import tqdm

# Adjust these to your actual column names:
URL_COL = 'image_url'
ID_COL  = 'id'

image_dir = 'bottle_images'
os.makedirs(image_dir, exist_ok=True)

for _, row in tqdm(bottle_df.iterrows(), total=len(bottle_df)):
    url = row.get(URL_COL)
    bid = row.get(ID_COL)

    # Skip if URL or ID is missing
    if pd.isna(url) or pd.isna(bid):
        continue

    # Make sure it's a string
    url = str(url).strip()
    if not url.lower().startswith(('http://', 'https://')):
        continue

    # Files are named "<integer id><ext>"; a row whose id cannot be turned
    # into an integer is skipped instead of aborting the whole download loop.
    try:
        bottle_id = int(bid)
    except (TypeError, ValueError, OverflowError):
        print(f"⚠️ Skipping row with non-numeric id {bid!r}")
        continue

    # Build a safe filename: take the extension from the URL *path* only, so
    # dots inside the query string or fragment (e.g. "?w=1.5") are ignored.
    ext = os.path.splitext(urlparse(url).path)[1] or '.jpg'
    out_path = os.path.join(image_dir, f"{bottle_id}{ext}")

    # Download if not already present
    if os.path.exists(out_path):
        continue

    # Stream into a hidden temp file and rename only on success, so a failed
    # or interrupted download never leaves a truncated image that later runs
    # would mistake for a finished one.
    tmp_path = os.path.join(image_dir, f".{bottle_id}{ext}.part")
    try:
        with requests.get(url, stream=True, timeout=5) as r:
            r.raise_for_status()
            with open(tmp_path, 'wb') as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
        os.replace(tmp_path, out_path)
    except Exception as e:
        print(f"⚠️ Failed to download {url}: {e}")
    finally:
        # After a successful os.replace the temp file is gone; this only
        # cleans up after errors or a manual interrupt.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [None]:
## Feature Extraction Function
# ImageNet-pretrained MobileNetV2 used as a fixed feature extractor:
# include_top=False drops the classification head, and pooling='avg'
# global-average-pools the final feature map into one vector per image.
model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')

def extract_features(img_path):
    """Return the embedding of the image stored at `img_path`.

    The image is loaded at 224x224, wrapped into a batch of one, passed
    through `preprocess_input`, and fed to the module-level `model`.

    Args:
        img_path: Path to an image file readable by `image.load_img`.

    Returns:
        float32 NumPy array: the first (only) row of the model's prediction.
    """
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = preprocess_input(np.expand_dims(x, axis=0))
    # verbose=0: this function is called once per image inside tqdm loops;
    # Keras' default per-call progress output would flood the cell output.
    feats = model.predict(x, verbose=0)
    return feats[0].astype('float32')


In [None]:
## 🧬 Build FAISS Index

# List the image directory once and key every file by its stem ("<id>").
# An exact stem lookup avoids the prefix trap where id 1 would also match
# "10.jpg" or "100.jpg", and avoids re-listing the directory for every row.
# sorted() makes the choice deterministic if one id has several extensions.
files_by_stem = {}
for fname in sorted(os.listdir(image_dir)):
    files_by_stem.setdefault(os.path.splitext(fname)[0], fname)

# Extract all features
features = []
ids = []

for _, row in tqdm(bottle_df.iterrows(), total=len(bottle_df)):
    bid = row[ID_COL]
    # Downloaded files are named after the integer id, so normalise the row
    # value the same way (a float 12.0 must find "12.jpg"); NaN and other
    # non-numeric ids have no image and are skipped.
    try:
        stem = str(int(bid))
    except (TypeError, ValueError, OverflowError):
        continue

    fname = files_by_stem.get(stem)
    if fname is None:
        continue

    # One unreadable image should not abort the whole index build.
    try:
        feats = extract_features(os.path.join(image_dir, fname))
    except Exception as e:
        print(f"⚠️ Skipping unreadable image {fname}: {e}")
        continue

    features.append(feats)
    ids.append(bid)

# np.stack([]) fails with an opaque message; explain what is actually wrong.
if not features:
    raise RuntimeError(
        f"No usable images found in '{image_dir}' - run the download cell first."
    )

features = np.stack(features)

# Create and populate FAISS index (row i of `features` belongs to ids[i])
d = features.shape[1]
index = faiss.IndexFlatL2(d)
index.add(features)
print(f"Indexed {index.ntotal} bottles")


In [None]:
## Evaluate Overall Accuracy over the Entire Dataset
import os
import pandas as pd

ID_COL = 'id'

# Settings
top_k = 5
results = []
correct_top1 = 0
correct_topk = 0
total = 0

# NOTE: the images queried here are the same files the index was built from,
# so an exact search will normally return each image itself at rank 1. Treat
# these numbers as a pipeline sanity check, not as accuracy on unseen photos.

# Loop through every downloaded image (sorted for a reproducible row order)
for fname in sorted(os.listdir(image_dir)):
    # Only consider files named like "<id>.<ext>"
    try:
        true_id = int(os.path.splitext(fname)[0])
    except ValueError:
        continue

    # Lookup metadata (e.g. Name, Price) if present. Files whose id is not in
    # the CSV (e.g. left over from another dataset) have no ground truth and
    # are skipped rather than crashing on .iloc[0].
    meta_rows = bottle_df[bottle_df[ID_COL] == true_id]
    if meta_rows.empty:
        continue
    meta = meta_rows.iloc[0].to_dict()

    img_path = os.path.join(image_dir, fname)
    # Extract features & search
    try:
        qf = extract_features(img_path).reshape(1, -1)
    except Exception as e:
        print(f"⚠️ Skipping unreadable image {fname}: {e}")
        continue
    D, I = index.search(qf, top_k)
    # FAISS pads the result with label -1 when the index holds fewer than
    # top_k vectors; ids[-1] would silently map that to the last bottle.
    preds = [ids[i] for i in I[0] if i >= 0]

    hit1 = bool(preds) and bool(preds[0] == true_id)
    hitk = (true_id in preds)

    total += 1
    correct_top1 += int(hit1)
    correct_topk += int(hitk)

    results.append({
        'filename': fname,
        'true_id': true_id,
        'pred_top1': preds[0] if preds else None,
        'hit_top1': hit1,
        f'preds_top{top_k}': preds,
        f'hit_top{top_k}': hitk,
        **{k: meta[k] for k in meta if k not in (ID_COL,)}
    })

# Compute accuracies
top1_acc = correct_top1 / total if total else 0
topk_acc = correct_topk / total if total else 0

print(f"Evaluated {total} images")
print(f"Top-1 Accuracy: {correct_top1}/{total} = {top1_acc:.2%}")
print(f"Top-{top_k} Accuracy: {correct_topk}/{total} = {topk_acc:.2%}")

# Show detailed per-image results
results_df = pd.DataFrame(results)
results_df.head(10)  # show the first 10 rows; adjust as desired


In [None]:
## Accuracy Visualization and Report
import matplotlib.pyplot as plt
import pandas as pd

# Inputs produced by the evaluation cell:
#   - top1_acc, topk_acc: accuracy fractions
#   - top_k: the K used for the Top-K metric
#   - results_df: per-image results, including the 'hit_top1' column
metrics = {
    'Top-1 Accuracy': top1_acc,
    f'Top-{top_k} Accuracy': topk_acc
}
metric_names = list(metrics.keys())
metric_values = list(metrics.values())

# 1) Overall accuracies as a bar chart on a fixed 0..1 scale
fig_acc, ax_acc = plt.subplots()
ax_acc.bar(metric_names, metric_values)
ax_acc.set_ylim(0, 1)
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Overall Identification Accuracy')
plt.show()

# 2) How many images were hit vs. missed at rank 1
hit_count = results_df['hit_top1'].sum()
miss_count = len(results_df) - hit_count

fig_hits, ax_hits = plt.subplots()
ax_hits.bar(['Hit Top-1', 'Miss Top-1'], [hit_count, miss_count])
ax_hits.set_ylabel('Number of Images')
ax_hits.set_title('Top-1 Hit vs. Miss Counts')
plt.show()

# 3) The same metrics as a small summary table
report_df = pd.DataFrame({
    'Metric': metric_names,
    'Accuracy': metric_values
})

# Last expression of the cell, so the table is rendered
report_df


In [None]:
## Try it on a New Image
from google.colab import files

## Bottle Identification Function
def identify_bottle(query_path, top_k=5):
    """Find the indexed bottles that look most like the image at `query_path`.

    Args:
        query_path: Path to the query image (passed to `extract_features`).
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        DataFrame with one row per neighbour found, in the order returned by
        the index: the bottle's columns from `bottle_df` plus a 'confidence'
        column equal to 1 / (1 + distance). That value is a heuristic score,
        not a calibrated probability. The frame is empty when the index
        returns no valid neighbour.
    """
    qf = extract_features(query_path).reshape(1, -1)
    D, I = index.search(qf, top_k)
    matches = []
    for dist, idx in zip(D[0], I[0]):
        # FAISS pads with label -1 when fewer than top_k vectors are indexed;
        # ids[-1] would silently report the last bottle as a match.
        if idx < 0:
            continue
        bid = ids[idx]
        info = bottle_df[bottle_df[ID_COL] == bid].iloc[0].to_dict()
        info['confidence'] = float(1.0 / (1.0 + dist))
        matches.append(info)
    return pd.DataFrame(matches)


qry = files.upload()
# A cancelled upload leaves `qry` empty; fail with a readable message instead
# of the bare StopIteration that next(iter(...)) would raise.
if not qry:
    raise ValueError("No query image was uploaded - re-run this cell and choose an image.")

# The first uploaded filename is used as the query image path.
query_path = next(iter(qry))
res_df = identify_bottle(query_path, top_k=5)
res_df
