<a href="https://colab.research.google.com/github/Dashcloud-ML/Amazon_ml_challenge/blob/main/amazon_ml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import requests
from PIL import Image
from io import BytesIO
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical



In [None]:
# Step 1: Load Data
# Read the training CSV into a DataFrame.
train_data = pd.read_csv(
    filepath_or_buffer='/content/dataset/train.csv',
)


In [None]:
# Step 2: Function to Download and Preprocess Images
def download_and_preprocess_image(image_url, target_size=(128, 128)):
    """Download an image and return it as a fixed-size RGB pixel array.

    Args:
        image_url: URL of the image to fetch.
        target_size: ``(width, height)`` handed to ``Image.resize``.

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)``. If the download or
        decoding fails, an all-zero (black) array of that same shape and dtype
        is returned instead, so callers can still stack every result.
    """
    width, height = target_size
    try:
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        with Image.open(BytesIO(response.content)) as img:
            # Force three channels: grayscale, palette and RGBA images would
            # otherwise yield arrays that cannot be stacked with RGB ones.
            rgb = img.convert("RGB").resize(target_size)
        return np.array(rgb, dtype=np.uint8)
    except Exception as exc:
        # Deliberately best-effort: one bad URL must not abort a long download
        # run. Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate so the run can still be stopped.
        print(f"Failed to download image: {image_url} ({exc!r})")
        # Blank placeholder matching the requested size and the dtype of
        # successfully decoded images.
        return np.zeros((height, width, 3), dtype=np.uint8)


In [None]:
# Step 3: Download and Preprocess All Images
# Fetch every linked image in row order and stack the results into one array.
image_features = np.array(
    [download_and_preprocess_image(link) for link in train_data['image_link']]
)


In [None]:
# Step 4: Prepare Labels (Numeric Value and Unit)
# The 'value' column as an array; used later as the regression target.
values = train_data['value'].to_numpy()
# One indicator column per distinct entry of the 'unit' column, as an array.
units = pd.get_dummies(train_data['unit']).to_numpy()

In [None]:
# Step 5: Split Data into Training and Validation Sets
# A single call splits images, values and units together, so the rows of all
# three stay aligned by construction (and mismatched lengths are rejected)
# rather than relying on two separate calls shuffling identically.
(X_train, X_val,
 y_train, y_val,
 unit_train, unit_val) = train_test_split(
    image_features, values, units, test_size=0.2, random_state=42
)


In [None]:
# Step 6: Build the CNN Model
# Layers are appended one at a time, in the order data flows through them.
model = Sequential()

# Convolution + pooling stages; the first layer declares the input shape.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dense head with dropout between the two hidden layers.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))

# Single linear unit: the model outputs one numeric value per image.
model.add(Dense(1, activation='linear'))

In [None]:
# Step 7: Compile the Model
# Adam optimizer, mean-squared-error loss, mean-absolute-error as a metric.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['mae'],
)


In [None]:
# Step 8: Train the Model (Regression on values)
# Ten epochs on the training split, evaluated on the validation split.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=10,
)



In [None]:
# Step 9: Predict on Test Data
# `test_data` is not defined by any earlier cell, so a fresh
# "Restart & Run All" would stop here with a NameError. Load it explicitly.
# NOTE(review): the path assumes test.csv sits next to train.csv in
# /content/dataset — confirm and adjust if the file lives elsewhere.
test_data = pd.read_csv('/content/dataset/test.csv')

# Fetch every test image in row order and stack the results into one array.
test_features = np.array(
    [download_and_preprocess_image(image_url) for image_url in test_data['image_link']]
)
predictions = model.predict(test_features)


In [None]:
# Step 10: Output Predictions in Desired Format
# TODO: Format the output as required by the challenge (not implemented yet).