In [1]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
import numpy as np

# Load pre-trained ResNet50 model
model = ResNet50(weights='imagenet')

def preprocess_image(image_path):
    img = image.load_img(image_path, target_size=(224, 224))  # Resize image to fit model input
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input

# Load CSV files
mapped_df = pd.read_csv('dataset/mapped_data.csv').head(1000)
text_mapped_df = pd.read_csv('dataset/test_mapped_data.csv').head(1000)

# Define a function to preprocess images and extract features using ResNet50
def extract_features(img_path, model):
    try:
        img = image.load_img(img_path, target_size=(224, 224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)
        features = model.predict(img_array)
        return features.flatten()
    except Exception as e:
        print(f"Error processing image {img_path}: {e}")
        return np.zeros(model.output_shape[1])

# Load pre-trained ResNet50 model for feature extraction
resnet_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Extract features from mapped_df
features = []
labels = []
for index, row in mapped_df.iterrows():
    img_path = row['image_path']
    feature = extract_features(img_path, resnet_model)
    features.append(feature)
    labels.append(row['entity_value'])

X = np.array(features)
y = np.array(labels)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Predict entity values for the test_mapped.csv file
test_features = []
for index, row in text_mapped_df.iterrows():
    img_path = row['image_path']
    feature = extract_features(img_path, resnet_model)
    test_features.append(feature)

X_test_mapped = np.array(test_features)
predicted_labels_encoded = rf_model.predict(X_test_mapped)
predicted_labels = label_encoder.inverse_transform(predicted_labels_encoded)

# Prepare output DataFrame with row index and predictions
output_df = pd.DataFrame({
    'row_index': text_mapped_df.index,
    'predicted_entity_value': predicted_labels
})

# Save the predictions to a CSV file
output_df.to_csv('predicted_text_mapped.csv', index=False)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 510ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4

In [3]:
import pandas as pd
import re
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import cv2
import numpy as np

# Load your mapped_data.csv
df = pd.read_csv('dataset/mapped_data.csv')

# Limit to the first 10,000 rows
df = df.head(10000)

# Helper function to split the entity_value into numerical part and unit
def split_entity_value(entity_value):
    match = re.match(r"([0-9.]+)\s*(\w+)", entity_value)
    if match:
        return float(match.group(1)), match.group(2)
    return None, None

# Apply the function to the entity_value column
df['entity_number'], df['entity_unit'] = zip(*df['entity_value'].apply(split_entity_value))

# Encode the entity_unit column
unit_encoder = LabelEncoder()
df['unit_encoded'] = unit_encoder.fit_transform(df['entity_unit'].astype(str))

# Extract image paths and encoded entity values
image_paths = df['image_path'].values
entity_numbers = df['entity_number'].values
unit_encoded = df['unit_encoded'].values

# Split the entity_name to handle different predictions (like weight and volume)
entity_names = df['entity_name'].values

# Check the encoding
print(df[['entity_unit', 'unit_encoded']].drop_duplicates())


     entity_unit  unit_encoded
0           gram            11
1            cup             5
4      milligram            19
10      kilogram            14
12         ounce            22
21        gallon             9
25          volt            28
36          watt            29
38         pound            25
63    millilitre            20
81         cubic             4
139         None             0
157        fluid             8
196          ton            27
204    decilitre             6
423        litre            16
459    microgram            18
536   centimetre             3
693        quart            26
798   horsepower            12
900     kilowatt            15
1674    gigabyte            10
1739  millimetre            21
1863           e             7
2574        pint            24
3067  centilitre             2
3996     candela             1
5981        inch            13
6002      person            23
8483       metre            17


In [5]:
import pytesseract
from pytesseract import Output

# Ensure that Tesseract is installed and accessible from your PATH
# pytesseract.pytesseract.tesseract_cmd = r'<path_to_tesseract_executable>'

# Helper function to extract text from an image
def extract_text_from_image(image_path):
    # Load image using OpenCV
    image = cv2.imread(image_path)
    
    # Convert the image to gray scale for better OCR performance
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Use Tesseract to do OCR on the image
    text = pytesseract.image_to_string(gray_image, output_type=Output.STRING)
    
    return text.strip()

# Extract text from each image
df['extracted_text'] = df['image_path'].apply(extract_text_from_image)

# Preview the extracted text
print(df[['image_path', 'extracted_text']].head())

# Optionally: save the updated DataFrame with extracted text
df.to_csv('dataset/mapped_data_with_text.csv', index=False)


                      image_path          extracted_text
0  output_images/61I9XdN6OFL.jpg                        
1  output_images/71gSRbyXmoL.jpg                        
2  output_images/61BZ4zrjZXL.jpg                        
3  output_images/612mrlqiI4L.jpg                        
4  output_images/617Tl40LOXL.jpg  PSYLLIUM\nHUSK\n\n1400


In [9]:
import tensorflow as tf
import pandas as pd

# Function to load and preprocess images
def preprocess_image(image_path, number, unit):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  # Resize to match your model input size
    image = image / 255.0  # Normalize image to [0, 1]
    return image, (number, unit)

# TensorFlow dataset function
def image_dataset(image_paths, numbers, units, batch_size=32):
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, numbers, units))
    dataset = dataset.map(lambda x, y, z: preprocess_image(x, y, z))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)  # Optimize dataset performance
    return dataset

# Create the dataset
image_paths = df['image_path'].values
numbers = df['entity_number'].values
units = df['unit_encoded'].values

train_data = image_dataset(image_paths, numbers, units)


In [13]:
num_classes = len(unit_encoder.classes_)
one_hot_units = to_categorical(df['unit_encoded'], num_classes=num_classes)

def image_dataset(image_paths, numbers, one_hot_units, batch_size=32):
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, numbers, one_hot_units))
    dataset = dataset.map(lambda x, y, z: preprocess_image(x, y, z))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)  # Optimize dataset performance
    return dataset


In [15]:
import tensorflow as tf
import pandas as pd
import re
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Load and preprocess data
df = pd.read_csv('dataset/mapped_data.csv')
df = df.head(10000)

def split_entity_value(entity_value):
    match = re.match(r"([0-9.]+)\s*(\w+)", entity_value)
    if match:
        return float(match.group(1)), match.group(2)
    return None, None

df['entity_number'], df['entity_unit'] = zip(*df['entity_value'].apply(split_entity_value))

unit_encoder = LabelEncoder()
df['unit_encoded'] = unit_encoder.fit_transform(df['entity_unit'].astype(str))

num_classes = len(unit_encoder.classes_)
one_hot_units = to_categorical(df['unit_encoded'], num_classes=num_classes)

# TensorFlow dataset function
def preprocess_image(image_path, number, one_hot_unit):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  # Resize to match your model input size
    image = image / 255.0  # Normalize image to [0, 1]
    return image, (number, one_hot_unit)

def image_dataset(image_paths, numbers, one_hot_units, batch_size=32):
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, numbers, one_hot_units))
    dataset = dataset.map(lambda x, y, z: preprocess_image(x, y, z))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)  # Optimize dataset performance
    return dataset

# Create the dataset
image_paths = df['image_path'].values
numbers = df['entity_number'].values
one_hot_units = one_hot_units  # Use the one-hot encoded units

train_data = image_dataset(image_paths, numbers, one_hot_units)


In [17]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models
import tensorflow as tf

# Load ResNet50 for feature extraction (you can also use any other model)
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze the base model layers

# Add custom layers on top
x = layers.Flatten()(base_model.output)
x = layers.Dense(1024, activation='relu')(x)

# Output 1: Predict numerical value
number_output = layers.Dense(1, name='number_output')(x)

# Output 2: Predict unit (classification)
num_classes = len(unit_encoder.classes_)  # Number of unique units from previous encoding
unit_output = layers.Dense(num_classes, activation='softmax', name='unit_output')(x)

# Create the model
model = models.Model(inputs=base_model.input, outputs=[number_output, unit_output])

# Compile the model with two losses
model.compile(optimizer='adam', 
              loss={'number_output': 'mse', 'unit_output': 'categorical_crossentropy'},
              metrics={'number_output': 'mae', 'unit_output': 'accuracy'})

# Summary of the model
model.summary()


In [19]:
def preprocess_image(image_path, number, one_hot_unit):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  # Resize to match your model input size
    image = image / 255.0  # Normalize image to [0, 1]
    return image, (number, one_hot_unit)

def image_dataset(image_paths, numbers, one_hot_units, batch_size=32, repeat=False):
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, numbers, one_hot_units))
    dataset = dataset.map(lambda x, y, z: preprocess_image(x, y, z))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)  # Optimize dataset performance
    if repeat:
        dataset = dataset.repeat()  # Ensure the dataset can be reused
    return dataset

# Split the dataset into training and validation manually
validation_size = int(0.2 * len(image_paths))  # 20% of data for validation
train_size = len(image_paths) - validation_size

# Note: Ensure your dataset can be repeated for full epochs
train_dataset = image_dataset(image_paths[:train_size], numbers[:train_size], one_hot_units[:train_size], repeat=True)
validation_dataset = image_dataset(image_paths[train_size:], numbers[train_size:], one_hot_units[train_size:])

# Train the model
history = model.fit(
    train_dataset, 
    validation_data=validation_dataset,
    epochs=1,  # Adjust epochs as needed
    steps_per_epoch=train_size // 32,  # Adjust batch size if necessary
    validation_steps=validation_size // 32,  # Adjust batch size if necessary
    verbose=1
)

# Save the trained model
model.save('multi_output_model.h5')


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m414s[0m 2s/step - loss: nan - number_output_loss: nan - number_output_mae: nan - unit_output_accuracy: 0.0410 - unit_output_loss: nan - val_loss: nan - val_number_output_loss: nan - val_number_output_mae: nan - val_unit_output_accuracy: 0.0302 - val_unit_output_loss: nan




In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input

# Load your test dataset
test_df = pd.read_csv('dataset/test_mapped_data.csv').head(10000)  # Adjust the path to your test CSV file

# Define function to preprocess image
def preprocess_image(img_path):
    img = image.load_img(img_path, target_size=(224, 224))  # Load image and resize to target size
    img_array = image.img_to_array(img)  # Convert image to array
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array = preprocess_input(img_array)  # Preprocess image as required by ResNet50
    return img_array

# Initialize lists for predictions
number_predictions = []
unit_predictions = []

# Loop through test dataset and make predictions
for index, row in test_df.iterrows():
    img_path = row['image_path']  # Adjust column name as necessary
    img_array = preprocess_image(img_path)
    
    # Predict
    preds = model.predict(img_array)
    number_pred = preds[0][0][0]  # Extract numerical value prediction
    unit_pred = np.argmax(preds[1], axis=-1)[0]  # Extract unit prediction (class index)
    
    number_predictions.append(number_pred)
    unit_predictions.append(unit_pred)

# Optionally map unit indices to unit names if needed
unit_names = list(unit_encoder.classes_)  # Get unit names from encoder
unit_predictions = [unit_names[idx] for idx in unit_predictions]

# Add predictions to test dataframe
test_df['predicted_number'] = number_predictions
test_df['predicted_unit'] = unit_predictions

# Save or display the results
test_df.to_csv('dataset/predictions.csv', index=False)  # Adjust path as necessary
print(test_df.head())


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 976ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6

In [95]:
# Load test data
test_df = pd.read_csv('dataset/test.csv')

# Print the column names to check
print(test_df.columns)


Index(['index', 'image_link', 'group_id', 'entity_name'], dtype='object')


In [None]:
# Add predictions to the test DataFrame
test_df['prediction'] = predictions

# Save the predictions
test_df[['index', 'prediction']].to_csv('dataset/predictions.csv', index=False)