In [41]:
import pandas as pd

train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [50]:
material_mapping = {
    # Plastic
    'plastic': 'plastic',
    'pvc': 'plastic',
    'silicon': 'plastic',
    'plastic and metal' : 'plastic',
    'plastic metals': 'plastic',  # Assuming primary material is plastic
    'faux leather': 'plastic',
    'leather / some plastic?' : 'plastic',
    
    # Glass
    'glass': 'glass',
    'glass and metal?': 'glass',  # Assuming primary material is glass
    
    # Ceramic
    'ceramic': 'ceramic',
    'clay': 'ceramic',
    'bricks': 'ceramic',
    'bricks (maybe it goes into ceramic)' : 'ceramic',
    
    # Metal
    'metal': 'metal',
    'aliminium': 'metal',
    'alluminium': 'metal',
    'steel': 'metal',
    'tin': 'metal',
    'gold': 'metal',
    'silver': 'metal',
    'can' : 'metal',
    'metal and plastic?': 'metal',  # Assuming primary material is metal
    'metals / plastic?' : 'metal',
    
    # Wood
    'wood': 'wood',
    'wood and metals?': 'wood',  # Assuming primary material is wood
    
    # Paper
    'paper': 'paper',
    'cardboard': 'paper',
    'sticky paper?': 'paper',
    'cellulose': 'paper',
    
    # Fabric
    'fabric': 'fabric',
    'cotton': 'fabric',
    'silk': 'fabric',
    'wool': 'fabric',
    'synthetic fabric': 'fabric',
    'polyester': 'fabric',
    'nylon': 'fabric',
    'leather': 'fabric',
    'cowhide?': 'fabric',
    'rubber fabric composite': 'fabric',
    
    # Food
    'food': 'food',
    'food / for shaker its glass and metal' : 'food',
    'food / for shaker its glass and metal?' : 'food',
    
    # Unknown
    'unknown': 'unknown',
    'not sure': 'unknown',
    'na': 'unknown',
    
    # Other
    'plant': 'other',
    'plant natural': 'other',
    'plant organic': 'other',
    'plant?': 'other',
    'real plant. natural': 'other',
    'foliage': 'other',
    'straw': 'other',
    'composite': 'other',
    'rubber': 'other',
    'latex': 'other',
    'foam': 'other',
    'liquid': 'other',
    'water': 'other',
    'componets eletronic': 'other',
    'wax': 'other',
    'pearl': 'other',
    'pile': 'other',
    'tape': 'other',
    'organic material': 'other',
    'stone': 'other'
}

import re
def remove_extra_whitespace(text):
    """
    Remove extra whitespace from the middle of the string.
    
    Parameters:
    -----------
    text : str
        Input string with potential extra whitespace
    
    Returns:
    --------
    str
        String with extra whitespace removed
    """
    return re.sub(r'\s+', ' ', text).strip()

def count_most_common_material(train_df):
    object_material_count = {}
    for _, row in train_df.iterrows():
        o, y = row['object'], row['label']
        y = material_mapping[remove_extra_whitespace(y.strip())]
        if o in object_material_count:
            if y in object_material_count[o]:
                object_material_count[o][y] += 1
            else:
                object_material_count[o][y] = 1
        else:
            object_material_count[o] = {}
            object_material_count[o][y] = 1

    most_common = {object_name: max(counts) for object_name, counts in object_material_count.items()}
    return most_common

most_common = count_most_common_material(train_df)

In [53]:
correct_test = 0
total_test = test_df.shape[0]

for _, row in test_df.iterrows():
    y = material_mapping[remove_extra_whitespace(row['label'].strip())]
    if most_common[row['object']] == y:
        correct_test += 1

test_accuracy = 100 * correct_test / total_test
print(f'Test Accuracy: {test_accuracy:.2f}%')

Test Accuracy: 25.74%
