### IMPORTS

In [15]:
import pandas as pd
import numpy as np
import re
from fuzzywuzzy import fuzz, process

### DATA

In [2]:
# WeLab catalogue: one JSON export per equipment category, read in the
# order cameras, lenses, audio, lights.
wl_cams, wl_lens, wl_audio, wl_lights = map(pd.read_json, (
    '../data/WELAB/welab_cameras.json',
    '../data/WELAB/welab_lenses.json',
    '../data/WELAB/welab_audio.json',
    '../data/WELAB/welab_lights.json',
))

In [3]:
# RC Service catalogue: only camera and lens exports are loaded here.
rc_cams, rc_lenses = map(pd.read_json, (
    '../data/RC SERVICE/rcservice_cameras.json',
    '../data/RC SERVICE/rcservice_lenses.json',
))

In [4]:
# Ovide catalogue: read in the order cameras, lights, lenses, audio.
ov_cams, ov_lights, ov_lenses, ov_audio = map(pd.read_json, (
    '../data/OVIDE/ovide_cameras.json',
    '../data/OVIDE/ovide_lights.json',
    '../data/OVIDE/ovide_lenses.json',
    '../data/OVIDE/ovide_audio.json',
))

In [5]:
# Lookup table of rental places; transform_data reads its
# 'rental_place_name' and 'rental_place_id' columns.
rental_places = pd.read_json('../data/rental_places.json') 

### TRANSFORMATION

In [6]:
def transform_data(data_frame, rental_places_df):
    """Return a normalised copy of a scraped catalogue frame.

    The input frame is left untouched; a new frame is returned in which:

    * column labels are lowercase;
    * ``'price a day'`` is renamed to ``'price_a_day'``;
    * every string cell is lowercase (non-string cells are kept as they are);
    * the ``'rental'`` column is replaced by a ``'rental_place_id'`` column,
      looked up case-insensitively in ``rental_places_df``.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Catalogue frame. Must contain a ``rental`` column (any letter case).
    rental_places_df : pandas.DataFrame
        Lookup table with ``rental_place_name`` and ``rental_place_id`` columns.

    Returns
    -------
    pandas.DataFrame
        The normalised frame. Rental names that are not present in
        ``rental_places_df`` get a missing ``rental_place_id`` (NaN).

    Raises
    ------
    KeyError
        If the frame has no ``rental`` column, or the lookup table lacks one
        of its two required columns.
    """
    # DataFrame.map only exists from pandas 2.1; older releases call the same
    # element-wise operation ``applymap``. Looked up on the class so a data
    # column that happens to be called 'map' cannot shadow the method.
    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap

    # rename() returns a new frame, so the caller's frame keeps its labels.
    renamed = (
        data_frame
        .rename(columns=lambda label: label.lower() if isinstance(label, str) else label)
        .rename(columns={'price a day': 'price_a_day', 'rental': 'rental_place_name'})
    )

    lowered = elementwise(
        renamed,
        lambda value: value.lower() if isinstance(value, str) else value,
    )

    # Lowercase name -> id, so the lookup is case-insensitive on both sides.
    rental_id_mapping = dict(zip(
        rental_places_df['rental_place_name'].str.lower(),
        rental_places_df['rental_place_id'],
    ))

    # The names were already lowercased by the element-wise pass above.
    rental_ids = lowered['rental_place_name'].map(rental_id_mapping)

    # Swap the free-text rental name for its id.
    return (
        lowered
        .assign(rental_place_id=rental_ids)
        .drop(columns=['rental_place_name'])
    )

In [7]:
# Normalise each WeLab frame against the shared rental-place lookup.
wl_cams_2, wl_lens_2, wl_audio_2, wl_lights_2 = (
    transform_data(raw_frame, rental_places)
    for raw_frame in (wl_cams, wl_lens, wl_audio, wl_lights)
)

In [8]:
# Normalise each RC Service frame against the shared rental-place lookup.
rc_cams_2, rc_lenses_2 = (
    transform_data(raw_frame, rental_places)
    for raw_frame in (rc_cams, rc_lenses)
)

In [9]:
# Normalise each Ovide frame against the shared rental-place lookup.
ov_audio_2, ov_cams_2, ov_lenses_2, ov_lights_2 = (
    transform_data(raw_frame, rental_places)
    for raw_frame in (ov_audio, ov_cams, ov_lenses, ov_lights)
)

In [10]:
# Remove the literal words 'cámara' and 'mini' from the Ovide camera names,
# then collapse the whitespace the removals leave behind (leading/trailing
# spaces and doubled spaces inside the name).
# regex=False makes the two removals literal on every pandas version; the
# default of that parameter changed in pandas 2.0.
# NOTE(review): dropping 'mini' makes names such as 'arri alexa mini' read as
# 'arri alexa', and it also fires inside longer words -- confirm this is the
# intended product grouping.
ov_cams_2['name'] = (
    ov_cams_2['name']
    .str.replace('cámara', '', regex=False)
    .str.replace('mini', '', regex=False)
    .str.replace(r'\s+', ' ', regex=True)
    .str.strip()
)

In [52]:
def perform_fuzzy_matching(df_list, threshold=98, scorer=None):
    """Give similarly named products the same ``matched_id`` across frames.

    Every frame in ``df_list`` is modified in place and gains three columns:

    * ``product_id`` -- the frame's index plus one (1, 2, 3, ... for a
      default 0-based index);
    * ``tokens`` -- the ``name`` column split on whitespace;
    * ``matched_id`` -- an id shared by all rows, in any frame of the list,
      whose name matches.

    Frames and rows are processed in order. A name is compared (after
    re-joining its tokens with single spaces) against every name registered
    so far, in registration order; it takes the id of the first one whose
    score is strictly greater than ``threshold``. If none qualifies, the name
    is registered under the next free id (ids start at 1).

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Frames to match; each needs a string ``name`` column.
    threshold : int or float, default 98
        A score must be strictly above this value to count as a match.
    scorer : callable, optional
        ``scorer(name, registered_name)`` returning a similarity score.
        Defaults to ``fuzz.ratio``.

    Returns
    -------
    list of pandas.DataFrame
        The same list object, with the same (now extended) frames.

    Raises
    ------
    TypeError
        If a ``name`` is missing, because its tokens cannot be joined.
    """
    if scorer is None:
        scorer = fuzz.ratio

    next_id = 1
    registered = {}  # normalised name -> matched id, in first-seen order

    for df in df_list:
        df['product_id'] = df.index + 1
        df['tokens'] = df['name'].str.split()

        frame_ids = []
        for tokens in df['tokens']:
            candidate = ' '.join(tokens)

            # First registered name that is similar enough, if any.
            assigned = next(
                (known_id for known_name, known_id in registered.items()
                 if scorer(candidate, known_name) > threshold),
                None,
            )
            if assigned is None:
                assigned = next_id
                registered[candidate] = assigned
                next_id += 1

            frame_ids.append(assigned)

        # Assigned by position in one go: writing row by row through index
        # labels would overwrite every row sharing a duplicated label.
        df['matched_id'] = np.asarray(frame_ids, dtype='int64')

    return df_list

# The lists of frames and the calls to perform_fuzzy_matching are in the cells below.



In [53]:
# Group the normalised frames by equipment category. Only WeLab and Ovide
# contribute audio and lights frames.
cams_list = [
    wl_cams_2,
    rc_cams_2,
    ov_cams_2,
]
lens_list = [
    wl_lens_2,
    rc_lenses_2,
    ov_lenses_2,
]
audio_list = [
    wl_audio_2,
    ov_audio_2,
]
lights_list = [
    wl_lights_2,
    ov_lights_2,
]

In [54]:
# Match camera names across the frames in cams_list; each frame gains
# product_id, tokens and matched_id columns.
cams = perform_fuzzy_matching(cams_list)

In [55]:
# Unpack in the order cams_list was built: wl, rc, ov.
wl_cams_3, rc_cams_3, ov_cams_3 = cams

In [56]:
# Preview the first rows of the matched Ovide cameras.
ov_cams_3.head()

Unnamed: 0,category,type,brand,name,price_a_day,link,rental_place_id,product_id,tokens,matched_id
0,cameras,cine digital,arri,arri alexa 35 4k,pedir presupuesto,https://www.ovide.com/alquiler-es/camara-arri-...,3,1,"[arri, alexa, 35, 4k]",85
1,cameras,cine digital,arri,arri alexa lf,pedir presupuesto,https://www.ovide.com/alquiler-es/camara-arri-...,3,2,"[arri, alexa, lf]",49
2,cameras,cine digital,arri,arri alexa,pedir presupuesto,https://www.ovide.com/alquiler-es/arri-alexa-m...,3,3,"[arri, alexa]",86
3,cameras,cine digital,arri,arri alexa lf,pedir presupuesto,https://www.ovide.com/alquiler-es/camara-arri-...,3,4,"[arri, alexa, lf]",49
4,cameras,cine digital,arri,arri alexa sxt plus 4:3,pedir presupuesto,https://www.ovide.com/alquiler-es/arri-alexa-s...,3,5,"[arri, alexa, sxt, plus, 4:3]",87


In [57]:
# Preview the first rows of the matched RC Service cameras.
rc_cams_3.head()

Unnamed: 0,category,type,brand,name,price_a_day,link,rental_place_id,product_id,tokens,matched_id
0,cameras,analogica,arri,packs arricam lt,pedir presupuesto,http://www.rcservice.es/es/camaras-35mm-16mm/2...,2,1,"[packs, arricam, lt]",62
1,cameras,analogica,movicam,packs movicam compact 35,pedir presupuesto,http://www.rcservice.es/es/camaras-35mm-16mm/2...,2,2,"[packs, movicam, compact, 35]",63
2,cameras,analogica,arri,packs arri 435,pedir presupuesto,http://www.rcservice.es/es/camaras-35mm-16mm/2...,2,3,"[packs, arri, 435]",64
3,cameras,analogica,arri,packs arri sr3,pedir presupuesto,http://www.rcservice.es/es/camaras-35mm-16mm/2...,2,4,"[packs, arri, sr3]",65
4,cameras,digital,arri,arri alexa 35,pedir presupuesto,http://www.rcservice.es/es/alquiler-camaras-pe...,2,5,"[arri, alexa, 35]",47


In [58]:
# Preview the first rows of the matched WeLab cameras.
wl_cams_3.head()

Unnamed: 0,category,brand,name,price_a_day,link,rental_place_id,product_id,tokens,matched_id
0,cameras,phantom,phantom flex 4k 128gb,pedir presupuesto,https://welabplus.com/shop8/camaras/phantom/ph...,1,1,"[phantom, flex, 4k, 128gb]",1
1,cameras,gopro,gopro hero 11 black edition,85,https://welabplus.com/shop8/camaras/gopro/gopr...,1,2,"[gopro, hero, 11, black, edition]",2
2,cameras,gopro,gopro max 360,70,https://welabplus.com/shop8/camaras/gopro/gopr...,1,3,"[gopro, max, 360]",3
3,cameras,gopro,gopro hero 10 black edition,75,https://welabplus.com/shop8/camaras/gopro/gopr...,1,4,"[gopro, hero, 10, black, edition]",4
4,cameras,gopro,gopro hero 9 black edition,69,https://welabplus.com/shop8/camaras/gopro/gopr...,1,5,"[gopro, hero, 9, black, edition]",5


In [59]:
# Run the same name matching for the remaining categories. Each call starts
# its own id numbering, so matched_id values are per category.
lens, audio, lights = (
    perform_fuzzy_matching(category_frames)
    for category_frames in (lens_list, audio_list, lights_list)
)

In [60]:
# Unpack each result in the order its input list was built
# (wl, rc, ov for lenses; wl, ov for audio and lights).
wl_lens_3, rc_lenses_3, ov_lenses_3 = lens
wl_audio_3, ov_audio_3 = audio
wl_lights_3, ov_lights_3 = lights