In [1]:
import pandas as pd
import re

In [6]:

# Input files, resolved relative to the notebook's working directory.
ESHOP_CSV = 'HaComp_CPU_List.csv'            # e-shop product listing
CPU_PERF_CSV = 'CPU_Performance_list.csv'    # CPU performance list

# Read both inputs into DataFrames; later cells refer to them by these names.
eshop_df = pd.read_csv(ESHOP_CSV)
cpu_perf_df = pd.read_csv(CPU_PERF_CSV)

# Function to clean the product name for matching
def clean_product_name(product_name):
    """Normalise an e-shop product name so it can be used as a merge key.

    Steps, in order:
      1. Non-string input (NaN, None, numbers) yields ''.
      2. Surrounding whitespace is stripped, then a leading 'CPU ' prefix
         is removed.
      3. Every parenthesised note is removed individually, including
         nested ones. Text between two separate notes is preserved.
      4. Runs of whitespace are collapsed to a single space and the result
         is stripped.

    Parameters
    ----------
    product_name : object
        Raw product name; anything that is not a ``str`` is treated as
        missing.

    Returns
    -------
    str
        The cleaned name, or '' for missing input.
    """
    if not isinstance(product_name, str):
        return ''

    # Strip first so the '^CPU' anchor also matches names with leading spaces.
    cleaned_name = re.sub(r"^CPU\s+", "", product_name.strip())

    # Remove innermost '(...)' groups repeatedly. A greedy r"\(.*\)" would
    # swallow everything between the first '(' and the last ')', deleting
    # real model text that sits between two notes.
    while True:
        cleaned_name, removed = re.subn(r"\([^()]*\)", "", cleaned_name)
        if removed == 0:
            break

    # Removing a note from the middle of a name leaves a double space,
    # which would defeat an exact-match join.
    cleaned_name = re.sub(r"\s+", " ", cleaned_name)
    return cleaned_name.strip()

# Normalise the raw 'Product Name' column: NaN / non-string cells become ''
# and surrounding whitespace is removed. The isinstance guard already covers
# NaN, so a separate fillna('') pass is not needed.
eshop_df['Product Name'] = eshop_df['Product Name'].apply(
    lambda x: x.strip() if isinstance(x, str) else ''
)

# Derive the merge key from the e-shop product names.
eshop_df['Cleaned Product Name'] = eshop_df['Product Name'].apply(clean_product_name)

# Build the score lookup table.
# - Its name column is renamed to the merge key so both frames join on one
#   shared column. Joining with left_on/right_on while both frames carry a
#   'Product Name' column makes pandas suffix them, which leaked
#   'Product Name_x' into the exported CSV.
# - Duplicate names are dropped (first occurrence kept) so the left join
#   cannot multiply e-shop rows.
score_lookup = (
    cpu_perf_df[['Product Name', 'Normalized Score (%)']]
    .rename(columns={'Product Name': 'Cleaned Product Name'})
    .drop_duplicates(subset='Cleaned Product Name')
)

# Left join: every e-shop row is kept; unmatched rows get NaN as score.
merged_df = pd.merge(eshop_df, score_lookup, on='Cleaned Product Name', how='left')

# Drop the helper key and give the score column a cleaner header.
# The rename key must be 'Normalized Score (%)' (the real column name);
# the previous key 'Normalize Score (%)' matched nothing, so the rename
# was silently a no-op.
merged_df = (
    merged_df
    .drop(columns=['Cleaned Product Name'])
    .rename(columns={'Normalized Score (%)': 'Normalized Score'})
)

# Save the output to a new CSV file
merged_df.to_csv('output_with_scores.csv', index=False)