### Load the RGB mapping data and do modifications

In [4]:
import pandas as pd
import numpy as np

# Load the Munsell to RGB mapping CSV
munsell_rgb_df = pd.read_csv('munsell-rgb.csv')

# Clean up whitespace and extra commas in the 'Munsell' column if it's a string
if munsell_rgb_df['Munsell'].dtype == object:
    munsell_rgb_df['Munsell'] = munsell_rgb_df['Munsell'].str.strip()

# Ensure R, G, B columns are numeric and handle potential trailing commas
for color in ['R', 'G', 'B']:
    if munsell_rgb_df[color].dtype == object:
        munsell_rgb_df[color] = munsell_rgb_df[color].str.replace(',', '').str.strip()
    munsell_rgb_df[color] = pd.to_numeric(munsell_rgb_df[color], errors='coerce')

# Create a dictionary to map Munsell values to RGB tuples
munsell_to_rgb = {row['Munsell']: [row['R'], row['G'], row['B']] for index, row in munsell_rgb_df.iterrows()}


AttributeError: Can only use .str accessor with string values!

### Load the emotions data 
#### Clean any whitespace from my data which can cause mismatches in the merging process.

In [32]:
import pandas as pd
import numpy as np  # Import numpy for NaN

# Load the CSV file
emotions_df = pd.read_csv('emotions-munsell.csv')

# Replace empty strings with NaN for uniformity
emotions_df.replace(r'^\s*$', np.nan, regex=True, inplace=True)  # This handles purely white space entries as well.

emotions_df.columns = emotions_df.columns.str.strip()

# Display the first few rows to confirm correct loading and cleaning
print(emotions_df.head(5))  # Displaying first 15 to match your example

          Emotional_Word Munsell_Code_1 Munsell_Code_2 Munsell_Code_3  \
Cute              5R 8/6         5Y 9/6       5GY 8/11         5G 8/6   
Childlike        5R 7/10         5Y 9/6         5B 8/5       5RP 7/10   
Pretty            5R 8/6        5Y 8/11        5BG 7/9       5RP 7/10   
sweet            5R 7/10        5YR 9/4        5RP 8/6            NaN   
Amusing          5YR 8/7       5GY 7/12      5BG 6.5/6        5P 4/12   

          Munsell_Code_4 Munsell_Code_5 Munsell_Code_6 Munsell_Code_7  \
Cute              5B 8/5       5RP 7/10            NaN            NaN   
Childlike            NaN            NaN            NaN            NaN   
Pretty               NaN            NaN            NaN            NaN   
sweet                NaN            NaN            NaN            NaN   
Amusing         5RP 4/10            NaN            NaN            NaN   

          Munsell_Code_8 Munsell_Code_9  
Cute                 NaN            NaN  
Childlike            NaN            Na

### Mapping the Munsell Code to RGB valeus
#### I have create a function to map the Munsell codes from my emotional words to their corresponding RGB values using the color Dataframe

In [42]:
# Define a helper function to convert Munsell codes to RGB values
def munsell_to_rgb(munsell_code):
    if pd.isna(munsell_code) or munsell_code.strip() == '':
        return np.nan  # Replace missing Munsell codes with NaN
    match = rgb_df[rgb_df['Munsell'] == munsell_code.strip()]
    if not match.empty:
        return match.iloc[0][['R', 'G', 'B']].tolist()  # Return RGB values as a list
    else:
        return np.nan  # Return NaN if no match is found

# Iterate over each row in the emotions DataFrame to apply the conversion
results = []
for index, row in emotions_df.iterrows():
    # Extract and store the emotional word
    emotional_word = row['Emotional_Word']
    rgb_values = [emotional_word]  # Start the list with the emotional word
    # Iterate over each Munsell code in the row, starting from the second column
    for munsell_code in row[1:]:
        rgb_values.append(munsell_to_rgb(munsell_code))  # Append RGB list or NaN
    results.append(rgb_values)


### Create the Final Dataset
#### Once I have the mappings, I can save this new DataFrame as my final dataset which includes emotional words and their corresponding RGB values:

In [44]:
# Creating a DataFrame from the results
# We need to dynamically set the number of columns based on the maximum list length in results
max_columns = max(len(row) for row in results)  # Find the length of the longest list
column_names = ['Emotional_Word'] + [f'RGB_{i+1}' for i in range(max_columns - 1)]
final_df = pd.DataFrame(results, columns=column_names)

# Save the new DataFrame to a CSV file
final_df.to_csv('emotions_rgb.csv', index=False)