In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the colour dataset from a Parquet file; the path is relative to the
# current working directory.
dataset = pd.read_parquet("color_pedia.parquet")
# Preview the first rows to check which columns were loaded.
dataset.head()

Unnamed: 0,Color Name,HEX Code,Category,Description,Emotion,Personality,Mood,Symbolism,Use Case,Keywords,R,G,B,Hue,Saturation,Lightness,Contrast Level
0,Deep Maroon,#D62559,Red Family (Ruby Red),A vibrant and deep shade of maroon.,"Passionate, Intense","Bold, Dramatic","Strong, Powerful","Deep Maroon often symbolizes power, strength, ...",Ideal for creating a striking impact in design...,"Powerful, Passionate, Bold, Deep, Strong, Inte...",214,37,89,342.37,70.52,49.22,Dark
1,Golden Brick,#E5B262,"Warm, Earthy",A warm and inviting shade of golden brown.,"Comfort, Warmth, Optimism","Reliable, Friendly, Approachable","Cozy, Sunlit, Nurturing","Growth, Stability, Harvest",Ideal for creating a warm and inviting atmosph...,"Warm, Golden, Brown, Earthy, Comfortable, Cozy...",229,178,98,36.64,71.58,64.12,Dark
2,Lime Green,#8CCD48,"Bright, Vibrant",A lively shade of green with yellow undertones.,"Energetic, Fresh","Outgoing, Adventurous","Playful, Exciting","Growth, Renewal, Envy (in some cultures)","Ideal for modern design elements, branding for...","Bright, Vibrant, Fresh, Energy, Nature, Green,...",140,205,72,89.32,57.08,54.31,Dark
3,Sage Green #356A40,#356A40,Green,"A rich, muted green shade with a hint of blue.",Calmness and Relaxation,"Stable and reliable, like an old friend.",Serene and earthy.,"Growth, harmony, and balance in nature.",Ideal for spaces promoting relaxation and tran...,"Calm, Earthy, Nature, Reliable, Stable.",53,106,64,132.45,33.33,31.18,Dark
4,Deep Sapphire Blue,#0618DE,"Dark, Intense","A vibrant and rich shade of blue, reminiscent ...","Serene, Luxurious","Majestic, Mysterious","Calm, Elegant","Deep Sapphire Blue often symbolizes wisdom, lo...","Ideal for branding luxury items, technology, o...","Luxurious, Deep, Ocean, Premium, Elegant, Myst...",6,24,222,235.0,94.74,44.71,Dark


In [None]:
# Drop every row that has at least one missing value. Reassigning (instead of
# mutating in place) keeps the step explicit, and resetting the index keeps the
# row labels contiguous so later label-based lookups such as ``[0]`` stay valid.
dataset = dataset.dropna().reset_index(drop=True)

In [None]:
# Total number of missing cells across the whole frame (per-column counts,
# then summed); displayed as the cell result.
dataset.isna().sum().sum()

0

In [None]:
# Free-text columns that are merged into a single document per row.
text_colors = [
    "Description",
    "Emotion",
    "Personality",
    "Mood",
    "Symbolism",
    "Use Case",
    "Keywords",
]
# Replace any missing text with an empty string, then join the columns of each
# row with single spaces.
text_frame = dataset[text_colors].fillna("")
dataset["Combined Text"] = text_frame.agg(" ".join, axis=1)
dataset.head()

Unnamed: 0,Color Name,HEX Code,Category,Description,Emotion,Personality,Mood,Symbolism,Use Case,Keywords,R,G,B,Hue,Saturation,Lightness,Contrast Level,Combined Text
0,Deep Maroon,#D62559,Red Family (Ruby Red),A vibrant and deep shade of maroon.,"Passionate, Intense","Bold, Dramatic","Strong, Powerful","Deep Maroon often symbolizes power, strength, ...",Ideal for creating a striking impact in design...,"Powerful, Passionate, Bold, Deep, Strong, Inte...",214,37,89,342.37,70.52,49.22,Dark,A vibrant and deep shade of maroon. Passionate...
1,Golden Brick,#E5B262,"Warm, Earthy",A warm and inviting shade of golden brown.,"Comfort, Warmth, Optimism","Reliable, Friendly, Approachable","Cozy, Sunlit, Nurturing","Growth, Stability, Harvest",Ideal for creating a warm and inviting atmosph...,"Warm, Golden, Brown, Earthy, Comfortable, Cozy...",229,178,98,36.64,71.58,64.12,Dark,A warm and inviting shade of golden brown. Com...
2,Lime Green,#8CCD48,"Bright, Vibrant",A lively shade of green with yellow undertones.,"Energetic, Fresh","Outgoing, Adventurous","Playful, Exciting","Growth, Renewal, Envy (in some cultures)","Ideal for modern design elements, branding for...","Bright, Vibrant, Fresh, Energy, Nature, Green,...",140,205,72,89.32,57.08,54.31,Dark,A lively shade of green with yellow undertones...
3,Sage Green #356A40,#356A40,Green,"A rich, muted green shade with a hint of blue.",Calmness and Relaxation,"Stable and reliable, like an old friend.",Serene and earthy.,"Growth, harmony, and balance in nature.",Ideal for spaces promoting relaxation and tran...,"Calm, Earthy, Nature, Reliable, Stable.",53,106,64,132.45,33.33,31.18,Dark,"A rich, muted green shade with a hint of blue...."
4,Deep Sapphire Blue,#0618DE,"Dark, Intense","A vibrant and rich shade of blue, reminiscent ...","Serene, Luxurious","Majestic, Mysterious","Calm, Elegant","Deep Sapphire Blue often symbolizes wisdom, lo...","Ideal for branding luxury items, technology, o...","Luxurious, Deep, Ocean, Premium, Elegant, Myst...",6,24,222,235.0,94.74,44.71,Dark,"A vibrant and rich shade of blue, reminiscent ..."


In [None]:
import re
def clean_text(text):
    """Normalise free text before vectorisation.

    The value is converted to ``str`` and lower-cased, every run of characters
    other than ``a-z``, ``0-9`` and whitespace is replaced by a space, and runs
    of whitespace are then collapsed to a single space.

    Args:
        text: Value to clean; non-string values are converted with ``str()``.

    Returns:
        The cleaned string, without leading/trailing whitespace and without
        consecutive whitespace characters.
    """
    text = str(text).lower().strip()
    text = re.sub(r'[^a-z0-9\s]+', " ", text)
    # ``\s+`` rather than ``\s``: a single-character match would replace each
    # whitespace character one-for-one and leave double spaces behind.
    text = re.sub(r'\s+', " ", text)
    return text.strip()

In [None]:
# Normalise the combined text of every row with ``clean_text``.
dataset["Combined Text"] = dataset["Combined Text"].apply(clean_text)
# Show the first row by position. ``[0]`` would be a label lookup and raises
# ``KeyError`` when the row labelled 0 has been removed by earlier filtering.
dataset["Combined Text"].iloc[0]

'a vibrant and deep shade of maroon  passionate  intense bold  dramatic strong  powerful deep maroon often symbolizes power  strength  courage  and determination  it can also represent passion  desire  and deep emotions  ideal for creating a striking impact in design elements such as logos  branding  packaging  and advertising  powerful  passionate  bold  deep  strong  intense  maroon  ruby red'

In [None]:
# Model input: the cleaned text document of each row.
x = dataset.loc[:, "Combined Text"]
# Regression targets: the three colour channels.
y = dataset.loc[:, ["R", "G", "B"]]

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
import re

def remove_numbers(s):
    """Remove number-like tokens from ``s`` and tidy the whitespace.

    A token is removed when it starts at a word boundary with one or more
    digits, optionally followed by ASCII letters (e.g. ``2024`` or ``3d``).
    Tokens that merely contain digits after a letter (e.g. ``a1``) are kept.

    Args:
        s: Value to process; it is converted with ``str()`` first.

    Returns:
        The text with those tokens removed, whitespace runs collapsed to a
        single space and surrounding whitespace stripped.
    """
    without_numbers = re.sub(r"\b\d+[a-zA-Z]*\b", " ", str(s))
    collapsed = re.sub(r"\s+", " ", without_numbers)
    return collapsed.strip()

In [None]:
# Strip number-like tokens from both splits with the same helper.
x_train, x_test = (split.apply(remove_numbers) for split in (x_train, x_test))

In [None]:
# TF-IDF settings gathered in one place before the vectorizer is built.
tfidf_settings = dict(
    lowercase=True,
    stop_words="english",
    ngram_range=(1, 2),  # unigrams and bigrams
    sublinear_tf=True,
    # Tokens must start with a letter and be at least two characters long;
    # hyphens are allowed after the first character.
    token_pattern=r"(?u)\b[a-zA-Z][a-zA-Z-]{1,}\b",
    min_df=5,
    max_df=0.9,
    max_features=20000,
)
vec = TfidfVectorizer(**tfidf_settings)

# Fit on the training split only and reuse the fitted vectorizer for the test
# split. NOTE: this replaces the text series in ``x_train``/``x_test`` with
# feature matrices, so the cell cannot be re-run without re-running the split.
x_train = vec.fit_transform(x_train)
x_test = vec.transform(x_test)

In [33]:
from sklearn.linear_model import Ridge
from sklearn.svm import LinearSVR
from sklearn.ensemble import VotingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [None]:
# Two linear base learners; the seed fixes LinearSVR's random state.
ridge = Ridge(alpha=1.0)
svr = LinearSVR(
    C=0.1,
    epsilon=0.05,
    loss="squared_epsilon_insensitive",
    max_iter=30000,
    random_state=42,
)

Ensemble (Ridge + SVR) -> R2: 0.803 MAE: 25.7935 RMSE: 32.7451


In [38]:
# Weighted average of the two base learners (Ridge 0.4, LinearSVR 0.6).
base_learners = [("ridge", ridge), ("svr", svr)]
vote = VotingRegressor(estimators=base_learners, weights=[0.4, 0.6])

# MultiOutputRegressor fits one copy of the voting model per target column.
ensemble = MultiOutputRegressor(vote)
ensemble.fit(x_train, y_train)

In [36]:
# Score on the held-out split first, then on the training split, and show
# both as a (test, train) pair.
test_score = ensemble.score(x_test, y_test)
train_score = ensemble.score(x_train, y_train)
test_score, train_score

(0.8030299252383314, 0.8394743491062885)

In [39]:
# Predict the three target columns for the held-out split.
# NOTE(review): the regressors are unbounded, so predictions may presumably
# fall outside the 0-255 channel range — confirm whether clipping is wanted.
pred = ensemble.predict(x_test)

In [40]:
# Every metric is averaged uniformly over the three target columns.
metric_kwargs = {"multioutput": "uniform_average"}
r2 = r2_score(y_test, pred, **metric_kwargs)
mae = mean_absolute_error(y_test, pred, **metric_kwargs)
# RMSE is the square root of the averaged MSE, not the mean of per-column RMSEs.
mse = mean_squared_error(y_test, pred, **metric_kwargs)
rmse = np.sqrt(mse)

print(
    "Ensemble (Ridge + SVR) -> R2:", round(r2, 4),
    "MAE:", round(mae, 4),
    "RMSE:", round(rmse, 4),
)

Ensemble (Ridge + SVR) -> R2: 0.803 MAE: 25.7935 RMSE: 32.7451
