In [64]:
import pandas as pd
import spacy
from plotly import express as px
from sklearn.decomposition import PCA
import numpy as np
from sklearn.cluster import KMeans

In [2]:
# Load the spaCy pipeline named 'en_core_web_md'; every later cell that parses text goes through `nlp`.
nlp = spacy.load(name='en_core_web_md')

In [22]:
# Read the tab-separated Pantone table; the columns are 'names' and 'values' (see the next cell's output).
df = pd.read_csv('../data/maps/pantone/pantone.tsv', sep='\t')

In [23]:
# Inspect the column labels of the loaded table.
df.columns

Index(['names', 'values'], dtype='object')

In [24]:
# Parse every colour name with spaCy in one batched pass instead of one
# nlp(...) call per string. nlp.pipe yields Doc objects in input order,
# so docs[i] still corresponds to the i-th entry of df['names'].
docs = list(nlp.pipe(df['names'].tolist()))

In [25]:
# Collect the `.vector` of each parsed colour name, in the same order as `docs`.
vecs = [parsed.vector for parsed in docs]

In [26]:
# Reduce the name vectors to three components (they become the x/y/z axes
# of the 3-D scatter plots below).
# random_state pins the output whenever scikit-learn selects its randomized
# SVD solver, so re-running the notebook reproduces the same coordinates.
pca = PCA(n_components=3, random_state=0)

In [27]:
# Fit the PCA on the colour-name vectors and project them in one step.
pcaOut = pca.fit_transform(X=vecs)

In [28]:
# Wrap the projected coordinates in a frame with one labelled column per component.
component_labels = ['C1', 'C2', 'C3']
pcaDF = pd.DataFrame(data=pcaOut, columns=component_labels)

In [29]:
# Attach the colour names row-by-row (by position).
# Assigning the Series itself would align on index labels; a later cell
# re-indexes `df` by name, after which a re-run of this cell would fill the
# column with NaN. Converting to a plain array avoids that alignment.
pcaDF['name'] = df['names'].to_numpy()

In [30]:
# Attach the hex colour values row-by-row (by position).
# A plain array is used instead of the Series so the assignment does not
# depend on `df`'s index, which a later cell replaces with the colour names.
pcaDF['color'] = df['values'].to_numpy()

In [31]:
# Preview the first ten projected colours.
pcaDF.head(n=10)

Unnamed: 0,C1,C2,C3,name,color
0,-0.127152,1.711997,1.165119,egret,#f3ece0
1,-0.874313,-0.82278,-0.047095,snow-white,#f2f0eb
2,-1.028146,-1.443323,0.380005,bright-white,#f4f5f0
3,-1.324862,0.100517,-0.904191,cloud-dancer,#f0eee9
4,0.96026,1.358065,2.997662,gardenia,#f1e8df
5,3.315359,-0.951615,-0.368472,marshmallow,#f0eee4
6,-1.013172,-0.460748,-2.089997,blanc-de-blanc,#e7e9e7
7,-0.318877,1.628171,-0.145707,pristine,#f2e8da
8,-0.869547,-0.731927,-0.176264,whisper-white,#ede6db
9,0.734009,-1.205841,-0.555461,white-asparagus,#e1dbc8


In [62]:
# 3-D scatter of the PCA coordinates, each point labelled with its colour
# name and drawn in its own Pantone hex colour.
plotDF = pcaDF
# color_discrete_map='identity' tells Plotly Express to use the values of the
# 'color' column directly as CSS colours. Passing the whole column as
# color_discrete_sequence instead assigns sequence entries to unique values in
# order of first appearance, so any repeated hex code shifts every later
# point onto the wrong colour.
fig = px.scatter_3d(
    plotDF,
    x='C1',
    y='C2',
    z='C3',
    text='name',
    color='color',
    color_discrete_map='identity',
)

In [63]:
# Save the interactive figure as an HTML file in the working directory.
fig.write_html(file='out.html')

In [65]:
# K-means with six clusters over the colour-name vectors.
# random_state makes the (otherwise randomly initialised) clustering
# reproducible, so the cluster ids shown below stay the same across runs.
# n_init is spelled out because its default differs between scikit-learn
# releases.
km = KMeans(n_clusters=6, n_init=10, random_state=0)

In [68]:
# Fit the clustering on the name vectors; fitting also populates km.labels_,
# which the following cells read.
kmOut = km.fit_transform(X=vecs)

In [73]:
# Sanity check: there should be one cluster label per colour name.
np.shape(km.labels_)

(2310,)

[('egret', 3),
 ('snow-white', 4),
 ('bright-white', 4),
 ('cloud-dancer', 4),
 ('gardenia', 0),
 ('marshmallow', 5),
 ('blanc-de-blanc', 4),
 ('pristine', 3),
 ('whisper-white', 4),
 ('white-asparagus', 1),
 ('birch', 3),
 ('turtledove', 2),
 ('bone-white', 4),
 ('silver-birch', 4),
 ('vanilla-ice', 1),
 ('papyrus', 2),
 ('antique-white', 4),
 ('winter-white', 4),
 ('cloud-cream', 1),
 ('angora', 3),
 ('seedpearl', 2),
 ('vanilla-custard', 1),
 ('almond-oil', 1),
 ('alabaster-gleam', 4),
 ('vanilla', 5),
 ('rutabaga', 5),
 ('banana-crepe', 1),
 ('italian-straw', 1),
 ('whitecap-gray', 4),
 ('fog', 3),
 ('white-swan', 4),
 ('sandshell', 2),
 ('tapioca', 5),
 ('creme-brulee', 1),
 ('parchment', 5),
 ('sheer-pink', 4),
 ('dew', 0),
 ('powder-puff', 1),
 ('pearled-ivory', 1),
 ('white-smoke', 4),
 ('ecru', 0),
 ('navajo', 2),
 ('almost-mauve', 4),
 ('delicacy', 0),
 ('petal-pink', 4),
 ('bridal-blush', 4),
 ('cream-pink', 1),
 ('angel-wing', 4),
 ('pastel-parchment', 1),
 ('star-white', 4

In [80]:
# Group colour names by their k-means cluster id.
# The buckets are created from km.n_clusters rather than a literal, so the
# grouping keeps working if the cluster count on the KMeans object changes.
# Every cluster gets a key up front, even one that ends up empty.
categorized = {cluster_id: [] for cluster_id in range(km.n_clusters)}
# df['names'] and km.labels_ are paired by position: label i belongs to name i.
for name, cat in zip(df['names'], km.labels_):
    categorized[cat].append(name)

In [91]:
# Index the table by colour name (the 'names' column is kept as well) so a
# hex value can be looked up by name with df.loc[...].
df.index = pd.Index(df['names'], name='names')

In [92]:
# Display the re-indexed table: the index and the 'names' column now both hold the colour name.
df

Unnamed: 0_level_0,names,values
names,Unnamed: 1_level_1,Unnamed: 2_level_1
egret,egret,#f3ece0
snow-white,snow-white,#f2f0eb
bright-white,bright-white,#f4f5f0
cloud-dancer,cloud-dancer,#f0eee9
gardenia,gardenia,#f1e8df
...,...,...
crystal-teal,crystal-teal,#00637c
deep-lagoon,deep-lagoon,#005265
sea-moss,sea-moss,#254445
forest-biome,forest-biome,#184a45


In [104]:
# Print the first 20 colour names of every cluster together with their hex value.
# The hex value is fetched with a single .loc[row, column] lookup instead of
# chained indexing (df.loc[row][column]), which builds a throwaway row object
# for every colour.
for i, members in categorized.items():
    print(f"Category {i}: ", end='')
    for color in members[:20]:
        print(f"{color} {df.loc[color, 'values']}, ", end='')
    # Prints a newline plus print's own newline, leaving a blank line between categories.
    print('\n')

Category 0: gardenia #f1e8df, dew #eeded1, ecru #f3dfca, delicacy #f5e3e2, icicle #dadcd0, twill #a79b82, gunmetal #5c5d5b, pewter #666564, sheepskin #dab58f, beige #d5ba98, tan #b69574, pinecone #61473b, ermine #836b4f, sepia #6b543e, linen #edd2c0, hazel #ae7250, silver #a2a2a1, russet #8f5f50, henna #7c423c, afterglow #f3e6c9, 

Category 1: white-asparagus #e1dbc8, vanilla-ice #f0eada, cloud-cream #e6ddc5, vanilla-custard #f3e0be, almond-oil #f4efc1, banana-crepe #e7d3ad, italian-straw #e7d1a1, creme-brulee #dbccb5, powder-puff #f3e0d6, pearled-ivory #f0dfcc, cream-pink #f6e4d9, pastel-parchment #e5d9d3, hint-of-mint #d8e8e6, sprout-green #cbd7d2, winter-wheat #dfc09f, summer-melon #ead3ae, apricot-gelato #f5d7af, asparagus-green #d2cdb4, smoked-pearl #656466, chocolate-chip #685a4e, 

Category 2: turtledove #ded7c8, papyrus #f5edd6, seedpearl #e6dac4, sandshell #d8ccbb, navajo #efdcc3, bluewash #e2e6e0, murmur #d2d8d2, cornhusk #f2d6ae, chinchilla #9c8e7b, tuffet #a59788, brindle #

In [105]:
# Small experiment: two single words, then the same pair joined by a hyphen
# and by a space.
words = [
    "apple",
    "cinnamon",
    "apple-cinnamon",
    "apple cinnamon",
]
# NOTE: this rebinds `vecs`, which until now held the Pantone-name vectors;
# re-run the earlier vector cell before refitting anything on the full palette.
vecs = [nlp(term).vector for term in words]

In [108]:
# Refit the existing `pca` object on the four word vectors and project them;
# this overwrites both the earlier fit and the earlier `pcaOut`.
pcaOut = pca.fit_transform(X=vecs)

In [109]:
# Frame for the word experiment; columns keep their default integer labels 0, 1, 2.
# Note the spelling: `pcaDf` is a different variable from the earlier `pcaDF`.
pcaDf = pd.DataFrame(data=pcaOut)

In [116]:
# 3-D scatter of the four projected words, each point labelled with its word.
# The 'names' column is attached here via assign() because the cell that adds
# it to pcaDf sits below this one in the notebook; referencing it directly
# fails with a missing-column error on a top-to-bottom run. assign() returns
# a copy, so pcaDf itself is left untouched.
fig = px.scatter_3d(pcaDf.assign(names=words), x=0, y=1, z=2, text='names')

In [114]:
# Store the source words alongside their projected coordinates; the scatter
# plot uses this column (text='names') for its point labels.
pcaDf['names'] = words

In [117]:
# Save the word-experiment figure as a separate HTML file.
fig.write_html(file='out2.html')