In [492]:
import colorsys
import json
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [493]:
# original-dataset\\GlobalTVBC\\css\\23579edb1e12f906d2b83f522d1c27011dc1b1d8.css
with open("minified-dataset\\Facebook\\index_files\\1GsqYFnXaZQ.css", "rb") as file:
    css_text_test = file.read().decode(errors="replace")

In [494]:
def getSelectorStyleMap(styles):
    styles = np.array(styles.split("{"))
    styles = list(map(lambda selectorStyle: selectorStyle.split("}"), styles))
    styles = [j for sub in styles for j in sub] # empty string means end of an at rule
    
    selectorToStyle = {}
    mediaQuery = ""
    atRule = ""
    isSelector = True

    for i, cssComponent in enumerate(styles):
        if "@media" in cssComponent:
            mediaQuery = cssComponent
            continue
        elif "@" in atRule:
            atRule = cssComponent
            continue

        if isSelector:
            selectorToStyle[cssComponent] = None
            isSelector = False
        else:
            selectorToStyle[styles[i-1]] = {
                "style": list(map(lambda x: x.split(":"), cssComponent.split(";"))),
                "atRule": atRule,
                "mediaQuery": mediaQuery
            }
            isSelector = True

    del selectorToStyle[""]

    return selectorToStyle

In [495]:
testStyleMap = getSelectorStyleMap(css_text_test)
testStyleMap

{'form': {'style': [['margin', '0'], ['padding', '0']],
  'atRule': '',
  'mediaQuery': ''},
 'label': {'style': [['cursor', 'pointer'],
   ['color', '#666'],
   ['font-weight', 'bold'],
   ['vertical-align', 'middle']],
  'atRule': '',
  'mediaQuery': ''},
 'label input': {'style': [['font-weight', 'normal']],
  'atRule': '',
  'mediaQuery': ''},
 'textarea,.inputtext,.inputpassword': {'style': [['border',
    '1px solid #bdc7d8'],
   ['margin', '0'],
   ['padding', '3px'],
   ['-webkit-appearance', 'none'],
   ['-webkit-border-radius', '0']],
  'atRule': '',
  'mediaQuery': ''},
 'textarea': {'style': [['max-width', '100%']],
  'atRule': '',
  'mediaQuery': ''},
 'select': {'style': [['border', '1px solid #bdc7d8'], ['padding', '2px']],
  'atRule': '',
  'mediaQuery': ''},
 '.inputtext,.inputpassword': {'style': [['padding-bottom', '4px']],
  'atRule': '',
  'mediaQuery': ''},
 '.inputtext:invalid,.inputpassword:invalid': {'style': [['-webkit-box-shadow',
    'none']],
  'atRule': ''

In [496]:
def colorToHexCode(value):
    try:
        # error is caused with gradients
        if "hsla" in value or "rgba" in value:
            return value

        # seperate hsl or rgb args
        value = value.replace("%", "")
        colors = [int(x) for x in value[value.find("(")+len("("):value.rfind(")")].split(",")]

        # if hsl convert to rgb
        if "hsl" in value:
            h, s, l = colors
            h = h/360
            s = s/100
            l = l/100
            colors = colorsys.hls_to_rgb(h, l, s)
            colors = [int(round(x*255.0)) for x in colors]

        # convert rgb to hexcode
        colors = list(map(lambda color: str(hex(color).split('x')[-1][-2:]).zfill(2), colors))
        colors = "".join(colors)

        return f"#{colors}"
    except:
        return value

In [497]:
def seperateSizeValues(values):
    values = values.replace(" ", "")
    values = values.replace("auto", "1auto")

    seperatedValues = []
    value = ""
    isUnit = False

    for char in values:
        if isUnit and char.isdigit():
            isUnit = False
            seperatedValues.append(value)
            value = ""

        value = f"{value}{char}"

        if not char.isdigit():
            isUnit = True

    seperatedValues.append(value)

    return [x if x != "1auto" else "auto" for x in seperatedValues]

In [498]:
seperateSizeValues("20px auto 20px 10px")

['20px', 'auto', '20px', '10px']

In [499]:
def sizeShortHandToLongHand(attr, seperatedValues):
    try:
        fourPosArgs = [["top"], ["right"], ["bottom"], ["left"]]
        threePosArgs = [["top"], ["left", "right"], ["bottom"]]
        twoPosArgs = [["top", "bottom"], ["left", "right"]]

        posArgs = [twoPosArgs, threePosArgs, fourPosArgs][len(seperatedValues)-2]

        longHand = []

        for i, pos in enumerate(posArgs):
            for posName in pos:
                longHand.append([f"{attr}-{posName}", seperatedValues[i]])

        return longHand
    except:
        return None

In [500]:
sizeShortHandToLongHand("padding", seperateSizeValues("20px25%31px21vw"))

[['padding-top', '20px'],
 ['padding-right', '25%'],
 ['padding-bottom', '31px'],
 ['padding-left', '21vw']]

In [501]:
sizeShortHandToLongHand("padding", seperateSizeValues("0"))

In [502]:
def normalizeStyle(attr, value):
    isMultiSelectorChange = False

    if "color" in attr:
        if not "#" in value:
           value = colorToHexCode(value)
    elif attr == "margin" or attr == "padding":
        isMultiSelectorChange = True
        seperateValues = seperateSizeValues(value)

        if seperateValues:
            isMultiSelectorChange = False
            attr, value = None, sizeShortHandToLongHand(attr, seperateValues)

    return attr, value, isMultiSelectorChange

for selector in testStyleMap:
    if selector:
        for i, x in enumerate(testStyleMap[selector]["style"]):
            try:
                attr, value = x
                a, v, isMultiSelectorChange = normalizeStyle(attr, value)

                if isMultiSelectorChange:
                    del attr, value
                    testStyleMap[selector]["style"].extend(v)
                else:
                    attr, value = a, v
            except:
                pass


if "" in testStyleMap:
    del testStyleMap[""]

In [503]:
testStyleMap

{'form': {'style': [['margin', '0'], ['padding', '0']],
  'atRule': '',
  'mediaQuery': ''},
 'label': {'style': [['cursor', 'pointer'],
   ['color', '#666'],
   ['font-weight', 'bold'],
   ['vertical-align', 'middle']],
  'atRule': '',
  'mediaQuery': ''},
 'label input': {'style': [['font-weight', 'normal']],
  'atRule': '',
  'mediaQuery': ''},
 'textarea,.inputtext,.inputpassword': {'style': [['border',
    '1px solid #bdc7d8'],
   ['margin', '0'],
   ['padding', '3px'],
   ['-webkit-appearance', 'none'],
   ['-webkit-border-radius', '0']],
  'atRule': '',
  'mediaQuery': ''},
 'textarea': {'style': [['max-width', '100%']],
  'atRule': '',
  'mediaQuery': ''},
 'select': {'style': [['border', '1px solid #bdc7d8'], ['padding', '2px']],
  'atRule': '',
  'mediaQuery': ''},
 '.inputtext,.inputpassword': {'style': [['padding-bottom', '4px']],
  'atRule': '',
  'mediaQuery': ''},
 '.inputtext:invalid,.inputpassword:invalid': {'style': [['-webkit-box-shadow',
    'none']],
  'atRule': ''

In [504]:
def getStyleText(styleMap):
    selectorStyles = [styleMap[key]["style"] for key in styleMap if not styleMap[key]["atRule"] and not styleMap[key]["mediaQuery"]]
    styles = []

    for selectorStyle in selectorStyles:
        style = ""
        for attr, value in selectorStyle:
            style += f"{attr}: {value}; "
        style = style.strip()

        styles.append(style)

    return styles

In [505]:
# does not include styles in media queries
testStyles = getStyleText(testStyleMap)

In [506]:
testStyles[:3]

['margin: 0; padding: 0;',
 'cursor: pointer; color: #666; font-weight: bold; vertical-align: middle;',
 'font-weight: normal;']

In [507]:
runKmeans = False

Iteration: 435/517
C:\Users\mattg_3roa89k\AppData\Local\Temp\ipykernel_16676\36860394.py:6: ConvergenceWarning: Number of distinct clusters (435) found smaller than n_clusters (436). Possibly due to duplicate points in X.
  km = km.fit(X)

In [508]:
selector_styles_df = None

if runKmeans:
    vectorizer = TfidfVectorizer(stop_words={'english'})
    X = vectorizer.fit_transform(testStyles)
    
    Sum_of_squared_distances = []
    K = range(2,435)

    for k in K:
        km = KMeans(n_clusters=k, max_iter=150, n_init=10)
        km = km.fit(X)
        Sum_of_squared_distances.append(km.inertia_)
        print(f"Iteration: {k}/435", end="\r")

    plt.plot(K, Sum_of_squared_distances, 'bx-')
    plt.xlabel('k')
    plt.ylabel('Sum_of_squared_distances')
    plt.title('Elbow Method For Optimal k')
    plt.show()

    # rerunning this without the above cells will change the output values
    selector = list(testStyleMap.keys())

    true_k = 200
    model = KMeans(n_clusters=true_k, init='k-means++', max_iter=200, n_init=10)
    model.fit(X)
    labels = model.labels_

    selector_styles_df = pd.DataFrame(list(zip(selector,labels)),columns=['selector','cluster'])
    selector_styles_df = selector_styles_df.sort_values(by=['cluster'])
else:
    selector_styles_df = pd.read_json("selectors-clustered.json")
    selector_styles_df.columns = ['selector','cluster']

In [509]:
selector_styles_df

Unnamed: 0,selector,cluster
0,.pls,0
1,.phs,0
2,.mls,0
3,._5fyj,0
4,._4ki._704>li,0
...,...,...
512,".uiButton,.uiButtonSuppressed:active,.uiButton...",199
513,._50zz._50z-:hover,199
514,"._50zz._50z-:active,._50zz._42fs",199
515,._50zz._50z-,199


In [510]:
def saveAsJson(filePath, data):
    with open(filePath, 'w') as fp:
        json.dump(data, fp, indent=2)

In [511]:
if runKmeans:
    saveAsJson("selectors-clustered.json", selector_styles_df.values.tolist())

In [512]:
clusteredSelectors = selector_styles_df.values.tolist()
clusterSelectorMap = {}

for selector, clusterIndex in clusteredSelectors:
    if str(clusterIndex) in clusterSelectorMap:
        clusterSelectorMap[str(clusterIndex)].append(selector)
    else:
        clusterSelectorMap[str(clusterIndex)] = [selector]

In [513]:
def displayCluster(clusterIndex):
    for selector in clusterSelectorMap[str(clusterIndex)]:
        print(selector, testStyleMap[selector])

In [514]:
displayCluster(0)

.pls {'style': [['padding-left', '5px']], 'atRule': '', 'mediaQuery': ''}
.phs {'style': [['padding-left', '5px'], ['padding-right', '5px']], 'atRule': '', 'mediaQuery': ''}
.mls {'style': [['margin-left', '5px']], 'atRule': '', 'mediaQuery': ''}
._5fyj {'style': [['background-color', 'rgba(0,0,0,.5)'], ['left', '0'], ['padding', '5px 0 5px 5px'], ['position', 'absolute'], ['top', '0'], ['width', '99%']], 'atRule': '', 'mediaQuery': ''}
._4ki._704>li {'style': [['padding-left', '5px'], ['padding-right', '5px']], 'atRule': '', 'mediaQuery': ''}


In [515]:
displayCluster(101)

.pop_content .dialog_content.dialog_content_titleless {'style': [['border-top', '1px solid #555']], 'atRule': '', 'mediaQuery': ''}
.pop_content .dialog_content {'style': [['background', '#fff'], ['border', '1px solid #555'], ['border-top-width', '0']], 'atRule': '', 'mediaQuery': ''}
._13,._14 {'style': [['border-color', '#555'], ['border-style', 'solid'], ['border-width', '0 1px']], 'atRule': '', 'mediaQuery': ''}


In [516]:
unsortedCombinedStyleMap = {}

for clusterIndex in clusterSelectorMap:
    sharedStyles = {}
    sharedValues = {}

    for selector in clusterSelectorMap[clusterIndex]:
        for attr, value in testStyleMap[selector]["style"]:
            style = f"{attr.strip()}: {value.strip()};"

            if style in sharedStyles:
                sharedStyles[style].append(selector)
            else:
                sharedStyles[style] = [selector]
    
    for style in sharedStyles:
        combinedSelector = ", ".join(sharedStyles[style])
        unsortedCombinedStyleMap[combinedSelector] = style

unsortedCombinedStyleMap

{'.pls, .phs, ._4ki._704>li': 'padding-left: 5px;',
 '.phs, ._4ki._704>li': 'padding-right: 5px;',
 '.mls': 'margin-left: 5px;',
 '._5fyj': 'width: 99%;',
 '._4g4y': 'width: 370px;',
 '._4g4y, .mvm, .mtm': 'margin-top: 10px;',
 '.mbm, .mvm': 'margin-bottom: 10px;',
 '.mam': 'margin: 10px;',
 '._5a8u, ._7lt ._7lw, ._4-hz': 'background-color: #fff;',
 '._5a8u, ._4-i2:last-child': '-webkit-border-radius: 0 0 3px 3px;',
 '._4-i0': 'text-shadow: 0 1px 0 #fff;',
 '._4-i0, ._4-i2:first-child': '-webkit-border-radius: 3px 3px 0 0;',
 '._7lt ._7lw': 'height: 100px;',
 '._4-i2:only-child, ._4-hz': '-webkit-border-radius: 3px;',
 '._4-hz': 'position: relative;',
 '._50-0._50z_:hover, ._50-0._50z_, ._50-0._50z-:hover, ._50-0._50z-, ._50-0._50z-:active,._50-0._42fs, ._53ip ._53im ._53io, ._53ip ._53in ._53io': 'background-size: auto;',
 '._50-0._50z_:hover': 'background-position: -346px -175px;',
 '._50-0._50z_': 'background-position: -333px -175px;',
 '._50-0._50z-:hover': 'background-position: -3

In [517]:
len(unsortedCombinedStyleMap)

468

### Converting Style Map to CSS Text and then File
Add combines styles to the top of file but reorder the combined styles by the selector that appears the highest in the original stylesheet/map

In [518]:
combinedSelectorOrder = []

for combinedSelector in unsortedCombinedStyleMap:
    selectors = combinedSelector.split(", ")
    mapKeys = list(testStyleMap.keys())
    order = [mapKeys.index(selector) for selector in selectors]
    
    combinedSelectorOrder.append((min(order), combinedSelector))

combinedSelectorOrder.sort(key=lambda x: x[0])
combinedSelectorOrder

[(0, 'button, form'),
 (0, 'form, .full_bleed .pop_content .dialog_body'),
 (1, 'label, ._5upp'),
 (1, 'label'),
 (2, 'label input, ._5kx5, .fwn'),
 (3, 'textarea,.inputtext,.inputpassword, select'),
 (3, 'textarea,.inputtext,.inputpassword'),
 (4, 'textarea'),
 (5, 'select'),
 (6, '.inputtext,.inputpassword'),
 (7,
  '._4jy0._52nd, ._59pe,form.async_saving ._59pe,a.async_saving._59pe,._59pe._42fr,._59pe._42fr:active,._59pe._42fr:focus,._59pe._42fr:hover, .uiButtonSuppressed, ._51xa ._4jy0, form.async_saving .uiButton,.uiButtonDisabled,.uiButtonDisabled:active,.uiButtonDisabled:focus,.uiButtonDisabled:hover, .inputtext:invalid,.inputpassword:invalid, html ._5upp._5f0v:focus'),
 (8,
  '.uiScaledImageContainer .verticallyAligned, .uiGrid .vMid, ._4jy0 .img, ._509_>li, .inputcheckbox, .uiIconText .img, .inputradio'),
 (8, '.inputradio'),
 (9, '.inputcheckbox'),
 (10, '.inputbutton,.inputsubmit'),
 (11, '.inputsubmit_disabled'),
 (12, '.inputaux'),
 (13, '.inputaux_disabled'),
 (14, '.inpu

In [519]:
def getDefaultStyleProps(style):
    return {
        "style": style,
        "atRule": "",
        "mediaQuery": ""
    }

selectorOrder = [x[0] for x in combinedSelectorOrder]
selectorStylesInOrder = [[x[1], getDefaultStyleProps(unsortedCombinedStyleMap[x[1]])] for x in combinedSelectorOrder]
selectorStylesInOrder

[['button, form', {'style': 'margin: 0;', 'atRule': '', 'mediaQuery': ''}],
 ['form, .full_bleed .pop_content .dialog_body',
  {'style': 'padding: 0;', 'atRule': '', 'mediaQuery': ''}],
 ['label, ._5upp',
  {'style': 'vertical-align: middle;', 'atRule': '', 'mediaQuery': ''}],
 ['label', {'style': 'font-weight: bold;', 'atRule': '', 'mediaQuery': ''}],
 ['label input, ._5kx5, .fwn',
  {'style': 'font-weight: normal;', 'atRule': '', 'mediaQuery': ''}],
 ['textarea,.inputtext,.inputpassword, select',
  {'style': 'border: 1px solid #bdc7d8;', 'atRule': '', 'mediaQuery': ''}],
 ['textarea,.inputtext,.inputpassword',
  {'style': '-webkit-border-radius: 0;', 'atRule': '', 'mediaQuery': ''}],
 ['textarea', {'style': 'max-width: 100%;', 'atRule': '', 'mediaQuery': ''}],
 ['select', {'style': 'padding: 2px;', 'atRule': '', 'mediaQuery': ''}],
 ['.inputtext,.inputpassword',
  {'style': 'padding-bottom: 4px;', 'atRule': '', 'mediaQuery': ''}],
 ['._4jy0._52nd, ._59pe,form.async_saving ._59pe,a.as

In [520]:
print(sum([len(testStyleMap[x]["style"]) for x in testStyleMap]))

1099


In [521]:
for selector, styleProps in selectorStylesInOrder:
    selectors = selector.split(", ")
    styles = [x for x in styleProps["style"].split(";") if x != ""]
    
    for i, style in enumerate(styles):
        styles[i] = style.replace(" ", "").split(":")

    for selector in selectors:
        if selector in testStyleMap:
            for i, originalStyle in enumerate(testStyleMap[selector]["style"]):
                a1, v1 = originalStyle
                for a2, v2 in styles:
                    if a1 == a2 and v1 == v2:
                        del testStyleMap[selector]["style"][i]

In [522]:
print(sum([len(testStyleMap[x]["style"]) for x in testStyleMap]))

528


In [523]:
def insertElementsByIndices(list_a, list_b, pos):
    assert(len(list_b) == len(pos))
    acc = 0
    for i in range(len(list_b)):
        list_a.insert(pos[i]+acc, list_b[i])
        acc += 1

    return list_a

In [524]:
styles = [[selector, testStyleMap[selector]] for selector in testStyleMap]
styles

[['form', {'style': [], 'atRule': '', 'mediaQuery': ''}],
 ['label',
  {'style': [['cursor', 'pointer'], ['color', '#666']],
   'atRule': '',
   'mediaQuery': ''}],
 ['label input', {'style': [], 'atRule': '', 'mediaQuery': ''}],
 ['textarea,.inputtext,.inputpassword',
  {'style': [['border', '1px solid #bdc7d8'],
    ['margin', '0'],
    ['padding', '3px'],
    ['-webkit-appearance', 'none']],
   'atRule': '',
   'mediaQuery': ''}],
 ['textarea', {'style': [], 'atRule': '', 'mediaQuery': ''}],
 ['select',
  {'style': [['border', '1px solid #bdc7d8']],
   'atRule': '',
   'mediaQuery': ''}],
 ['.inputtext,.inputpassword', {'style': [], 'atRule': '', 'mediaQuery': ''}],
 ['.inputtext:invalid,.inputpassword:invalid',
  {'style': [], 'atRule': '', 'mediaQuery': ''}],
 ['.inputradio',
  {'style': [['padding', '0'], ['margin', '0 5px 0 0']],
   'atRule': '',
   'mediaQuery': ''}],
 ['.inputcheckbox', {'style': [], 'atRule': '', 'mediaQuery': ''}],
 ['.inputbutton,.inputsubmit',
  {'style': 

In [525]:
styles = insertElementsByIndices(styles, selectorStylesInOrder, selectorOrder)
styles

[['button, form', {'style': 'margin: 0;', 'atRule': '', 'mediaQuery': ''}],
 ['form, .full_bleed .pop_content .dialog_body',
  {'style': 'padding: 0;', 'atRule': '', 'mediaQuery': ''}],
 ['form', {'style': [], 'atRule': '', 'mediaQuery': ''}],
 ['label, ._5upp',
  {'style': 'vertical-align: middle;', 'atRule': '', 'mediaQuery': ''}],
 ['label', {'style': 'font-weight: bold;', 'atRule': '', 'mediaQuery': ''}],
 ['label',
  {'style': [['cursor', 'pointer'], ['color', '#666']],
   'atRule': '',
   'mediaQuery': ''}],
 ['label input, ._5kx5, .fwn',
  {'style': 'font-weight: normal;', 'atRule': '', 'mediaQuery': ''}],
 ['label input', {'style': [], 'atRule': '', 'mediaQuery': ''}],
 ['textarea,.inputtext,.inputpassword, select',
  {'style': 'border: 1px solid #bdc7d8;', 'atRule': '', 'mediaQuery': ''}],
 ['textarea,.inputtext,.inputpassword',
  {'style': '-webkit-border-radius: 0;', 'atRule': '', 'mediaQuery': ''}],
 ['textarea,.inputtext,.inputpassword',
  {'style': [['border', '1px solid 

In [529]:
refactoredCssText = ""

def getStyleText(selector, styles):
    if type(styles) is list:
        return f"{selector}{'{'}{''.join([f'{x[0]}:{x[1]};' for x in styles])}{'}'}"
    else:
        return f"{selector}{'{'}{styles}{'}'}"

for selector, styleProps in styles:
    if not styleProps["atRule"] == "" or len(styleProps["style"]) == 0:
        continue

    if styleProps["mediaQuery"] == "":
        refactoredCssText += getStyleText(selector, styleProps["style"])
    else:
        refactoredCssText += f"{styleProps['mediaQuery']}{'{'}{getStyleText(selector, styleProps['style'])}{'}'}"

# refactoredCssText = refactoredCssText.replace(" ", "")

In [530]:
with open("testRefactor.css", "w") as f:
    f.write(refactoredCssText)