In [1]:
from operator import concat
from functools import reduce
import re
import json

In [2]:
# Read the file as UTF-8 explicitly: the descriptions contain non-ASCII text
# and the platform default encoding is not UTF-8 everywhere.
with open('asmrtist_top_descriptions.json', encoding='utf-8') as file:
    top_descriptions = json.load(file)

# Normalise the artist names (the dict keys) to lower case; the file is already
# closed at this point, the transformation only needs the parsed data.
top_descriptions = {key.lower(): value for key, value in top_descriptions.items()}

In [3]:
# One-shot iterator over the lower-cased artist keys, in the dict's own order.
# Every cleaning cell below pulls exactly one key from it with next().
asmrtists_names = iter(list(map(str.lower, top_descriptions)))

In [4]:
# Accumulator filled by the per-artist cells below: artist key -> token list.
cleaned_descriptions = dict()

In [5]:
# Bind every artist key as a notebook-level variable (e.g. `massageasmr`).
# Keys that are not valid identifiers, such as "jojo'sasmr", can only be
# reached through top_descriptions[...] afterwards.
globals().update(top_descriptions)

In [6]:
def filter_single_letters(values):
    """Return a list with the items of `values` that are longer than one character.

    Args:
        values: iterable of sized items (the token lists built below).

    Returns:
        list: the items whose ``len()`` is greater than 1, in their original order.
    """
    return [value for value in values if len(value) > 1]

In [7]:
# Show the artist keys in dict order; asmrtists_names was built from these
# keys, so the next() calls in the cells below hand them out in this order.
top_descriptions.keys()

dict_keys(['massageasmr', 'tingtingasmr', 'gentlewhisperingasmr', 'whispersredasmr', 'gibiasmr', 'asmrrequests', 'asmrdarling', 'asmrsurge', "jojo'sasmr", 'zachchoiasmr'])

### MassageASMR

In [8]:
def clean_massage_descriptions(description):
    """Tokenize MassageASMR descriptions into one flat list of lowercase words.

    Each description is cut at the first 'If you would', everything from
    'htt' to the end of that line is dropped (this is how links are removed),
    and every non-word character or digit becomes a space before the text is
    lower-cased and split on whitespace. Apostrophes count as non-word
    characters here, so "it's" yields 'it' and 's'.

    Args:
        description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in description:
        desc = desc.split('If you would')[0]
        # \Z also catches a link on the last line that has no trailing newline.
        desc = re.sub(r'htt.*?(?:\n|\Z)', '', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        # split() without arguments already collapses runs of whitespace.
        tokens.extend(desc.lower().split())
    return tokens

In [9]:
# Read the raw descriptions by key rather than through an injected variable.
clean_massageasmr = filter_single_letters(
    clean_massage_descriptions(top_descriptions['massageasmr'])
)
# The shared iterator is still advanced (the following cells rely on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'massageasmr', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_massageasmr

### Tingting ASMR

In [10]:
def clean_tingting_descriptions(description):
    """Tokenize Tingting ASMR descriptions into one flat list of lowercase words.

    Each description is cut at the first '~' and then at the first 'Support'.
    Apostrophes are deleted (so "don't" becomes 'dont'), every non-word
    character or digit becomes a space, and the text is lower-cased and split
    on whitespace.

    Args:
        description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in description:
        desc = desc.split('~')[0].split('Support')[0]
        desc = desc.replace("'", '')
        # Matches a literal backslash followed by 'n' (an escaped newline left
        # in the text); real newlines are handled by the \W pass below.
        desc = re.sub(r'\\n', ' ', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        tokens.extend(desc.lower().split())
    return tokens

In [11]:
# Read the raw descriptions by key rather than through an injected variable.
clean_tingting = filter_single_letters(
    clean_tingting_descriptions(top_descriptions['tingtingasmr'])
)
# The shared iterator is still advanced (the following cells rely on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'tingtingasmr', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_tingting

### GentleWhispering ASMR

In [12]:
def clean_gentle_descriptions(description):
    """Tokenize GentleWhispering descriptions into one flat list of lowercase words.

    Each description is cut at the first 'This video is created for',
    everything from 'htt' to the end of that line is dropped (this is how
    links are removed), and the rest is cut at the first 'Products'.
    Apostrophes are deleted, every non-word character or digit becomes a
    space, and the text is lower-cased and split on whitespace.

    Args:
        description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in description:
        desc = desc.split('This video is created for')[0]
        # \Z also catches a link on the last line that has no trailing newline.
        desc = re.sub(r'htt.*?(?:\n|\Z)', '', desc)
        # Cut at 'Products' only after link removal, as the original did.
        desc = desc.split('Products')[0]
        desc = desc.replace("'", '')
        # Literal backslash + 'n'; real newlines are handled by the \W pass.
        desc = re.sub(r'\\n', ' ', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        tokens.extend(desc.lower().split())
    return tokens

In [13]:
# Read the raw descriptions by key rather than through an injected variable.
clean_gentle = filter_single_letters(
    clean_gentle_descriptions(top_descriptions['gentlewhisperingasmr'])
)
# The shared iterator is still advanced (the following cells rely on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'gentlewhisperingasmr', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_gentle

In [14]:
# Re-display the artist keys to check which artists are still to be cleaned.
top_descriptions.keys()

dict_keys(['massageasmr', 'tingtingasmr', 'gentlewhisperingasmr', 'whispersredasmr', 'gibiasmr', 'asmrrequests', 'asmrdarling', 'asmrsurge', "jojo'sasmr", 'zachchoiasmr'])

### WhispersRed ASMR

In [15]:
def clean_red_descriptions(description):
    """Tokenize WhispersRed descriptions into one flat list of lowercase words.

    Each description is cut at the first '---', then 'My ASMR Book', then
    'Please LIKE'. Everything from 'htt' to the end of that line is dropped
    (this is how links are removed), apostrophes are deleted, every non-word
    character or digit becomes a space, and the text is lower-cased and split
    on whitespace.

    Args:
        description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in description:
        for marker in ('---', 'My ASMR Book', 'Please LIKE'):
            desc = desc.split(marker)[0]
        # \Z also catches a link on the last line that has no trailing newline.
        desc = re.sub(r'htt.*?(?:\n|\Z)', '', desc)
        desc = desc.replace("'", '')
        # Literal backslash + 'n'; real newlines are handled by the \W pass.
        desc = re.sub(r'\\n', ' ', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        tokens.extend(desc.lower().split())
    return tokens

In [16]:
# Read the raw descriptions by key rather than through an injected variable.
clean_red = filter_single_letters(
    clean_red_descriptions(top_descriptions['whispersredasmr'])
)
# The shared iterator is still advanced (the following cells rely on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'whispersredasmr', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_red

### Gibi ASMR

In [17]:
def clean_gibi_descriptions(description):
    """Tokenize Gibi ASMR descriptions into one flat list of lowercase words.

    Each description is cut at the first '---', everything from 'htt' to the
    end of that line is dropped (this is how links are removed), and every
    non-word character or digit becomes a space before the text is lower-cased
    and split on whitespace. Apostrophes count as non-word characters here,
    so "it's" yields 'it' and 's'.

    Args:
        description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in description:
        desc = desc.split('---')[0]
        # \Z also catches a link on the last line that has no trailing newline.
        desc = re.sub(r'htt.*?(?:\n|\Z)', '', desc)
        # Literal backslash + 'n'; real newlines are handled by the \W pass.
        desc = re.sub(r'\\n', ' ', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        tokens.extend(desc.lower().split())
    return tokens

In [18]:
# Read the raw descriptions by key rather than through an injected variable.
clean_gibi = filter_single_letters(
    clean_gibi_descriptions(top_descriptions['gibiasmr'])
)
# The shared iterator is still advanced (the following cells rely on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'gibiasmr', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_gibi

In [32]:
# Display the raw Gibi ASMR descriptions; `gibiasmr` is one of the names
# injected into the namespace by locals().update(top_descriptions).
gibiasmr

["Hey everybody (: I darkened this video footage a bit as a little test to make this tapping video a bit more sleep-inducing! Let me know if it's something you like! Excuse the fact that I look a bit uh...ratchet in this video. My skin has been revolting against me. *sigh*\nI wanted to shoot a classic trigger video because my natural nails are the longest they've ever been and lord knows I'm going to accidentally break them sooner rather than later so... whee! There's not much to say, but I hope you enjoy the whispered rambling and tapping/scratching of some new and some familiar objects C:\nTodoroki charm: https://www.etsy.com/listing/57091790...\nTIMESTAMPS! \n0:00 Intro\n1:00 Toaster Coaster\n7:09 iPhone\n14:50 Faux Snakeskin\n21:26 Moisturizer Jar\n------------------------------------------------------------------------------------------\nMy upload schedule:\n►Every Tuesday\n►Every Thursday\n►Every Saturday\n--------------------------------------------------------------------------

### ASMR Requests

In [19]:
def clean_requests_descriptions(description):
    """Tokenize ASMR Requests descriptions into one flat list of lowercase words.

    Each description is cut at the first 'For those who are interested',
    everything from 'htt' to the end of that line is dropped (this is how
    links are removed), and every non-word character or digit becomes a space
    before the text is lower-cased and split on whitespace.

    Args:
        description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in description:
        desc = desc.split('For those who are interested')[0]
        # \Z also catches a link on the last line that has no trailing newline.
        desc = re.sub(r'htt.*?(?:\n|\Z)', '', desc)
        # Literal backslash + 'n'; real newlines are handled by the \W pass.
        desc = re.sub(r'\\n', ' ', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        tokens.extend(desc.lower().split())
    return tokens

In [20]:
# Read the raw descriptions by key rather than through an injected variable.
clean_requests = filter_single_letters(
    clean_requests_descriptions(top_descriptions['asmrrequests'])
)
# The shared iterator is still advanced (the following cells rely on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'asmrrequests', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_requests

### ASMR Darling

In [21]:
def clean_darling_descriptions(description):
    """Tokenize ASMR Darling descriptions into one flat list of lowercase words.

    Each description is cut at the first '~', every non-word character
    becomes a space, digits (together with any run of 'n' directly in front
    of them) are deleted, and the text is lower-cased and split on whitespace.
    Lower-casing matches what the other artist cleaners in this notebook do.

    Args:
        description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in description:
        desc = desc.split('~')[0]
        desc = re.sub(r'\W', ' ', desc)
        # NOTE(review): 'n*' also removes a trailing letter 'n' of a word that
        # is immediately followed by a digit; kept as in the original.
        desc = re.sub(r'n*\d', '', desc)
        # Lower-case only after the digit pass so an upper-case 'N' before a
        # digit is still kept; split() handles the repeated spaces.
        tokens.extend(desc.lower().split())
    return tokens

In [22]:
# Read the raw descriptions by key rather than through an injected variable.
clean_darling = filter_single_letters(
    clean_darling_descriptions(top_descriptions['asmrdarling'])
)
# The shared iterator is still advanced (the following cells rely on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'asmrdarling', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_darling

### ASMRsurge

In [23]:
def clean_surge_descriptions(descriptions):
    """Tokenize ASMRsurge descriptions into one flat list of lowercase words.

    Each description is cut at the first 'Patreon', then 'Patron', then
    'notification' (the plural forms are covered because they start with the
    singular). Everything from 'htt' to the end of that line is dropped (this
    is how links are removed), apostrophes are deleted, every non-word
    character or digit becomes a space, and the text is lower-cased and split
    on whitespace.

    Args:
        descriptions: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in descriptions:
        for marker in ('Patreon', 'Patron', 'notification'):
            desc = desc.split(marker)[0]
        # \Z also catches a link on the last line that has no trailing newline.
        desc = re.sub(r'htt.*?(?:\n|\Z)', '', desc)
        desc = desc.replace("'", '')
        # Literal backslash + 'n'; real newlines are handled by the \W pass.
        desc = re.sub(r'\\n', ' ', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        tokens.extend(desc.lower().split())
    return tokens

In [24]:
# Read the raw descriptions by key rather than through an injected variable.
clean_surge = filter_single_letters(
    clean_surge_descriptions(top_descriptions['asmrsurge'])
)
# The shared iterator is still advanced (the following cells rely on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'asmrsurge', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_surge

### Jojo's ASMR

In [25]:
def clean_jojo_descriptions(jojo_description):
    """Tokenize Jojo's ASMR descriptions into one flat list of lowercase words.

    Each description is cut at the first 'Instagram', everything from 'htt'
    to the end of that line is dropped (this is how links are removed), and
    every non-word character or digit becomes a space before the text is
    lower-cased and split on whitespace.

    Args:
        jojo_description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in jojo_description:
        # One split is enough: nothing containing 'Instagram' can remain in
        # the part kept, so a further split on a longer marker never matches.
        desc = desc.split('Instagram')[0]
        # \Z also catches a link on the last line that has no trailing newline.
        desc = re.sub(r'htt.*?(?:\n|\Z)', '', desc)
        # Literal backslash + 'n'; real newlines are handled by the \W pass.
        desc = re.sub(r'\\n', ' ', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        tokens.extend(desc.lower().split())
    return tokens

In [26]:
# This key is not a valid identifier, so it is read from the dict directly.
clean_jojo = filter_single_letters(
    clean_jojo_descriptions(top_descriptions["jojo'sasmr"])
)
# The shared iterator is still advanced (the following cell relies on it), but
# the key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == "jojo'sasmr", 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_jojo

### Zach Choi ASMR

In [27]:
def clean_zach_descriptions(description):
    """Tokenize Zach Choi ASMR descriptions into one flat list of lowercase words.

    Each description is cut at the first 'http', so only the text in front of
    the first link is kept. Every non-word character or digit becomes a space
    before the text is lower-cased and split on whitespace.

    Args:
        description: iterable of description strings.

    Returns:
        list[str]: the tokens of all descriptions in order; [] for empty input.
    """
    tokens = []
    for desc in description:
        # After this cut no 'https://' can remain, so no separate link-removal
        # pass is needed.
        desc = desc.split('http')[0]
        # Literal backslash + 'n'; real newlines are handled by the \W pass.
        desc = re.sub(r'\\n', ' ', desc)
        desc = re.sub(r'[\W\d]', ' ', desc)
        tokens.extend(desc.lower().split())
    return tokens

In [28]:
# Read the raw descriptions by key rather than through an injected variable.
clean_zach = filter_single_letters(
    clean_zach_descriptions(top_descriptions['zachchoiasmr'])
)
# The key is checked so a re-run or out-of-order run cannot mislabel the data.
artist_key = next(asmrtists_names)
assert artist_key == 'zachchoiasmr', 'cells ran out of order; got %r' % artist_key
cleaned_descriptions[artist_key] = clean_zach

In [31]:
# Display the cleaned Zach Choi tokens (already passed through
# filter_single_letters in the cell that defines clean_zach).
clean_zach

['most',
 'popular',
 'food',
 'for',
 'asmr',
 'with',
 'stephanie',
 'soo',
 'honeycomb',
 'aloe',
 'vera',
 'tanghulu',
 'macarons',
 'our',
 'video',
 'on',
 'stephanie',
 'channel',
 'asmr',
 'most',
 'popular',
 'foods',
 'for',
 'asmr',
 'kfc',
 'onion',
 'rings',
 'mozzarella',
 'corn',
 'dog',
 'chicken',
 'nuggets',
 '먹방',
 'check',
 'out',
 'my',
 'instagram',
 'asmr',
 'hot',
 'cheetos',
 'mozzarella',
 'corn',
 'dogs',
 'with',
 'stephanie',
 'soo',
 'buy',
 'my',
 'merch',
 'asmr',
 'nuclear',
 'fire',
 'noodles',
 'with',
 'trisha',
 'paytas',
 'nikocado',
 'avocado',
 'no',
 'talking',
 'mukbang',
 '먹방',
 'subscribe',
 'to',
 'nick',
 'asmr',
 'nuclear',
 'fire',
 'stretchy',
 'cheese',
 'chicken',
 'wings',
 'mukbang',
 '먹방',
 'cooking',
 'eating',
 'sounds',
 'check',
 'out',
 'my',
 'instagram',
 'subscribe',
 'to',
 'nikocado',
 'avocado',
 'expectations',
 'vs',
 'reality',
 'with',
 'hyunee',
 'eats',
 'nuclear',
 'fire',
 'noodle',
 'mukbang',
 '먹방',
 'subscribe'

##### Save JSON file

In [29]:
# with open('cleaned_descriptions.json', 'w') as file:
#     json.dump(cleaned_descriptions, file)