# Create A YouTube Name

## Scrape From Social Blade

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs

In [2]:
# SocialBlade 30-day trending pages used as data sources: the
# "most-subscribed" list (good) and the "most-unsubscribed" list (bad).
good_url = (
    "https://socialblade.com/youtube/top/trending/"
    "top-500-channels-30-days/most-subscribed"
)
bad_url = (
    "https://socialblade.com/youtube/top/trending/"
    "bottom-500-channels-30-days/most-unsubscribed"
)

In [3]:
def search_style(div, style_dict):
    """Report whether an element's inline ``style`` holds every given declaration.

    Args:
        div: Element exposing ``get_attribute_list`` (called here as
            ``div.get_attribute_list("style")``), e.g. a BeautifulSoup tag.
        style_dict: Mapping of CSS property name to the exact value required.
            Both sides are compared as strings after surrounding whitespace
            has been stripped from the parsed property and value.

    Returns:
        True when the element has a non-empty style attribute and every
        key/value pair of ``style_dict`` appears in it, otherwise False.
    """
    div_style = div.get_attribute_list("style")[0]
    if not div_style:
        return False

    declared = {}
    for declaration in div_style.split(";"):
        # Split on the FIRST colon only so values that themselves contain a
        # colon (e.g. "url(http://...)") are kept whole.
        prop, colon, value = declaration.partition(":")
        if not colon:
            # Blank fragment (trailing ";") or malformed declaration: skip it
            # rather than fail on a missing value.
            continue
        declared[prop.strip()] = value.strip()

    return all(
        key in declared and declared[key] == wanted
        for key, wanted in style_dict.items()
    )

In [4]:
def get_details(row):
    """Extract (name, subs, views) from one row of the SocialBlade table.

    The name is the string of the first ``<a>`` inside ``row``. The two
    numeric figures are read from the ``<div>`` elements whose inline style
    declares ``width: 150px``; thousands separators are removed and a "--"
    placeholder is read as 0 before converting to ``int``.

    Exactly two such divs must be present, otherwise unpacking raises
    ``ValueError``.
    """
    name = row.find_all("a")[0].string

    # Collect the fixed-width cells that carry the numeric columns.
    wanted_style = {"width": "150px"}
    stat_cells = []
    for cell in row.find_all("div"):
        if search_style(cell, wanted_style):
            stat_cells.append(cell)

    # Normalise each cell's text and turn it into an integer.
    numbers = []
    for cell in stat_cells:
        cleaned = cell.text.strip().replace(",", "").replace("--", "0")
        numbers.append(int(cleaned))

    subs, views = numbers
    return name, subs, views

In [5]:
def get_socialblade(url, timeout=30):
    """Download a SocialBlade ranking page and return its rows as a DataFrame.

    Args:
        url: Address of the SocialBlade page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get`` so a stalled connection cannot hang forever.

    Returns:
        pandas DataFrame with columns ``Name``, ``Subs`` and ``Views``, one
        row per table row found on the page.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is not silently parsed into an empty table.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # target_style is the normal width of the SocialBlade data table rows
    target_style = {"width": "860px"}

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = bs(response.content, "html.parser")
    matching_divs = [c_div for c_div in soup.find_all("div") if search_style(c_div, target_style)]
    # The first two matches are skipped -- presumably non-data (header) rows;
    # confirm against the page markup if the layout changes.
    rows = matching_divs[2:]
    table_data = [get_details(c_row) for c_row in rows]
    return pd.DataFrame(table_data, columns=["Name", "Subs", "Views"])

## Prepare Names for NN

In [36]:
import numpy as np

In [63]:
def encode(string, valid_chars=None, max_len=36):
    """Convert a string into a matrix of one-hot encoded character vectors.

    Args:
        string: Text to encode, one matrix row per character.
        valid_chars: Sequence of allowed characters; a character's position
            in it is the column set to 1. When omitted or empty, defaults to
            ASCII letters (lower then upper), a set of punctuation
            characters, and the digits 0-9.
        max_len: Minimum number of rows. Shorter strings are padded with
            all-zero rows; longer strings are NOT truncated.

    Returns:
        Float numpy array of shape
        ``(max(len(string), max_len), len(valid_chars))``.

    Raises:
        ValueError: If ``string`` contains a character not in ``valid_chars``.
    """
    if not valid_chars:
        lowercase = "abcdefghijklmnopqrstuvwxyz"
        letters = lowercase + lowercase.upper()
        special_chars = r" !@#$%^&*()_+-={}[]:;<,>.?/\`~'" + '"'
        numbers = "0123456789"
        valid_chars = letters + special_chars + numbers
    valid_char_ct = len(valid_chars)

    # Build the character -> column table once; the first occurrence wins so
    # duplicated characters map to the same column as a left-to-right search.
    column_of = {}
    for column, char in enumerate(valid_chars):
        column_of.setdefault(char, column)

    # Allocate the padded matrix up front so the result is always 2-D, even
    # when there are no rows at all.
    row_ct = max(len(string), max_len)
    output = np.zeros((row_ct, valid_char_ct))
    for row, char in enumerate(string):
        column = column_of.get(char)
        if column is None:
            raise ValueError(
                "character {!r} at position {} is not in valid_chars".format(char, row)
            )
        output[row, column] = 1
    return output

In [83]:
def decode(input_matrix, valid_chars = None):
    """Turn a matrix of one-hot character rows back into the string it encodes.

    Each row contributes the character of ``valid_chars`` at the position of
    the first entry equal to 1. Decoding stops at the first row that has no
    entry equal to 1, and the text collected so far is returned.

    When ``valid_chars`` is omitted or empty, the default alphabet is ASCII
    letters (lower then upper), a set of punctuation characters, and the
    digits 0-9.
    """
    if not valid_chars:
        lowercase = "abcdefghijklmnopqrstuvwxyz"
        special_chars = r" !@#$%^&*()_+-={}[]:;<,>.?/\`~'" + '"'
        digits = "0123456789"
        valid_chars = lowercase + lowercase.upper() + special_chars + digits

    decoded = []
    for row in input_matrix:
        hot_positions = np.where(row == 1)[0]
        if len(hot_positions) == 0:
            # A row with no 1 ends the text.
            break
        decoded.append(valid_chars[hot_positions[0]])
    return "".join(decoded)