# Bob Ross Paintings Database Creation

In [1]:
import db_caller
import requests
import bs4
import pandas as pd
import numpy as np
import time
import re
from urllib.parse import urlparse
from tqdm.notebook import tqdm

In [2]:
# Handle to the project database file; used by the final load step.
conn = db_caller.connect("LRFinal.db")

# Proxy mapping handed to every requests.get call in this notebook.
# NOTE(review): only the 'http' scheme is mapped, while the pages fetched
# in the scraping cells are https:// URLs -- confirm the proxy is applied.
proxies = {
    'http': 'http://206.189.157.23',
}

## Bob Ross Painting Links

In [None]:
# Walk the paginated "all paintings" listing and collect one
# [title, season, episode, link] row for every card labelled S<n>E<n>.
painting_list = []
url = urlparse('https://www.twoinchbrush.com/')
next_page = 'https://www.twoinchbrush.com/all-paintings'

while next_page:
    print(f'On {next_page}')
    # Pause a random 1-7 seconds before each request.
    time.sleep(np.random.choice(np.arange(1,8)))
    # A timeout keeps a stalled connection from hanging the cell forever.
    resp = requests.get(next_page, proxies=proxies, timeout=30)
    # Fail loudly on an HTTP error; otherwise an error page would simply
    # contain no "next" link and the loop would end with partial data.
    resp.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')
    for i in soup.select('.col-6.col-md-4.col-lg-3.mb-4 a'):
        label = i.select_one('span')
        if label is None:
            # Anchor without a label span: nothing to classify, skip it.
            continue
        # Capture the season/episode digits directly instead of splitting on
        # the letters 'S' and 'E', which breaks if the label has extra text.
        episode_code = re.match(r'S(\d+)E(\d+)', label.text)
        if episode_code:
            season_info, episode_info = episode_code.groups()
            title = i.select_one('span.painting-card__title').text.strip()
            # Rebuild an absolute link from the site root and the card's href.
            link = url._replace(path=i['href']).geturl()
            painting_list.append([title, season_info, episode_info, link])

    # Follow the pagination "next" link; stop when there is none.
    next_link = soup.select_one('ul.pagination a[rel="next"]')
    next_page = next_link['href'] if next_link else None

On https://www.twoinchbrush.com/all-paintings
On https://www.twoinchbrush.com/all-paintings?page=2
On https://www.twoinchbrush.com/all-paintings?page=3
On https://www.twoinchbrush.com/all-paintings?page=4
On https://www.twoinchbrush.com/all-paintings?page=5
On https://www.twoinchbrush.com/all-paintings?page=6
On https://www.twoinchbrush.com/all-paintings?page=7
On https://www.twoinchbrush.com/all-paintings?page=8
On https://www.twoinchbrush.com/all-paintings?page=9
On https://www.twoinchbrush.com/all-paintings?page=10
On https://www.twoinchbrush.com/all-paintings?page=11
On https://www.twoinchbrush.com/all-paintings?page=12
On https://www.twoinchbrush.com/all-paintings?page=13
On https://www.twoinchbrush.com/all-paintings?page=14
On https://www.twoinchbrush.com/all-paintings?page=15
On https://www.twoinchbrush.com/all-paintings?page=16


In [5]:
# One row per scraped painting, columns in the order the rows were built.
cols = ['Title', 'Season', 'Episode', 'Link']
painting_df = pd.DataFrame(data=painting_list, columns=cols)

display(painting_df)

Unnamed: 0,Title,Season,Episode,Link
0,A Walk in the Woods,1,1,https://www.twoinchbrush.com/painting/a-walk-i...
1,Mt. McKinley,1,2,https://www.twoinchbrush.com/painting/mt-mckinley
2,Ebony Sunset,1,3,https://www.twoinchbrush.com/painting/ebony-su...
3,Winter Mist,1,4,https://www.twoinchbrush.com/painting/winter-mist
4,Quiet Stream,1,5,https://www.twoinchbrush.com/painting/quiet-st...
...,...,...,...,...
112,Mountain Path,9,9,https://www.twoinchbrush.com/painting/mountain...
113,Country Charm,9,10,https://www.twoinchbrush.com/painting/country-...
114,Nature's Paradise,9,11,https://www.twoinchbrush.com/painting/natures-...
115,Mountain by the Sea,9,12,https://www.twoinchbrush.com/painting/mountain...


## Bob Ross Painting Details

**WARNING:** The following cell takes ~15 minutes to run, because it pauses for a random 1–7 seconds before each page request.

In [20]:
def _split_entries(block_text):
    """Split a newline-separated text block into clean, casefolded entries.

    Each entry is stripped of surrounding whitespace and blank entries are
    dropped, so tags and colours are normalised the same way.
    """
    return [entry.strip().casefold()
            for entry in block_text.strip().split('\n')
            if entry.strip()]


# Visit every painting page and record [link, tags, colors] for it.
painting_details = []

for painting_link in tqdm(painting_df['Link']):
    # Pause a random 1-7 seconds before each request.
    time.sleep(np.random.choice(np.arange(1,8)))
    # A timeout keeps a stalled connection from hanging the cell forever.
    resp = requests.get(painting_link, proxies=proxies, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page.
    resp.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')

    # The code relies on exactly two '.color-list' blocks per page, taken in
    # document order as (tags, colors); report the page if that is not so.
    sections = [i.text for i in soup.select('.color-list')]
    if len(sections) != 2:
        raise ValueError(
            f'Expected 2 .color-list sections on {painting_link}, '
            f'found {len(sections)}')
    tags, colors = (_split_entries(section) for section in sections)
    painting_details.append([painting_link, tags, colors])

  0%|          | 0/117 [00:00<?, ?it/s]

In [27]:
# Tabulate the per-painting details; 'Tags' and 'Colors' hold Python lists.
cols = ['Link', 'Tags', 'Colors']
details_df = pd.DataFrame(data=painting_details, columns=cols)

display(details_df.head(5))

Unnamed: 0,Link,Tags,Colors
0,https://www.twoinchbrush.com/painting/a-walk-i...,"[forest, autumn, sunset/sunrise, landscape, la...","[alizarin crimson, bright red, cadmium yellow,..."
1,https://www.twoinchbrush.com/painting/mt-mckinley,"[mountains, foothills, forest, summer, cloudy,...","[alizarin crimson, bright red, cadmium yellow,..."
2,https://www.twoinchbrush.com/painting/ebony-su...,"[forest, autumn, dark background, sunset/sunri...","[alizarin crimson, black gesso, bright red, ca..."
3,https://www.twoinchbrush.com/painting/winter-mist,"[mountains, foothills, forest, winter, stormy,...","[prussian blue, titanium white, van dyke brown]"
4,https://www.twoinchbrush.com/painting/quiet-st...,"[mountains, foothills, forest, rocks, spring, ...","[alizarin crimson, bright red, cadmium yellow,..."


In [26]:
# Attach the scraped tags/colours to each painting row, matching on the URL.
bobross_df = painting_df.merge(details_df, on='Link')

display(bobross_df.head(5))

Unnamed: 0,Title,Season,Episode,Link,Tags,Colors
0,A Walk in the Woods,1,1,https://www.twoinchbrush.com/painting/a-walk-i...,"[forest, autumn, sunset/sunrise, landscape, la...","[alizarin crimson, bright red, cadmium yellow,..."
1,Mt. McKinley,1,2,https://www.twoinchbrush.com/painting/mt-mckinley,"[mountains, foothills, forest, summer, cloudy,...","[alizarin crimson, bright red, cadmium yellow,..."
2,Ebony Sunset,1,3,https://www.twoinchbrush.com/painting/ebony-su...,"[forest, autumn, dark background, sunset/sunri...","[alizarin crimson, black gesso, bright red, ca..."
3,Winter Mist,1,4,https://www.twoinchbrush.com/painting/winter-mist,"[mountains, foothills, forest, winter, stormy,...","[prussian blue, titanium white, van dyke brown]"
4,Quiet Stream,1,5,https://www.twoinchbrush.com/painting/quiet-st...,"[mountains, foothills, forest, rocks, spring, ...","[alizarin crimson, bright red, cadmium yellow,..."


In [56]:
# Unique tags and colours, kept in order of first appearance.
# dict.fromkeys de-duplicates while preserving insertion order.
tag_list = list(dict.fromkeys(
    tag for tags in bobross_df['Tags'] for tag in tags))
color_list = list(dict.fromkeys(
    color for colors in bobross_df['Colors'] for color in colors))

# All-zero indicator matrix: one row per painting, one column per tag/colour.
indicator_columns = tag_list + color_list
tag_df = pd.DataFrame(0,
                      index=np.arange(len(bobross_df)),
                      columns=indicator_columns)

print(tag_df.shape)
display(tag_df.head())

(117, 65)


Unnamed: 0,forest,autumn,sunset/sunrise,landscape,lake/pond,conifer tree,deciduous tree,bushes,mountains,foothills,...,van dyke brown,black gesso,burnt umber,indian yellow,phthalo blue,yellow ochre,liquid black,midnight black,liquid clear,dark sienna
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [57]:
# Flip the indicator to 1 for every tag and colour each painting has.
# Use a single .loc[row, column] assignment rather than chained indexing
# (tag_df[col][row] = 1): the chained form writes through an intermediate
# Series, triggers SettingWithCopyWarning, and does not modify tag_df at all
# when pandas Copy-on-Write is enabled.
for ind, tags in enumerate(bobross_df['Tags']):
    for tag in tags:
        tag_df.loc[ind, tag] = 1
for ind, colors in enumerate(bobross_df['Colors']):
    for color in colors:
        tag_df.loc[ind, color] = 1

In [65]:
# Join the indicator columns onto the painting rows by index, then discard
# the list-valued 'Tags'/'Colors' columns the indicators were built from.
bobross_final_df = pd.merge(
    bobross_df, tag_df, left_index=True, right_index=True
).drop(columns=['Tags', 'Colors'])

In [66]:
# Show the finished table: painting metadata followed by one 0/1 indicator
# column per tag and per colour.
bobross_final_df

Unnamed: 0,Title,Season,Episode,Link,forest,autumn,sunset/sunrise,landscape,lake/pond,conifer tree,...,van dyke brown,black gesso,burnt umber,indian yellow,phthalo blue,yellow ochre,liquid black,midnight black,liquid clear,dark sienna
0,A Walk in the Woods,1,1,https://www.twoinchbrush.com/painting/a-walk-i...,1,1,1,1,1,1,...,1,0,0,0,0,0,0,0,0,0
1,Mt. McKinley,1,2,https://www.twoinchbrush.com/painting/mt-mckinley,1,0,0,1,1,1,...,1,0,0,0,0,0,0,0,0,0
2,Ebony Sunset,1,3,https://www.twoinchbrush.com/painting/ebony-su...,1,1,1,1,1,1,...,1,1,0,0,0,0,0,0,0,0
3,Winter Mist,1,4,https://www.twoinchbrush.com/painting/winter-mist,1,0,0,1,0,1,...,1,0,0,0,0,0,0,0,0,0
4,Quiet Stream,1,5,https://www.twoinchbrush.com/painting/quiet-st...,1,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,Mountain Path,9,9,https://www.twoinchbrush.com/painting/mountain...,1,1,0,1,1,1,...,1,0,0,1,1,1,0,1,0,1
113,Country Charm,9,10,https://www.twoinchbrush.com/painting/country-...,1,0,0,1,0,1,...,1,0,0,1,1,1,0,1,0,1
114,Nature's Paradise,9,11,https://www.twoinchbrush.com/painting/natures-...,0,0,0,1,1,0,...,1,0,0,1,1,1,0,1,0,1
115,Mountain by the Sea,9,12,https://www.twoinchbrush.com/painting/mountain...,1,1,0,1,0,1,...,1,0,0,0,1,1,0,1,1,1


## Loading Bob Ross Painting Data

In [69]:
# Write the final table to the 'paintings' table. With if_exists='fail'
# pandas raises ValueError when the table is already present, which is
# reported here instead of overwriting the existing data.
try:
    bobross_final_df.to_sql(
        name='paintings',
        con=conn,
        index=False,
        if_exists='fail',
    )
except ValueError:
    print('Already in database.')

Already in database.
