In [None]:
import json
import os
import time
from datetime import datetime

import pandas as pd
import requests
import selenium.webdriver as webdriver
from bs4 import BeautifulSoup
from dateutil import parser
from dotenv import dotenv_values
from IPython.display import clear_output
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [None]:
# Start a Chrome session, open the Dribbble search page and click the
# "Sign in" link in the site navigation.
browser = webdriver.Chrome()
main_url = 'https://dribbble.com/'
# main_url already ends with '/', so strip it before appending the path;
# plain concatenation would yield 'https://dribbble.com//search/...'.
search_url = main_url.rstrip('/') + '/search/search=financial+dashboard'

# Parse the .env file once instead of re-reading it for every key.
credentials = dotenv_values('.env')
username = credentials['USERNAME']
password = credentials['PASSWORD']

browser.get(search_url)
WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.CLASS_NAME, 'js-site-nav-sign-in'))).click()
time.sleep(1)  # give the page a moment to react to the click

In [None]:
# Fill in the sign-in form with the credentials read from .env and submit it.
login_field = browser.find_element(By.ID, 'login')
login_field.send_keys(username)

password_field = browser.find_element(By.ID, 'password')
password_field.send_keys(password)

# The submit control is addressed by its absolute position in the page markup.
submit_xpath = '//*[@id="main-container"]/section[2]/main/div[1]/div[2]/form/input[4]'
submit_button = browser.find_element(By.XPATH, submit_xpath)
submit_button.click()

In [None]:
# Keep pulling in more search results for as long as the page's
# "loading-more" element still contains the text "Loading more…".
s = WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'loading-more'))).get_attribute("outerHTML")
n = 0  # number of passes through the loop, printed as progress
while 'Loading more…' in s:
    n += 1
    try:
        # Refresh the marker text first so the loop condition sees the latest state.
        s = browser.find_element(By.CLASS_NAME, 'loading-more').get_attribute("outerHTML")
        WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="main"]/div[2]/a'))).click()
    except Exception:
        # Best effort: if the marker or the link is not available right now,
        # pause and fall through to the scroll below. Catching Exception
        # (not a bare except) lets KeyboardInterrupt stop the loop when the
        # kernel is interrupted.
        time.sleep(1)
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(1)
    clear_output(wait=True)
    print(n)

In [None]:
# Snapshot the loaded results container and collect one <li> per shot.
results_html = browser.find_element(By.ID, 'wrap-inner').get_attribute("outerHTML")
entries = BeautifulSoup(results_html, "lxml").find_all('li', class_='shot-thumbnail')
# quit() ends the whole WebDriver session; close() would only close the
# window and leave the driver running while a second driver is started later.
browser.quit()
print(f'found {len(entries)} entries')

In [None]:
# Parallel lists with one item per kept shot. NOTE: `id` shadows the builtin,
# but the name is kept because later cells read it.
id, user, href, img_url = [], [], [], []

for entry in entries:
    thumbnail = entry.find('img')  # first image of the entry; looked up once
    keywords = [x.lower() for x in thumbnail['alt'].split()]
    # Skip shots whose image alt text contains the word "mobile".
    if 'mobile' not in keywords:
        href_short = entry.find('a', class_='shot-thumbnail-link')['href']
        # The last path segment is expected to start with the numeric shot id.
        id_proxy = href_short.split('/')[-1].split('-')[0]
        if id_proxy.isnumeric():
            id.append(id_proxy)
            # Drop the query string from the image URL.
            img_url.append(thumbnail['src'].split('?')[0])
            # Join with exactly one '/' between host and path; main_url ends
            # with '/', so plain concatenation can produce '//'.
            href.append(main_url.rstrip('/') + '/' + href_short.lstrip('/'))
            user.append(entry.find('div', class_='user-information').find('img')['alt'])
print(f'found {len(id)} dashboards')

In [None]:
# Per-shot details, appended in the same order as `href` so that they line up
# with the `id`, `user` and `img_url` lists when the DataFrame is assembled.
title, posted, views, saves, likes, comments = [], [], [], [], [], []

DETAILS_TIMEOUT = 30  # max seconds to wait for the details modal to show all five fields

browser = webdriver.Chrome()
n = 0  # number of shots visited so far, printed as progress

try:
    for url in href:
        n += 1
        browser.get(url)
        WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.CLASS_NAME, 'js-site-nav-sign-in')))

        shot_title = browser.find_element(By.TAG_NAME, 'h1').text

        # Open the details modal via the button labelled "Shot details".
        buttons = browser.find_elements(By.TAG_NAME, 'button')
        for b in buttons:
            if b.get_attribute('data-original-title') == 'Shot details':
                b.click()
                break
        modal = WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'modal-content')))

        # Every second line of the modal text, starting with the second line,
        # is taken as a value: first the posting date, then four counters.
        # Poll until all five are present, sleeping between attempts and
        # giving up after DETAILS_TIMEOUT instead of spinning forever.
        data = []
        deadline = time.monotonic() + DETAILS_TIMEOUT
        while True:
            try:
                data = modal.text.split('\n')[1::2][:5]
            except Exception:
                # Reading the modal failed this time; retry after the pause.
                data = []
            if len(data) == 5:
                break
            if time.monotonic() >= deadline:
                raise TimeoutError(f'shot details did not load for {url}')
            time.sleep(1)

        posted_on = str(parser.parse(data[0], fuzzy=True).date())
        # Keep only the digits of each counter (drops thousands separators etc.).
        counts = [int(''.join(filter(str.isdigit, d))) for d in data[1:]]

        # Append only after everything parsed, so the lists never get out of step.
        title.append(shot_title)
        posted.append(posted_on)
        views.append(counts[0])
        saves.append(counts[1])
        likes.append(counts[2])
        comments.append(counts[3])

        clear_output(wait=True)
        print(n)
finally:
    # Always end the WebDriver session, even when a shot fails to load.
    browser.quit()

In [None]:
# Assemble the scraped lists into one table; 'url' holds the image URL.
df = pd.DataFrame({
    'id': id, 'user': user, 'title': title,
    'posted': posted, 'views': views, 'saves': saves, 'likes': likes, 'comments': comments,
    'url': img_url
})
# Make sure the output folder exists before writing into it.
os.makedirs('dribbble', exist_ok=True)
# index=False: the default RangeIndex carries no information and would come
# back as an extra 'Unnamed: 0' column when the CSV is read again.
df.to_csv('dribbble/db.csv', index=False)

In [None]:
# Download every shot image into 'dribbble/<id>.<extension>'.
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block indefinitely

for idx, row in df.iterrows():
    response = requests.get(row['url'], timeout=REQUEST_TIMEOUT)
    if response.status_code == 200:
        extension = row['url'].split('.')[-1]
        # str(): ids are strings when scraped but integers once the table has
        # been reloaded from CSV, and str.join only accepts strings.
        file_name = 'dribbble/' + '.'.join([str(row['id']), extension])
        with open(file_name, "wb") as f:
            f.write(response.content)
    else:
        # Stop at the first non-200 response and show its status code.
        print(response.status_code)
        break
    clear_output(wait=True)
    print(idx+1)

In [None]:
# Load the saved table back from disk.
df = pd.read_csv(filepath_or_buffer='dribbble/db.csv')

In [None]:
# Show the first five rows of the table.
df.head(n=5)

In [None]:
import matplotlib.pyplot as plt

In [None]:
import os

In [None]:
# Folder that is scanned for downloaded image files.
dir_path = 'dribbble'

In [None]:
# Names in the folder that contain a dot.
files = [name for name in os.listdir(dir_path) if '.' in name]
# The part before the first dot, kept as an integer id when it is numeric.
ids = [
    int(stem)
    for stem in (name.partition('.')[0] for name in files)
    if stem.isnumeric()
]

In [None]:
# Keep only the rows whose id was found among the files, renumbering from 0.
has_image = df['id'].isin(ids)
df = df[has_image].reset_index(drop=True)

In [None]:
# Scatter of likes against views, one blue dot per shot.
fig, ax = plt.subplots()
ax.plot(df['views'], df['likes'], '.b')
ax.set_xlabel('Views')
ax.set_ylabel('Likes')
plt.show()

In [None]:
# Normalised 100-bin histogram of likes per view for each shot.
conversion = df['likes'] / df['views']
fig, ax = plt.subplots()
ax.hist(conversion, bins=100, density=True, color='b')
ax.set_xlabel('Conversion')
ax.set_ylabel('PDF')
plt.show()

In [None]:
# The 25 users with the most shots, largest first.
z = df.groupby('user')['id'].count().sort_values(ascending=False).head(25)
# Total likes and views per user; the join plus dropna keeps only the users
# present in `z`, and the result is ordered by submission count.
zz = df.groupby('user')[['likes', 'views']].sum()
zz = (
    zz.join(z.rename('count').to_frame())
    .dropna()
    .sort_values(by='count', ascending=False)
)
# Tick labels: user names reduced to ASCII characters, trailing whitespace removed.
names = [''.join(filter(lambda ch: ord(ch) < 128, x)).rstrip() for x in zz.index]

# Bars show submissions per user (left axis); the red line shows likes per
# view on a second y-axis (right axis).
fig, ax1 = plt.subplots(figsize=(10, 6))
ax1.bar(names, zz['count'], color='b')
ax1.set_xticks(ax1.get_xticks(), names, rotation=90, ha='right')
ax1.set_ylabel('Total Submissions', labelpad=10, fontsize=16)

ax2 = ax1.twinx()
ax2.plot(names, zz['likes'] / zz['views'], 'ro-')
ax2.set_ylim(bottom=0)
ax2.set_ylabel('Conversion', labelpad=10, fontsize=16)

plt.show()