In [1]:
import os
import requests
import json
import pandas as pd

In [2]:
# set folders
RESULTS_FOLDER = '.\\Results'

if not os.path.exists(RESULTS_FOLDER):
    os.makedirs(RESULTS_FOLDER)

In [3]:
# get request
# see API at the bottom of https://alternative.me/crypto/fear-and-greed-index/
page=requests.get('https://api.alternative.me/fng/?limit=0&date_format=world')

# convert to json and extract to DataFrame
data=json.loads(page.content)['data']
df_download=pd.DataFrame()
for item in data:
    df_download=pd.concat([df_download, pd.DataFrame({'date': item['timestamp'], 'index': item['value'],
                                                        'classification': item['value_classification']}, index=[0])])
df_download['date']=pd.to_datetime(df_download['date'], infer_datetime_format=True)

In [4]:
df_download

Unnamed: 0,date,index,classification
0,2023-04-24,53,Neutral
0,2023-04-23,56,Greed
0,2023-04-22,53,Neutral
0,2023-04-21,50,Neutral
0,2023-04-20,52,Neutral
...,...,...,...
0,2018-02-05,11,Extreme Fear
0,2018-02-04,24,Extreme Fear
0,2018-02-03,40,Fear
0,2018-02-02,15,Extreme Fear


In [5]:
df_download.to_csv(os.path.join(RESULTS_FOLDER, '99_Crypto_Fear_and_Greed_Index.csv'), index=False, sep=';')

# Alternative way to scrape directly from ChartJS

In [1]:
from seleniumwire import webdriver
from seleniumwire.utils import decode
import json
import time
import pandas as pd

CHROMEDRIVER_PATH = r"C:\Users\Alessandro Bitetto\Downloads\UniPV\ICOs\WebDriver\chromedriver"

In [None]:
url='https://alternative.me/crypto/fear-and-greed-index/'
requested_item_url='https://alternative.me/api/crypto/fear-and-greed-index/history'

# have a look at https://www.youtube.com/watch?v=i9N_LrnDUnY&ab_channel=StevesieData from minute 2:00 to understand
# how to locate the url of the chart you want to extract. It will be "requested" and the Network tab in inspect mode
# will record the interaction and then you can extract the response. Otherwise see the first example in 
# https://pypi.org/project/selenium-wire/ to get the full list of Network interactions

In [None]:
# load page and scroll down (to load the chart)
driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(url)
time.sleep(3)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(1)

# press "Max" button in the chart
driver.find_element("xpath", "/html/body/div/main/section/div/div[5]/div[2]/div/span[5]").click()

# loop request and extract the chart data as json
downloaded_data=[]
for request in driver.requests:
    if request.response:
        if request.url == requested_item_url:
            print(f'Found: {request.url}')
            body = decode(request.response.body, request.response.headers.get('Content-Encoding', 'identity'))
            downloaded_data.append(json.loads(body))
print(f'\nTotal data downloaded: {len(downloaded_data)}')

In [None]:
# convert json to DataFrame
converted_dataframe=[]
for data in downloaded_data:
    converted_dataframe.append(pd.DataFrame({'dates': data['data']['labels'], 'index': data['data']['datasets'][0]['data']}))

In [1]:
from seleniumwire import webdriver
from seleniumwire.utils import decode
import json
import time
import pandas as pd


import requests
from bs4 import BeautifulSoup
from soup2dict import convert
import numpy as np
from utils import get_social_series, get_chromedriver
import re

CHROMEDRIVER_PATH = r"C:\Users\Alessandro Bitetto\Downloads\UniPV\ICOs\WebDriver\chromedriver"

In [13]:
def scrape_info_icomarks(url='', chromedriver_path='', skip_social=False, skip_price=False):
    
    '''
    - skip_social: if True skip social users' timeseries download (takes time and uses WebDriver)
    - skip_price: if True skip market price timeseries download (takes time and uses WebDriver)
    
    Better to allow or deny both.
    '''
    
    add_row=pd.DataFrame()
    
    #### request page
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')


    #### page screenshot date
    add_row['url']=[url]
    try:
        tag = soup.find_all('div', class_="swimm-panel-bottom__links", recursive=True)
        conv_dict = convert(tag)
        add_row['PageScreenshot']=[conv_dict['div'][0]['#text']]
    except:
        pass


    #### extract rating

    tag = soup.find_all('div', class_="ico-rating-content", recursive=True)
    conv_dict = convert(tag)
    if len(conv_dict) > 0:
        # overall rating
        ind=np.where([x['@class'] == ['ico-rating-overall'] for x in conv_dict['div'][0]['div']])[0][0]
        value=conv_dict['div'][0]['div'][ind]['#text']   
        add_row['Rating_TOTAL_SCORE']=[value]
        # rating component
        for t in conv_dict['div'][0]['div'][2]['div']:
            if '@class' in t.keys():
                if t['@class'] == ['ico-rating__item']:
                    ind=np.where([x['@class'] == ['ico-rating__title'] for x in t['div']])[0][0]
                    name=t['div'][ind]['#text']
                    name=re.sub('ICO |STO |IEO ', '', name)
                    ind=np.where([x['@class'] == ['ico-rating__circle'] for x in t['div']])[0][0]
                    value=t['div'][ind]['#text']
                    add_row['Rating_'+name.replace(' ', '_')]=[value]


    #### extract "Detail" tab blocks

    tag = soup.find_all('div', class_="icoinfo-block", recursive=True)
    conv_dict = convert(tag)

    block_log={}
    block_df=pd.DataFrame()
    for t in conv_dict['div']:

        # block name
        ind=np.where([x['@class'] == ['icoinfo-block__title'] for x in t['div']])[0][0]
        block_name=t['div'][ind]['#text']

        # extract block contents
        try:
            ind=np.where([x['@class'] == ['icoinfo-block-content'] for x in t['div']])[0][0]
            block_log[block_name]=t['div'][ind]['#text']   # save log of displayed items
            block_dict=t['div'][ind]['div']
        except:   # means tab of social media
            block_dict=t['div']

        block_t=pd.DataFrame()
        for t1 in block_dict:
            if t1['@class'] == ['icoinfo-block__item']:
                item_name=t1['span'][0]['#text'].replace(':', '')
                item_url=None
                if 'a' in t1.keys():
                    if '@href' in t1['a'][0]:
                        item_url=t1['a'][0]['@href']                
                    item_display=t1['a'][0]['navigablestring']
                elif 'i' in t1.keys():
                    if 'navigablestring' in t1['i'][0]:
                        item_display=t1['i'][0]['navigablestring']
                else:
                    try:
                        item_display=t1['navigablestring']
                    except:
                        item_display=None
                if type(item_display) == list:
                    if len(item_display) > 1:
                        item_display='; '.join(item_display)
                    else:
                        item_display=item_display[0]
                block_t=pd.concat([block_t, pd.DataFrame({'BlockName': block_name,
                                                         'Item': item_name,
                                                         'ItemUrl': item_url,
                                                         'ItemValue': item_display}, index=[0])])

        block_df=pd.concat([block_df, block_t])
    add_row['InfoBlock']=[block_df]


    #### Get team size and members
    
    try:
        tag = soup.find_all('a', href=True, recursive=True)
        conv_dict = convert(tag)
        ind=np.where([x['@href'] == '#team' for x in conv_dict['a']])[0][0]
        team_size=int(conv_dict['a'][ind]['#text'].replace('Team (', '').replace(')', ''))
        # check if Advisors
        advisor_size=int(soup.findAll(string = re.compile('Advisors \('))[0].replace('Advisors (', '').replace(')', ''))
        loop_max = 2 if advisor_size != 0 else 1
        # extract Team and Advisors
        tag = soup.find_all('div', class_='company-team', recursive=True)
        conv_dict = convert(tag)
        team_df=pd.DataFrame()
        for team_ind in range(loop_max):    # 0 = Team  1 = Advisor
            team_lab = 'Team' if team_ind == 0 else 'Advisor'
            for t in conv_dict['div'][team_ind]['div']:
                person_name=t['div'][0]['#text']
                person_role=t['div'][1]['#text']
                person_extra=None
                if t['div'][2]['@class'] == ['company-team__post']:
                    person_extra=t['div'][2]['#text']
                if ['company-team__links'] in [x['@class'] for x in t['div']]:
                    ind=np.where([x['@class'] == ['company-team__links'] for x in t['div']])[0][0]
                    person_link=[x['@href'] for x in t['div'][ind]['a']]
                else:
                    person_link=None
                team_df=pd.concat([team_df, pd.DataFrame({'Member': team_lab,
                                                          'Name': person_name,
                                                          'Role': person_role,
                                                          'Extra': person_extra,
                                                          'Links': [person_link]}, index=[0])])
        if team_size != team_df[team_df['Member']=='Team'].shape[0]:
            print(f"- {url} mismatch between 'TeamSize' and extracted team members")
        if advisor_size != team_df[team_df['Member']=='Advisor'].shape[0]:
            print(f"- {url} mismatch between 'AdvisorSize' and extracted advisor members")
        add_row['TeamSize']=team_size
        add_row['AdvisorSize']=advisor_size
        add_row['TeamBlock']=[team_df]
    except:
        pass


    #### Get Social Rating and users timeseries

    tag = soup.find_all('div', class_='companyTab companyTab_social', recursive=True)
    conv_dict = convert(tag)

    if len(conv_dict) > 0:
        social_df=pd.DataFrame()
        try:
            for t in conv_dict['div'][0]['div'][0]['div']:
                if t['@class'] == ['social-item']:
                    social_name=t['div'][0]['div'][0]['#text']
                    total_user=int(t['div'][1]['div'][2]['#text'].replace(',', ''))
                    rating=t['div'][1]['div'][3]['#text']
                    social_df=pd.concat([social_df, pd.DataFrame({'Social': social_name,
                                                                 'Users': total_user,
                                                                 'Rating': rating}, index=[0])])
        except:
            pass
        
        # download chart data
        if not skip_social:
            driver = get_chromedriver(chromedriver_path=chromedriver_path)
            driver.get(url)
            series_status, series_dict=get_social_series(driver=driver, tot_series=social_df.shape[0])
            series_status = 'DOWNLOADED' if len(series_dict) != 0 else series_status
        else:
            series_dict={}
            series_status='DOWNLOAD_SKIPPED'

        add_row['SocialWithRating']=social_df.shape[0]
        add_row['SocialSeriesStatus']=series_status
        add_row['SocialBlock']=[[{'stats': social_df, 'timeseries': series_dict}]]
        
    
    #### Get Market Price timeseries
    
    if not skip_price:
        if 'driver' not in locals():
            driver = get_chromedriver(chromedriver_path=chromedriver_path)
            driver.get(url)
        series_status, series_df=get_price_series(driver)
    else:
        series_status='DOWNLOAD_SKIPPED'
        series_df=None
        
    add_row['MarketPriceSeriesStatus']=series_status
    add_row['MarketPriceSeries']=[series_df]
            
     
    if 'driver' in locals():
        driver.close()
        
    return add_row

In [57]:
page = requests.get('https://icomarks.com/ico/online')
soup = BeautifulSoup(page.content, 'html.parser')
len(soup.find_all('div', class_="companyGraph", recursive=True))

1

In [25]:
url='https://icomarks.com/ico/online'# 'https://icomarks.com/ico/blueqbit'# 'https://icomarks.com/ico/online'
add_row=scrape_info_icomarks(url=url, chromedriver_path=CHROMEDRIVER_PATH, skip_social=False)
add_row

Unnamed: 0,url,PageScreenshot,InfoBlock,TeamSize,AdvisorSize,TeamBlock,SocialWithRating,SocialSeriesStatus,SocialBlock,MarketPriceSeriesStatus,MarketPriceSeries
0,https://icomarks.com/ico/online,Visit Website Last screenshot taken on 18 Mar ...,BlockName Item \ 0 ...,11,10,Member Name ...,3,DOWNLOADED,"[{'stats': ['Social', 'Users', 'Rating'], 'tim...",DOWNLOADED,Date PriceUSD MarketCa...


In [26]:
add_row['MarketPriceSeries']

0                       Date  PriceUSD     MarketCa...
Name: MarketPriceSeries, dtype: object

In [27]:
add_row.to_json('pp.json', orient='table')

In [36]:
add_row=pd.read_json('pp.json', orient='table')

In [37]:
add_row['MarketPriceSeries']

0    [{'Date': '2018-12-03T17:34:38.000', 'PriceUSD...
Name: MarketPriceSeries, dtype: object

In [38]:
add_row['InfoBlock']=[pd.DataFrame(add_row['InfoBlock'][0])]
if 'TeamBlock' in add_row.columns:
    add_row['TeamBlock']=[pd.DataFrame(add_row['TeamBlock'][0])]
if 'SocialBlock' in add_row.columns:
    social_df=pd.DataFrame(add_row['SocialBlock'][0][0]['stats'])
    series_dict={}
    for k in add_row['SocialBlock'][0][0]['timeseries'].keys():
        series_dict[k]=pd.DataFrame(add_row['SocialBlock'][0][0]['timeseries'][k])
    add_row['SocialBlock']=[[{'stats': social_df, 'timeseries': series_dict}]]
if 'MarketPriceSeries' in add_row.columns:
    add_row['MarketPriceSeries']=[pd.DataFrame(add_row['MarketPriceSeries'][0])]

In [39]:
add_row['MarketPriceSeries']

0                            Date  PriceUSD     Mar...
Name: MarketPriceSeries, dtype: object

In [31]:
url='https://icomarks.com/ico/online'
add_row=scrape_info_icomarks(url=url, chromedriver_path=CHROMEDRIVER_PATH, skip_social=False)
add_row

  advisor_size=int(soup.findAll(text = re.compile('Advisors \('))[0].replace('Advisors (', '').replace(')', ''))


Unnamed: 0,url,PageScreenshot,InfoBlock,TeamSize,AdvisorSize,TeamBlock,SocialWithRating,SocialSeriesStatus,SocialBlock
0,https://icomarks.com/ico/online,Visit Website Last screenshot taken on 18 Mar ...,BlockName Item \ 0 ...,11,10,Member Name ...,3,DOWNLOADED,"[{'stats': ['Social', 'Users', 'Rating'], 'tim..."


In [32]:
scrape_df[scrape_df.url=='https://icomarks.com/ico/online']

Unnamed: 0,url,ScrapeStatus,PageScreenshot,Rating_TOTAL_SCORE,Rating_PROFILE_(weight:_45%),Rating_SOCIAL_ACTIVITY_(weight:_35%),Rating_TEAM_PROOF_(weight:_20%),InfoBlock,TeamSize,AdvisorSize,TeamBlock,SocialWithRating,SocialSeriesStatus,SocialBlock,PklPath,TotTimeSec,Rating_PRODUCT_(weight:_25%),Rating_BUSINESS_(weight:_25%),Rating_PROFILE_(weight:_25%),Rating_SOCIAL_ACTIVITY_(weight:_15%),Rating_TEAM_PROOF_(weight:_10%)
5978,https://icomarks.com/ico/online,OK,Visit Website Last screenshot taken on 18 Mar ...,,,,,BlockName Item \ 0 ...,11.0,10.0,Member Name ...,3.0,DOWNLOADED,"[{'stats': ['Social', 'Users', 'Rating'], 'tim...",.\Checkpoints\Icomarks\online.json,22.0,,,,,


In [15]:
url='https://icomarks.com/ieo/lcx'  # 'https://icomarks.com/ico/online'

driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(url)

In [9]:
url='https://icomarks.com/ico/online'  #'https://icomarks.com/ieo/lcx'

driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(url)

def get_price_series(driver=None):

    status=''
    count=0
    try:
        valueToClick = "All"
        button = driver.find_element('xpath',
                                     '//div[@class="companyGraph"]//div[@class="highcharts-container "]//*[name()="g" and '
                                     f'@class="highcharts-range-selector-group"]//*[name()="text" and text()="{valueToClick}"]')
    except:
        status='DOWNLOAD_NOT_AVAILABLE'
    else:
        try:
            button.click()

            status='DOWNLOADED'
            for request in driver.requests:
                if request.response:
                    if 'https://icomarks.com/graph/prices?' in request.url:
                        body = decode(request.response.body, request.response.headers.get('Content-Encoding', 'identity'))
                        data=json.loads(body)
                        count+=1
        except:
            status='DOWNLOAD_ERROR'

    series_df=None
    if count > 0:
        for col in ['prices', 'market_cap', 'h24_vol']:
            df_t=pd.DataFrame(data[col], columns =['Date', col])
            df_t['Date']=pd.to_datetime(df_t['Date'], unit='ms')
            if series_df is not None:
                series_df=series_df.merge(df_t, on='Date', how='left')
            else:
                series_df=df_t
        series_df.columns=['Date', 'PriceUSD', 'MarketCap', 'Volume24H']
    if count > 1:
        status='DOWNLOADED_BUT_MULTIPLE_SERIES'
        
    return status, series_df
    
get_price_series(driver)
    

('DOWNLOADED',
                    Date  PriceUSD     MarketCap   Volume24H
 0   2018-12-03 17:34:38  0.002840  0.000000e+00   549.00000
 1   2018-12-03 21:04:38  0.003456  3.032811e+06  1480.00000
 2   2018-12-04 03:04:36  0.004190  3.676995e+06  5112.00000
 3   2018-12-04 09:04:40  0.003905  3.427082e+06  6460.00000
 4   2018-12-04 15:11:43  0.003873  3.399135e+06  6695.34885
 ..                  ...       ...           ...         ...
 823 2019-10-06 15:59:11  0.001048  9.198154e+05     9.87989
 824 2019-10-09 21:26:11  0.001086  9.528451e+05     0.00000
 825 2019-10-10 03:13:11  0.001086  9.528451e+05     0.00000
 826 2019-10-10 09:01:08  0.001086  9.528451e+05     0.00000
 827 2019-11-18 14:59:11  0.003342  2.932805e+06     0.00000
 
 [828 rows x 4 columns])

In [2]:
url='https://icomarks.com/ico/online' # 'https://icomarks.com/ieo/lcx'

driver = webdriverWire.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(url)

  driver = webdriverWire.Chrome(executable_path=CHROMEDRIVER_PATH)


In [3]:
count=0
for request in driver.requests:
    if request.response:
        if 'https://icomarks.com/graph/prices?' in request.url:
            body = decode(request.response.body, request.response.headers.get('Content-Encoding', 'identity'))
            data=json.loads(body)
            count+=1

AttributeError: 'WebDriver' object has no attribute 'requests'

In [50]:
df=None
for col in ['prices', 'market_cap', 'h24_vol']:
    df_t=pd.DataFrame(data[col], columns =['Date', col])
    df_t['Date']=pd.to_datetime(df_t['Date'], unit='ms')
    if df is not None:
        df=df.merge(df_t, on='Date', how='left')
    else:
        df=df_t
df.columns=['Date', 'PriceUSD', 'MarketCap', 'Volume24H']
df

Unnamed: 0,Date,PriceUSD,MarketCap,Volume24H
0,2018-12-03 17:34:38,0.002840,0.000000e+00,549.00000
1,2018-12-03 21:04:38,0.003456,3.032811e+06,1480.00000
2,2018-12-04 03:04:36,0.004190,3.676995e+06,5112.00000
3,2018-12-04 09:04:40,0.003905,3.427082e+06,6460.00000
4,2018-12-04 15:11:43,0.003873,3.399135e+06,6695.34885
...,...,...,...,...
823,2019-10-06 15:59:11,0.001048,9.198154e+05,9.87989
824,2019-10-09 21:26:11,0.001086,9.528451e+05,0.00000
825,2019-10-10 03:13:11,0.001086,9.528451e+05,0.00000
826,2019-10-10 09:01:08,0.001086,9.528451e+05,0.00000


In [40]:
df=pd.DataFrame(data['market_cap'], columns =['Date', 'Price'])
df['Date']=pd.to_datetime(df['Date'], unit='ms')
df

Unnamed: 0,Date,Price
0,2018-12-03 17:34:38,0.000000e+00
1,2018-12-03 21:04:38,3.032811e+06
2,2018-12-04 03:04:36,3.676995e+06
3,2018-12-04 09:04:40,3.427082e+06
4,2018-12-04 15:11:43,3.399135e+06
...,...,...
823,2019-10-06 15:59:11,9.198154e+05
824,2019-10-09 21:26:11,9.528451e+05
825,2019-10-10 03:13:11,9.528451e+05
826,2019-10-10 09:01:08,9.528451e+05


In [39]:
df=pd.DataFrame(data['prices'], columns =['Date', 'Price'])
df['Date']=pd.to_datetime(df['Date'], unit='ms')
df

Unnamed: 0,Date,Price
0,2018-12-03 17:34:38,0.002840
1,2018-12-03 21:04:38,0.003456
2,2018-12-04 03:04:36,0.004190
3,2018-12-04 09:04:40,0.003905
4,2018-12-04 15:11:43,0.003873
...,...,...
823,2019-10-06 15:59:11,0.001048
824,2019-10-09 21:26:11,0.001086
825,2019-10-10 03:13:11,0.001086
826,2019-10-10 09:01:08,0.001086


In [18]:
len(data['prices'])

828

In [19]:
len(data['market_cap'])

828

In [20]:
len(data['h24_vol'])

828

In [8]:
json.loads(body)

{'total': 828,
 'period': 6,
 'start': '2018-12-03 09:34:38',
 'end': '2019-11-18 06:59:11',
 'prices': [[1543858478000, 0.00283999868662],
  [1543871078000, 0.00345556209292],
  [1543892676000, 0.0041895405292],
  [1543914280000, 0.00390479185987],
  [1543936303000, 0.0038729489],
  [1543957661000, 0.0043200499],
  [1543978955000, 0.0040322799],
  [1544000855000, 0.0040551506],
  [1544022336000, 0.0034136145],
  [1544044417000, 0.0037475255],
  [1544065895000, 0.0032076297],
  [1544087316000, 0.0032673093],
  [1544109275000, 0.0035706588],
  [1544130516000, 0.0029883829],
  [1544152657000, 0.0029279603],
  [1544174017000, 0.0029086094],
  [1544195137000, 0.00272203],
  [1544216857000, 0.0028010515],
  [1544238697000, 0.0029473154],
  [1544260297000, 0.0033018724],
  [1544281537000, 0.0028359391],
  [1544303617000, 0.0028521197],
  [1544324737000, 0.0030615297],
  [1544346638000, 0.0029719669],
  [1544367757000, 0.0032825226],
  [1544390017000, 0.0031495304],
  [1544411496000, 0.002977

In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import time
import pandas as pd
CHROMEDRIVER_PATH = r"C:\Users\Alessandro Bitetto\Downloads\UniPV\ICOs\WebDriver\chromedriver"

In [18]:
url='https://icomarks.com/ico/online'  # https://icomarks.com/ieo/lcx

driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(url)

  driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)


In [14]:
valueToClick = "All"
button = driver.find_element('xpath',
                             '//div[@class="companyGraph"]//div[@class="highcharts-container "]//*[name()="g" and '
                             f'@class="highcharts-range-selector-group"]//*[name()="text" and text()="{valueToClick}"]')
button.click()

In [40]:
print('- Social Media time series status:')
display(scrape_df.SocialSeriesStatus.value_counts().to_frame())

Social Media time series status:

Unnamed: 0,SocialSeriesStatus
DOWNLOADED,6398
DOWNLOAD_ERROR,227


In [35]:
scrape_df.SocialSeriesStatus.value_counts()

DOWNLOADED        6398
DOWNLOAD_ERROR     227
Name: SocialSeriesStatus, dtype: int64

In [33]:
formatted_df=joblib.load(os.path.join(CHECKPOINT_FOLDER, 'scrape_df_raw.pkl'))

In [36]:
formatted_df[formatted_df.SocialSeriesStatus=='DOWNLOAD_ERROR']

Unnamed: 0,url,ScrapeStatus,PageScreenshot,Rating_TOTAL_SCORE,Rating_PROFILE_(weight:_45%),Rating_SOCIAL_ACTIVITY_(weight:_35%),Rating_TEAM_PROOF_(weight:_20%),InfoBlock,TeamSize,AdvisorSize,TeamBlock,SocialWithRating,SocialSeriesStatus,SocialBlock,PklPath,TotTimeSec,Rating_PRODUCT_(weight:_25%),Rating_BUSINESS_(weight:_25%),Rating_PROFILE_(weight:_25%),Rating_SOCIAL_ACTIVITY_(weight:_15%),Rating_TEAM_PROOF_(weight:_10%)
134,https://icomarks.com/ico/blueqbit,OK,Visit Website Last screenshot taken on 17 Mar ...,4.6,5,1,10,BlockName Item \ 0 Ge...,9.0,0.0,Member Name ...,0.0,DOWNLOAD_ERROR,"[{'stats': [], 'timeseries': {}}]",.\Checkpoints\Icomarks\blueqbit.json,25.0,,,,,
370,https://icomarks.com/ico/supportmarket,OK,Visit Website Last screenshot taken on 4 Mar 2...,5.5,6.9,1,10,BlockName Item \ 0 ...,9.0,0.0,Member Name ...,0.0,DOWNLOAD_ERROR,"[{'stats': [], 'timeseries': {}}]",.\Checkpoints\Icomarks\supportmarket.json,27.0,,,,,
468,https://icomarks.com/ico/hyde-and-co,OK,[\nLast screenshot taken on],3.0,5.4,1,1,BlockName Item \ 0 Ge...,4.0,0.0,Member Name Role ...,2.0,DOWNLOAD_ERROR,"[{'stats': ['Social', 'Users'], 'timeseries': ...",.\Checkpoints\Icomarks\hyde-and-co.json,20.0,,,,,
476,https://icomarks.com/ico/turbotradecoin,OK,Visit Website Last screenshot taken on 24 Mar ...,2.1,3.5,1,1,BlockName Item \ 0 Genera...,,,,0.0,DOWNLOAD_ERROR,"[{'stats': [], 'timeseries': {}}]",.\Checkpoints\Icomarks\turbotradecoin.json,25.0,,,,,
585,https://icomarks.com/ico/narbonne,OK,Visit Website Last screenshot taken on 16 Mar ...,2.3,3.8,1,1,BlockName Item ...,,,,0.0,DOWNLOAD_ERROR,"[{'stats': [], 'timeseries': {}}]",.\Checkpoints\Icomarks\narbonne.json,29.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7992,https://icomarks.com/ico/honor,OK,Visit Website Last screenshot taken on 14 Mar ...,3.4,4.6,1,5,BlockName Item \ 0 ...,6.0,0.0,Member Name ...,0.0,DOWNLOAD_ERROR,"[{'stats': [], 'timeseries': {}}]",.\Checkpoints\Icomarks\honor.json,20.0,,,,,
7996,https://icomarks.com/ico/modoview,OK,Visit Website Last screenshot taken on 8 Mar 2...,4.8,5.4,1,10,BlockName Item \ 0 ...,3.0,0.0,Member Name Role ...,0.0,DOWNLOAD_ERROR,"[{'stats': [], 'timeseries': {}}]",.\Checkpoints\Icomarks\modoview.json,28.0,,,,,
8009,https://icomarks.com/ico/dfs,OK,Visit Website Last screenshot taken on 11 Mar ...,2.4,4.2,1,1,BlockName Item \ 0 Genera...,5.0,0.0,Member Name ...,0.0,DOWNLOAD_ERROR,"[{'stats': [], 'timeseries': {}}]",.\Checkpoints\Icomarks\dfs.json,21.0,,,,,
8052,https://icomarks.com/ico/tetarise,OK,Visit Website Last screenshot taken on 12 Feb ...,5.0,5.8,1,10,BlockName Item \ 0 Genera...,10.0,6.0,Member Name ...,0.0,DOWNLOAD_ERROR,"[{'stats': [], 'timeseries': {}}]",.\Checkpoints\Icomarks\tetarise.json,38.0,,,,,


In [34]:
pd.set_option('display.max_columns', None)
formatted_df

Unnamed: 0,url,ScrapeStatus,PageScreenshot,Rating_TOTAL_SCORE,Rating_PROFILE_(weight:_45%),Rating_SOCIAL_ACTIVITY_(weight:_35%),Rating_TEAM_PROOF_(weight:_20%),InfoBlock,TeamSize,AdvisorSize,TeamBlock,SocialWithRating,SocialSeriesStatus,SocialBlock,PklPath,TotTimeSec,Rating_PRODUCT_(weight:_25%),Rating_BUSINESS_(weight:_25%),Rating_PROFILE_(weight:_25%),Rating_SOCIAL_ACTIVITY_(weight:_15%),Rating_TEAM_PROOF_(weight:_10%)
0,https://icomarks.com/ico/synthetics-ai,OK,[\nLast screenshot taken on],7.0,5.8,8,8,BlockName Item \ 0 ...,5.0,0.0,Member Name Role ...,2.0,DOWNLOADED,"[{'stats': ['Social', 'Users'], 'timeseries': ...",.\Checkpoints\Icomarks\synthetics-ai.json,15.0,,,,,
1,https://icomarks.com/ico/777-bingo,OK,[\nLast screenshot taken on],5.2,7.3,5,1,BlockName Item \ 0 ...,4.0,2.0,Member Name Role ...,2.0,DOWNLOADED,"[{'stats': ['Social', 'Users'], 'timeseries': ...",.\Checkpoints\Icomarks\777-bingo.json,9.0,,,,,
2,https://icomarks.com/ico/sonic,OK,[\nLast screenshot taken on],4.7,5,4,5,BlockName Item \ 0 ...,4.0,0.0,Member Name Rol...,2.0,DOWNLOADED,"[{'stats': ['Social', 'Users'], 'timeseries': ...",.\Checkpoints\Icomarks\sonic.json,12.0,,,,,
3,https://icomarks.com/ico/botchain,OK,[\nLast screenshot taken on],7.2,6.2,7,10,BlockName Item \ 0 ...,9.0,4.0,Member Name \ 0 ...,2.0,DOWNLOADED,"[{'stats': ['Social', 'Users'], 'timeseries': ...",.\Checkpoints\Icomarks\botchain.json,29.0,,,,,
4,https://icomarks.com/ico/eclipse,OK,[\nLast screenshot taken on],5.1,6.5,1,9,BlockName Item \ 0 ...,12.0,0.0,Member Name \ 0 ...,2.0,DOWNLOADED,"[{'stats': ['Social', 'Users'], 'timeseries': ...",.\Checkpoints\Icomarks\eclipse.json,13.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8274,https://icomarks.com/ico/vanhealthing,OK,Visit Website Last screenshot taken on 22 Mar ...,6.3,6.2,6,7,BlockName Item \ 0 ...,15.0,0.0,Member Name \ 0 Team ...,3.0,DOWNLOADED,"[{'stats': ['Social', 'Users', 'Rating'], 'tim...",.\Checkpoints\Icomarks\vanhealthing.json,14.0,,,,,
8275,https://icomarks.com/ico/consensus,OK,Visit Website Last screenshot taken on 26 Mar ...,6.9,3.1,10,10,BlockName Item ...,4.0,6.0,Member Name ...,2.0,DOWNLOADED,"[{'stats': ['Social', 'Users'], 'timeseries': ...",.\Checkpoints\Icomarks\consensus.json,22.0,,,,,
8276,https://icomarks.com/ico/kahnchat,OK,Visit Website Last screenshot taken on 18 Mar ...,7.1,7.3,8,5,BlockName Item \ 0 ...,6.0,3.0,Member Name R...,3.0,DOWNLOADED,"[{'stats': ['Social', 'Users', 'Rating'], 'tim...",.\Checkpoints\Icomarks\kahnchat.json,12.0,,,,,
8277,https://icomarks.com/ico/santiment,OK,Visit Website Last screenshot taken on 21 Mar ...,,,,,BlockName Item \ 0 ...,12.0,0.0,Member Name ...,1.0,DOWNLOADED,"[{'stats': ['Social'], 'timeseries': {'Twitter...",.\Checkpoints\Icomarks\santiment.json,32.0,,,,,


In [18]:
import os
import joblib
CHECKPOINT_FOLDER = '.\\Checkpoints'

pkl_path=os.path.join(CHECKPOINT_FOLDER, 'scrape_df_raw.pkl')
scrape_df=joblib.load(pkl_path)

In [30]:
pd.set_option('display.max_columns', None)
aa=scrape_df[scrape_df.url=='https://icomarks.com/ico/online']
aa

Unnamed: 0,url,ScrapeStatus,PageScreenshot,Rating_TOTAL_SCORE,Rating_PROFILE_(weight:_45%),Rating_SOCIAL_ACTIVITY_(weight:_35%),Rating_TEAM_PROOF_(weight:_20%),InfoBlock,TeamSize,AdvisorSize,TeamBlock,SocialWithRating,SocialSeriesStatus,SocialBlock,PklPath,TotTimeSec,Rating_PRODUCT_(weight:_25%),Rating_BUSINESS_(weight:_25%),Rating_PROFILE_(weight:_25%),Rating_SOCIAL_ACTIVITY_(weight:_15%),Rating_TEAM_PROOF_(weight:_10%)
5978,https://icomarks.com/ico/online,OK,Visit Website Last screenshot taken on 18 Mar ...,,,,,BlockName Item \ 0 ...,11.0,10.0,Member Name ...,3.0,DOWNLOADED,"[{'stats': ['Social', 'Users', 'Rating'], 'tim...",.\Checkpoints\Icomarks\online.json,22.0,,,,,


In [19]:
aa=scrape_df[scrape_df.url=='https://icomarks.com/ico/online']['SocialBlock']
aa.values[0][0]

{'stats':      Social  Users     Rating
 0  Telegram   2662       High
 1   Twitter  12935  Very High
 2  Facebook  14080  Very High,
 'timeseries': {'Facebook':                          Date  Users
  0    2018-05-17T07:00:00.000Z  12347
  1    2018-05-18T07:00:00.000Z  12380
  2    2018-05-19T07:00:00.000Z  12417
  3    2018-05-20T07:00:00.000Z  12457
  4    2018-05-21T07:00:00.000Z  12504
  ..                        ...    ...
  179  2018-11-15T08:00:00.000Z  14084
  180  2018-11-16T08:00:00.000Z  14082
  181  2018-11-17T08:00:00.000Z  14081
  182  2018-11-18T08:00:00.000Z  14080
  183  2018-11-19T08:00:00.000Z  14080
  
  [184 rows x 2 columns],
  'Twitter':                          Date  Users
  0    2018-05-17T07:00:00.000Z  10549
  1    2018-05-18T07:00:00.000Z  10531
  2    2018-05-19T07:00:00.000Z  10525
  3    2018-05-20T07:00:00.000Z  10499
  4    2018-05-21T07:00:00.000Z  10492
  ..                        ...    ...
  106  2018-08-31T07:00:00.000Z  13229
  107  2018-09-01T07

In [23]:
scrape_df.SocialBlock

0       [{'stats': ['Social', 'Users'], 'timeseries': ...
1       [{'stats': ['Social', 'Users'], 'timeseries': ...
2       [{'stats': ['Social', 'Users'], 'timeseries': ...
3       [{'stats': ['Social', 'Users'], 'timeseries': ...
4       [{'stats': ['Social', 'Users'], 'timeseries': ...
                              ...                        
8274    [{'stats': ['Social', 'Users', 'Rating'], 'tim...
8275    [{'stats': ['Social', 'Users'], 'timeseries': ...
8276    [{'stats': ['Social', 'Users', 'Rating'], 'tim...
8277    [{'stats': ['Social'], 'timeseries': {'Twitter...
8278    [{'stats': ['Social', 'Users', 'Rating'], 'tim...
Name: SocialBlock, Length: 8279, dtype: object