In [28]:
import datetime
import glob
import os
import re

import numpy as np
import pandas as pd
import requests
#import html5lib
from bs4 import BeautifulSoup

def percentage(part, whole):
    """Return `part` expressed as a percentage of `whole`.

    Both arguments are converted with ``float()``, so numeric strings are
    accepted. A `whole` of zero raises ``ZeroDivisionError``.
    """
    scaled_part = 100 * float(part)
    denominator = float(whole)
    return scaled_part / denominator

In [29]:
# ================================================================================== #
# Configuration: set "crypto_name" to the cryptocurrency whose results you want.     #
# The value is concatenated into the tweet-file path, the coinmarketcap URL and the  #
# plot titles / output names, so it must be spelled exactly as those sources expect. #
# ================================================================================== #

crypto_name = 'ethereum' # e.g. bitcoin, ethereum, bitcoincash, ripple or litecoin


In [30]:
# ------------------------------------------------------------------------------ #
# Aggregate the saved tweets into one summary row per input file                 #
# ------------------------------------------------------------------------------ #
# Tweets about each crypto are read from saved tab-separated txt files with dates
# ranging from 2017-09-01 to 2017-11-30 (collected by an automated selenium script).
# Each file must provide at least a 'date' and a 'polarity' column; the 'date' of the
# file's first row labels the whole file.
#
# Outputs:
#   df_noOfTweets    - date, positive, neutral, negative, total (tweet counts per file)
#   df_polarity_line - date, polarity (SUM of the file's polarities, not the mean)

NEUTRAL_BAND = 0.1  # polarity within [-0.1, 0.1] counts as neutral

# os.path.join keeps the path portable (the result is unchanged on Windows).
# glob returns matches in arbitrary order, so sort for a reproducible row order.
tweet_files = sorted(
    glob.glob(os.path.join('database', 'Tweets', crypto_name, crypto_name + '*.txt'))
)

# Collect plain records and build each DataFrame once at the end: DataFrame.append
# inside a loop is quadratic and no longer exists in pandas 2.x.
tweet_count_records = []
polarity_records = []
for f in tweet_files:
    df_datawise = pd.read_csv(f, sep='\t', encoding='utf-8', parse_dates=True)
    if df_datawise.empty:
        # A file with a header but no tweets has no first-row date to report.
        continue

    polarity_values = df_datawise['polarity'].astype(float)
    file_date = df_datawise.iloc[0]['date']

    # Counts are taken directly; rows with a missing polarity fall in no bucket.
    positive = int((polarity_values > NEUTRAL_BAND).sum())
    negative = int((polarity_values < -NEUTRAL_BAND).sum())
    neutral = int(polarity_values.between(-NEUTRAL_BAND, NEUTRAL_BAND).sum())

    tweet_count_records.append({
        'date': file_date,
        'positive': positive,
        'neutral': neutral,
        'negative': negative,
        'total': len(df_datawise.index),
    })

    # skipna=False: a missing polarity makes the file's total NaN instead of being
    # silently ignored, matching a plain running sum.
    polarity = float(polarity_values.sum(skipna=False))
    polarity_records.append({'date': file_date, 'polarity': polarity})

# for the barchart of no. of tweets
df_noOfTweets = pd.DataFrame(
    tweet_count_records,
    columns=['date', 'positive', 'neutral', 'negative', 'total'],
)
# for the polarity line chart
df_polarity_line = pd.DataFrame(polarity_records, columns=['date', 'polarity'])

In [31]:
# =================== #
# no_of_tweets TABLE  #
# =================== #
import plotly
import plotly.figure_factory as ff
# init_notebook_mode has to be imported in this cell: without it the call below
# raises NameError when the notebook is run top to bottom on a fresh kernel.
from plotly.offline import init_notebook_mode

# Load plotly.js into the notebook so offline figures can render inline.
init_notebook_mode(connected=True)

# Render the per-file tweet counts as a plotly table.
table = ff.create_table(df_noOfTweets)
plotly.offline.iplot(table, filename='df_noOfTweets')

In [32]:
import plotly
import plotly.graph_objs as go
from plotly.offline import *
# ------------------------------------------------------------ #
# Line chart: number of tweets per date, total and by sentiment #
# ------------------------------------------------------------ #
# (df_noOfTweets column, legend label) for each line, in drawing order.
tweet_series = [
    ('total', 'No. Of Tweets'),
    ('neutral', 'Neutral'),
    ('positive', 'Positive'),
    ('negative', 'Negative'),
]

# One line trace per series, all sharing the date column as the x axis.
data = [
    go.Scatter(
        x=df_noOfTweets['date'],
        y=df_noOfTweets[column],
        mode='lines',
        name=label,
    )
    for column, label in tweet_series
]
trace1, trace2, trace3, trace4 = data

layout = go.Layout(title='No. of Tweets plot - ' + crypto_name)
fig = go.Figure(data=data, layout=layout)
fname = crypto_name + '_No_Of_Tweets.html'
plotly.offline.iplot(fig, filename=fname)  # Plot No. of Tweets

In [33]:
# ===================================== #
# Getting the closing price and volume  #
# ===================================== #
# Scrapes the coinmarketcap historical-data table for `crypto_name`, keeps the date,
# volume and closing price of each row, and inner-joins them with df_polarity_line
# on the 'date' column.
url='https://coinmarketcap.com/currencies/'+ crypto_name +'/historical-data/?start=20170901&end=20171130'

# The timeout stops a stalled connection from hanging the notebook, and
# raise_for_status turns an HTTP error page into an immediate, readable failure.
page = requests.get(url, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.text, "lxml")
price_table = soup.find('table', attrs={'class':'table'})
if price_table is None:
    # soup.find returns None when nothing matches; fail with a clear message.
    raise ValueError('No historical-data table found at ' + url)

# Skip the first <tr> (the header row); ignore rows that contain no <td> cells.
price_rows = []
for tr in price_table.find_all('tr')[1:]:
    cells = [td.text for td in tr.find_all('td')]
    if cells:
        price_rows.append(cells)

# "a", "b", "c" and "d" are placeholders for the scraped columns that are not used.
df_temp = pd.DataFrame(price_rows, columns=["date", "a", "b", "c", "close",'volume', 'd'])
df = df_temp[["date", 'volume', 'close']].copy()

# Normalise e.g. "Nov 30, 2017" to "2017-11-30" so the merge below compares like with like
# (assumes df_polarity_line['date'] holds YYYY-MM-DD strings - TODO confirm against the tweet files).
df['date'] = pd.to_datetime(df['date'], format="%b %d, %Y").dt.strftime("%Y-%m-%d")
# Strip everything but digits from volume and the thousands separators from close.
# Both columns stay strings; they are not converted to numeric dtypes here.
df['volume'] = df['volume'].str.replace("[^0-9]", "", regex=True)
df['close'] = df['close'].str.replace(",", "", regex=False)

df_marketcap_sentiment_line = pd.merge(df_polarity_line, df, on='date')

# ======================================= #
# Polarity, Vol. and Closing Price TABLE  #
# ======================================= #
table = ff.create_table(df_marketcap_sentiment_line)
fname = crypto_name + '_df_marketcap_sentiment_line'
plotly.offline.iplot(table, filename = fname)

In [34]:
# ------------------------------------------------------------------ #
# Closing price (left axis) against tweet polarity (right axis)      #
# ------------------------------------------------------------------ #
# Volume is available in df_marketcap_sentiment_line but is not drawn here.
polarity_axis_colour = 'rgb(148, 103, 189)'
sentiment_dates = df_marketcap_sentiment_line['date']

trace1 = go.Scatter(
    x=sentiment_dates,
    y=df_marketcap_sentiment_line['close'],
    mode='lines',
    name='Closing Price',
)
# Polarity is bound to the secondary y axis ('y2') configured below.
trace2 = go.Scatter(
    x=sentiment_dates,
    y=df_marketcap_sentiment_line['polarity'],
    mode='lines',
    name='Polarity',
    yaxis='y2',
)
data = [trace1, trace2]

price_axis = dict(title='Closing Price.')
# The secondary axis is overlaid on the primary one and drawn on the right-hand side.
polarity_axis = dict(
    title='Polarity',
    titlefont=dict(color=polarity_axis_colour),
    tickfont=dict(color=polarity_axis_colour),
    overlaying='y',
    side='right',
)
layout = go.Layout(
    title='Sentiment Polarity Vs. Closing Price (in USD) - ' + crypto_name,
    yaxis=price_axis,
    yaxis2=polarity_axis,
)

fig = go.Figure(data=data, layout=layout)
fname = crypto_name + '_Sentiment_vs_Market.html'
plotly.offline.iplot(fig, filename=fname)