In [None]:
!pip install -U -q plotly

[K     |████████████████████████████████| 13.2MB 285kB/s 
[?25h

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as py
import plotly.graph_objs as go
import seaborn as sns
import plotly.io as pio
from plotly.subplots import make_subplots
# Initialise plotly's offline notebook mode.
# NOTE(review): connected=True presumably makes plotly.js load from the web
# instead of being embedded in the notebook — confirm against the plotly docs.
py.init_notebook_mode(connected=True)
# Required for plotly to run on colab: select the 'colab' renderer as the
# default used when figures are shown.
pio.renderers.default = 'colab'
%matplotlib inline

In [None]:
import os
from google.colab import drive
# Mount Google Drive into the Colab filesystem, then switch the working
# directory to the project folder so relative file paths resolve there.
# NOTE(review): the path is hard-coded to one user's Drive layout.
drive.mount('/content/drive/')
os.chdir('/content/drive/MyDrive/CA683')

Mounted at /content/drive/


In [None]:
# Load the combined bitcoin price / sentiment table. The path is relative, so
# it is resolved against the current working directory.
data = pd.read_csv("bitcoin_and_sentiments_data.csv")
# NOTE(review): the loaded frame contains an "Unnamed: 0" column — presumably a
# row index that was written into the CSV; confirm whether it is needed.

In [None]:
# Print column names, non-null counts and dtypes to sanity-check the load.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2320 entries, 0 to 2319
Data columns (total 16 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Unnamed: 0                    2320 non-null   int64  
 1   time                          2320 non-null   object 
 2   closed_price_by_hour          2320 non-null   float64
 3   price_diff                    2320 non-null   float64
 4   movement                      2320 non-null   int64  
 5   Daily_Weight_mean_by_hour     2320 non-null   float64
 6   Daily_Weight_count_by_hour    2320 non-null   int64  
 7   blob_sent_mean_by_hour        2320 non-null   float64
 8   subjectivity_mean_by_hour     2320 non-null   float64
 9   vader_sent_mean_by_hour       2320 non-null   float64
 10  tweet_diff                    2320 non-null   float64
 11  blob_sent_mean_by_hour_diff   2320 non-null   float64
 12  vader_sent_mean_by_hour_diff  2320 non-null   float64
 13  twe

In [None]:
# Parse the "time" strings into datetimes (unparseable values are coerced to
# NaT), drop any timezone so the values are tz-naive, and use the result as the
# frame's index while keeping "time" available as a regular column.
timestamps = pd.to_datetime(data["time"], errors="coerce")
data["time"] = timestamps.dt.tz_localize(None)
data.index = data["time"]

In [None]:
# Weekly aggregation used for the twitter volume graph.
# NOTE(review): `tweet_count_by_week` is the number of non-null hourly
# `tweet_diff` rows in each week, i.e. it counts rows rather than summing a
# tweet count — confirm this is the intended "volume" measure.
sentiment_grouped_weekly = (
    data.groupby(pd.Grouper(key="time", freq="W"))
    .agg(
        tweet_count_by_week=("tweet_diff", "count"),
        # The string alias is used instead of np.mean: passing the NumPy
        # callable to agg is deprecated in recent pandas versions.
        price_mean_by_week=("closed_price_by_hour", "mean"),
    )
)

# Weeks with no rows get a count of 0 (never NaN), so only the columns after
# the first one can reveal empty weeks; drop the weeks where those are NaN.
drop_column_subse_week = sentiment_grouped_weekly.columns.values[1:]
sentiment_grouped_weekly = sentiment_grouped_weekly.dropna(subset=drop_column_subse_week)

In [None]:
# Move the weekly "time" index back into a regular column so it can be used as
# the x values of the chart.
sentiment_grouped_weekly_data = sentiment_grouped_weekly.reset_index()
# Bracket assignment always targets the column; attribute-style assignment
# only does so when a column of that name already exists.
sentiment_grouped_weekly_data["time"] = pd.to_datetime(sentiment_grouped_weekly_data["time"])

In [None]:
# Order the weekly rows chronologically so the line chart is drawn left to right.
sentiment_grouped_weekly_data = sentiment_grouped_weekly_data.sort_values(by="time")

**Calculate 5-hr Moving Average**

In [55]:
# 5-row simple moving average of the closing price. Rows without a full window
# of five values come out as NaN and fall back to the raw closing price.
# The result is assigned back explicitly: calling fillna(..., inplace=True) on
# a column selected from the frame is chained assignment, which warns in
# pandas 2.x and leaves the frame untouched under Copy-on-Write.
hourly_close = data["closed_price_by_hour"]
data["simple_moving_average"] = (
    hourly_close.rolling(window=5).mean().fillna(hourly_close)
)

In [65]:
# Weekly mean of the moving average, with "time" restored as a regular column.
sma_grouped_weekly = (
    data.groupby(pd.Grouper(key="time", freq="W"))
    # The string alias is used instead of np.mean: passing the NumPy callable
    # to agg is deprecated in recent pandas versions.
    .agg(simple_moving_average_mean_by_week=("simple_moving_average", "mean"))
    .reset_index()
)
# Normalise the dtype before filtering, so the assignment is made on the
# freshly built frame rather than on a filtered copy.
sma_grouped_weekly["time"] = pd.to_datetime(sma_grouped_weekly["time"])

# Every column after "time" is an aggregate; drop the weeks where it is NaN
# (weeks with no rows), then order the remaining weeks chronologically.
sma_drop_column_subse_week = sma_grouped_weekly.columns.values[1:]
sma_grouped_weekly = (
    sma_grouped_weekly
    .dropna(subset=sma_drop_column_subse_week)
    .sort_values(by="time")
)

In [63]:
# Preview the first rows of the weekly moving-average frame.
# NOTE(review): the saved output of this cell shows a `price_mean_by_week`
# column, which the aggregation that builds this frame does not produce — the
# output looks stale (cell executed out of order); re-run to refresh it.
sma_grouped_weekly.head()

Unnamed: 0,time,price_mean_by_week
0,2016-02-21,418.409535
6,2016-04-03,423.36451
11,2016-05-08,436.289125
12,2016-05-15,437.947036
13,2016-05-22,446.842973


In [67]:
# Dual-axis chart: weekly tweet volume on the primary y-axis, weekly mean
# bitcoin price and its moving average on the secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# One row per trace: (source frame, y column, legend name, line colour,
# whether the trace is drawn against the secondary y-axis).
trace_specs = [
    (sentiment_grouped_weekly_data, "tweet_count_by_week", 'Tweet Volume', 'firebrick', False),
    (sentiment_grouped_weekly_data, "price_mean_by_week", 'Bitcoin Price', 'royalblue', True),
    (sma_grouped_weekly, "simple_moving_average_mean_by_week", 'Moving average', 'yellow', True),
]

for frame, y_column, legend_name, colour, on_secondary in trace_specs:
    fig.add_trace(
        go.Scatter(
            mode='lines+markers',
            x=frame["time"],
            y=frame[y_column],
            name=legend_name,
            line=dict(color=colour, width=3),
        ),
        secondary_y=on_secondary,
    )

# Centered figure title.
fig.update_layout(title_text="Twitter Volume Vs Bitcoin Price", title_x=0.5)

# Axis titles: shared x-axis, then the primary and secondary y-axes.
fig.update_xaxes(title_text="Time")
fig.update_yaxes(title_text="Twitter Volume", secondary_y=False)
fig.update_yaxes(title_text="Bitcoin Price", secondary_y=True)

fig.show()

In [None]:
# Preview the first two rows of the working frame.
data.head(2)

Unnamed: 0.1,Unnamed: 0,time,closed_price_by_hour,price_diff,movement,Daily_Weight_mean_by_hour,Daily_Weight_count_by_hour,blob_sent_mean_by_hour,subjectivity_mean_by_hour,vader_sent_mean_by_hour,tweet_diff,blob_sent_mean_by_hour_diff,vader_sent_mean_by_hour_diff,tweet_movement,blob_sent_movement,vader_sent_movement
0,0,2016-01-01 00:00:00,431.466842,,0,0.069315,10,0.0,0.17,0.162646,-3.0,-0.014592,0.048154,0,0,1
1,1,2016-01-01 01:00:00,430.464194,-1.002649,0,0.213276,13,0.014542,0.269231,0.229434,3.0,0.014542,0.066788,1,1,1
