# Twitter vs Stocks Change Data Analysis

## Data Cleaning Notebook

In [323]:
# Packages:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from urllib.request import urlopen
import json
import datetime
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [324]:
# Load the raw tweet export (see ceo_name.ipynb for how it was extracted)
# and keep only the fields the rest of this notebook works with.
columns_chosed = [
    'created_at',
    'full_text',
    'id',
    'source',
    'favorite_count',
    'retweet_count',
]

ceo_tweets = pd.read_csv("data/branson.csv").loc[:, columns_chosed]

FileNotFoundError: [Errno 2] No such file or directory: 'data/branson.csv'

In [None]:
# Keep only tweets posted on or after 2019-01-01.
# The round trip through `.dt.date` truncates every timestamp to its calendar
# day; converting back to datetime64 keeps the `.dt` accessor usable afterwards.
ceo_tweets["created_at"] = pd.to_datetime(
    pd.to_datetime(ceo_tweets["created_at"]).dt.date
)
# Boolean filtering keeps the original row labels, leaving gaps in the index.
# Reset it so any later label-based access that assumes 0..n-1 labels stays
# valid instead of raising KeyError or touching the wrong rows.
ceo_tweets = ceo_tweets[ceo_tweets["created_at"] >= '2019-01-01'].reset_index(drop=True)

### Tweets DataFrame and Sentiment Analysis

In [None]:
import re

# Clean The Data
def cleantext(text):
    """Strip Twitter-specific noise from a tweet.

    Removes, in this order: @mentions, the ``#`` symbol (the tag word itself
    is kept), the standalone ``RT`` retweet marker, and http(s) links.

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned text. Whitespace is not normalised, so the gaps left by
        removed tokens remain.
    """
    # Handles may contain underscores; without `_` in the class,
    # "@richard_branson" would leave "_branson" behind.
    text = re.sub(r"@[A-Za-z0-9_]+", "", text)  # Remove Mentions
    text = re.sub(r"#", "", text)  # Remove Hashtags Symbol
    # \b restricts the match to the standalone marker, so words that merely
    # end in "RT" (e.g. "START now") are left intact.
    text = re.sub(r"\bRT\s+", "", text)  # Remove Retweets
    text = re.sub(r"https?:\/\/\S+", "", text)  # Remove The Hyper Link

    return text

In [None]:
# Run every tweet through cleantext, overwriting the raw text column.
ceo_tweets['full_text'] = ceo_tweets['full_text'].map(cleantext)

In [None]:
# Tag each tweet with its weekday number (pandas convention: Monday=0 ... Sunday=6)
# so weekend posts can be identified.
ceo_tweets["Day of Week"] = ceo_tweets['created_at'].dt.weekday

In [None]:
# Keep the real posting date, then move weekend tweets forward to the following
# Monday (Saturday=5 -> +2 days, Sunday=6 -> +1 day), presumably so they line up
# with a trading day when joined with the stock prices.
ceo_tweets['orig_date'] = ceo_tweets['created_at']

# Vectorised on purpose: the previous row loop used chained assignment
# (`df[col].loc[i] = ...`) and looked rows up by label over range(n), which
# breaks as soon as the index is not exactly 0..n-1 (e.g. after filtering).
weekday = ceo_tweets['created_at'].dt.dayofweek
days_to_monday = weekday.map({5: 2, 6: 1}).fillna(0).astype(int)
ceo_tweets['created_at'] = ceo_tweets['created_at'] + pd.to_timedelta(days_to_monday, unit='D')

In [None]:
from textblob import TextBlob

# Sentiment scoring for a single tweet row
def sentiment_analysis(ds):
    """Score one tweet row with TextBlob.

    Parameters
    ----------
    ds : mapping-like row exposing a 'full_text' entry.

    Returns
    -------
    pandas.Series
        Two values, in order: TextBlob subjectivity, TextBlob polarity.
    """
    scores = TextBlob(ds['full_text']).sentiment
    subjectivity, polarity = scores.subjectivity, scores.polarity
    return pd.Series([subjectivity, polarity])

# Score every row and store the two values as new columns
ceo_tweets[["subjectivity", "polarity"]] = ceo_tweets.apply(
    sentiment_analysis,
    axis="columns",
)

In [None]:
# Collapse the tweet-level frame to one row per date:
#   Tweets            - all tweet texts of the day, joined with a single space
#   Agg_count         - number of tweets
#   Agg_retweet       - total retweets
#   Agg_favorite      - total likes
#   Subjectivity_mean - mean TextBlob subjectivity
#   Polarity_mean     - mean TextBlob polarity
ceo_tweets = (
    ceo_tweets.groupby("created_at")
    .agg(
        # " ".join rather than "sum": summing strings concatenates them with no
        # separator, fusing the last word of one tweet with the first of the next.
        Tweets=("full_text", " ".join),
        Agg_count=("full_text", "count"),
        Agg_retweet=("retweet_count", "sum"),
        Agg_favorite=("favorite_count", "sum"),
        Subjectivity_mean=("subjectivity", "mean"),
        Polarity_mean=("polarity", "mean"),
    )
    .reset_index()
    .rename(columns={"created_at": "Date"})
)

In [None]:
# Polarity change: log of the ratio between a row's mean polarity and the
# previous row's mean polarity.
# The log-ratio is undefined when the two values differ in sign (NaN) and
# infinite when either of them is 0. fillna(0) alone only caught the NaN case
# and let +/-inf leak into the output, so infinities are mapped to NaN first
# and every undefined change is then recorded as 0.
ceo_tweets["Polarity_chg"] = (
    np.log(ceo_tweets["Polarity_mean"] / ceo_tweets["Polarity_mean"].shift())
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

### Stock Data Cleaning

In [None]:
# Getting Data Stock prices. Source: Yahoo Finance.
stock = pd.read_csv("data/SPCE.csv")
# Explicit copy: the columns added below must go onto an independent frame,
# not onto a slice of `stock` (which triggers SettingWithCopyWarning).
stock_df = stock[['Date', 'Close']].copy()

# Calculating Stock Price Variation: log return between consecutive rows.
# The first row has no predecessor and is therefore NaN.
stock_df['Return'] = np.log(stock_df['Close'] / stock_df['Close'].shift())

# Truncate to calendar days and convert back to datetime64.
stock_df["Date"] = pd.to_datetime(pd.to_datetime(stock_df["Date"]).dt.date)

### Merging Stocks and Tweets DataFrames

In [None]:
# Left join on Date: every stock row is kept, tweet columns are attached where a matching date exists.
df_stock_tweets = pd.merge(left=stock_df, right=ceo_tweets, how='left', on='Date')

In [None]:
# Replace missing values: numeric tweet metrics become 0, the tweet text becomes an empty string.
colnan = [
    'Agg_count',
    'Agg_retweet',
    'Agg_favorite',
    'Subjectivity_mean',
    'Polarity_mean',
    'Polarity_chg',
]
df_stock_tweets[colnan] = df_stock_tweets[colnan].fillna(0)

df_stock_tweets['Tweets'] = df_stock_tweets['Tweets'].fillna('')

In [None]:
# Persist the merged stock + tweet table, without the row index.
df_stock_tweets.to_csv(path_or_buf='data/branson_cleaned.csv', index=False)

### Plots to be Updated!

In [None]:
# Bubble chart: tweets per day (x) against the stock's daily return (y);
# bubble area is driven by the day's total likes.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_stock_tweets['Agg_count'],
            y=df_stock_tweets['Return'],
            mode="markers",
            marker=dict(
                size=df_stock_tweets['Agg_favorite'],
                sizemode="area",
                sizeref=2*max(df_stock_tweets['Agg_favorite']/1000),
            ),
            hovertemplate=(
                "Number of Tweets per Day: %{x:.1f}<br>"
                "Stock Price Change: %{y:.1f}<br>"
                "Number of Likes: %{marker.size:,}"
                "<extra></extra>"
            ),
        )
    ]
)
axis_font = {"size": 16}
fig.update_layout(
    plot_bgcolor="white",
    title=dict(text="CEO's # Tweets vs Stock Price Change", font=axis_font),
    xaxis=dict(title=dict(text="# of Tweets per Day", font=axis_font)),
    yaxis=dict(title=dict(text="Stock Price Change", font=axis_font)),
)
fig.show()

In [None]:
# Bubble chart: likes per day (x) against the stock's daily return (y);
# bubble area is driven by the number of tweets that day.
# Reads from df_stock_tweets (the merged frame built earlier in this notebook).
# The previous version referenced `branson_df` / `apple_stock_df`, which are
# never defined here and raise NameError on a fresh kernel.
fig = go.Figure()
_ = fig.add_trace(go.Scatter(
    x= df_stock_tweets['Agg_favorite'],
    y= df_stock_tweets['Return'],
    mode="markers",
    marker={"size": df_stock_tweets['Agg_count'], "sizemode": "area",
                                    "sizeref": 2*max(df_stock_tweets['Agg_count']/1000)},
    hovertemplate= "Number of Tweets Liked per Day: %{x:.1f}<br>" +
    "Stock Price Change: %{y:.1f}<br>" +
    "Number of Tweets: %{marker.size:,}" +
    "<extra></extra>"
))
fig.update_layout(
    plot_bgcolor="white",
    yaxis={"title": {"text": "Stock Price Change", "font": {"size": 16}}},
    xaxis={"title": {"text": "# of Tweets Liked per Day", "font": {"size": 16}}},
    title={'text': "Richard Branson's # Tweets Liked vs Stock Price Change", "font": {"size": 16}}
)
fig.show()

In [None]:
# Bubble chart: change in tweet polarity (x) against the stock's daily return (y);
# bubble area is driven by the day's total likes.
# Reads from df_stock_tweets (the merged frame built earlier in this notebook).
# The previous version referenced `branson`, `branson_df` and `apple_stock_df`,
# which are never defined here and raise NameError on a fresh kernel.
fig = go.Figure()
_ = fig.add_trace(go.Scatter(
    x= df_stock_tweets['Polarity_chg'],
    y= df_stock_tweets['Return'],
    mode="markers",
    marker={"size": df_stock_tweets['Agg_favorite'], "sizemode": "area",
                                    "sizeref": 2*max(df_stock_tweets['Agg_favorite']/1000)},
    hovertemplate= "Polarity of Tweets: %{x:.1f}<br>" +
    "Stock Price Change: %{y:.1f}<br>" +
    "Number of Likes: %{marker.size:,}" +
    "<extra></extra>"
))
fig.update_layout(
    plot_bgcolor="white",
    xaxis={"title": {"text": "Sentiment Change", "font": {"size": 16}}},
    yaxis={"title": {"text": "Stock Price Change", "font": {"size": 16}}},
    title={'text': "Richard Branson's Tweets Sentiment Change vs Virgin Galactic's Stock Price Change", "font": {"size": 16}}
)
fig.show()

## Word Cloud

In [None]:
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Build a single corpus from the per-date tweet text and render it as a word cloud.
# Uses the 'Tweets' column of the aggregated `ceo_tweets` frame; the previous
# version read from `branson`, which is never defined in this notebook and
# raises NameError on a fresh kernel.
allwords = " ".join(ceo_tweets['Tweets'])
wordCloud = WordCloud(width=1000, height=1000, random_state=21, max_font_size=119).generate(allwords)

plt.figure(figsize=(20, 20), dpi=80)
plt.imshow(wordCloud, interpolation="bilinear")
plt.axis("off")
plt.show()