![QuantConnect Logo](https://cdn.quantconnect.com/web/i/icon.png)
<hr>

### Using Pharmaceutical News to Measure Valuation Gaps and Impact
#### Using keywords and phrases, let's build a model that measures when a new vaccine or drug gets accepted or rejected by the FDA. The y-axis will be the rejections and the x-axis will be the acceptance words.

#### This notebook aims to plot instances of acceptances and rejections in the news relating to pharmaceuticals and FDA compliance. The model closely follows several months of news about the COVID vaccine on its way to ultimately being accepted.

In [None]:
from QuantConnect.Data.Custom.Tiingo import *
import pandas as pd
import numpy as np
import scipy

from datetime import datetime, timedelta
# QuantBook instance used by the cells below to add securities/custom data
# and to request history.
qb = QuantBook()

In [None]:
# Assets to analyze for sentiment.
# One ticker per line with trailing commas: adjacent string literals are
# silently concatenated by Python, so a missing comma would merge two
# tickers into one invalid symbol.
sp500 = [
    "IHE",
    "AMPH",
    "SIGA",
    "LNTH",
    "EMBC",
    "ORGO",
    "OGN",
    "PFE",
]

In [None]:
# Keyword lists for sentiment scoring: each article word that exactly matches
# an entry is counted toward the corresponding category.
acceptance_words = [
    "acceptance",
    "cooperation",
    "acknowledgment",
    "approval",
    "acquiring",
    "admission",
    "compliance",
]
rejection_words = [
    "rejection",
    "dismissal",
    "veto",
    "exclusion",
    "elimination",
    "rebuff",
    "failure",
    "error",
]

In [None]:
def count_instances(master, article):
    """Return how many items of *article* are contained in *master*.

    Every occurrence is counted, so a word repeated in *article* contributes
    once per repetition. Membership is tested with ``in`` exactly as given
    (no case folding or punctuation stripping).
    """
    return sum(1 for word in article if word in master)

In [None]:
# Extract history and get word count of rejection and acceptance words, per day, per ticker.
# Results are collected as one daily-resampled Series per ticker in the two
# *_word_sums lists; tickers whose history could not be processed are
# recorded in tickers_not_in_data.
acceptance_word_sums = []
rejection_word_sums = []
tickers_not_in_data = []
# Note: This takes 45 minutes to run
for ticker in sp500:
    symbol = qb.AddEquity(ticker).Symbol
    news = qb.AddData(TiingoNews, symbol).Symbol
    history = qb.History(TiingoNews, news, timedelta(days=127), Resolution.Daily)
    try:
        # Move the first index level into a column so the remaining index
        # can be resampled by day.
        description = history.reset_index(level=0)['description']
        # NOTE(review): matching is exact on space-separated tokens, so
        # capitalized or punctuated words ("Approval", "approval.") are not
        # counted -- confirm this is intended.
        acceptance_word_count = description.apply(lambda x: count_instances(acceptance_words, x.split(' ')))
        acceptance_word_count_daily = acceptance_word_count.resample('D').sum()
        rejection_word_count = description.apply(lambda x: count_instances(rejection_words, x.split(' ')))
        rejection_word_count_daily = rejection_word_count.resample('D').sum()
    except Exception:
        # `except Exception` rather than a bare `except` so that
        # KeyboardInterrupt/SystemExit still stop this long-running loop.
        # Presumably the usual failure is a ticker with no news history
        # (no 'description' column) -- TODO confirm.
        tickers_not_in_data.append(ticker)
        continue
    # Append only after both counts succeeded so the two lists stay aligned
    # and a ticker is never both "missing" and partially recorded.
    acceptance_word_sums.append(acceptance_word_count_daily)
    rejection_word_sums.append(rejection_word_count_daily)

In [None]:
import pickle 
# Speed up iterative analysis by pickling the results of the history loop.
# pickle.dumps serializes to in-memory bytes objects; nothing is written to disk here.
acceptance_word_sums_pickle = pickle.dumps(acceptance_word_sums)
rejection_word_sums_pickle = pickle.dumps(rejection_word_sums)
tickers_not_in_data_pickle = pickle.dumps(tickers_not_in_data)

# Load pickled history to avoid calling data again
# (uncomment to restore the lists from the bytes objects created above)
# acceptance_word_sums = pickle.loads(acceptance_word_sums_pickle) 
# rejection_word_sums = pickle.loads(rejection_word_sums_pickle)
# tickers_not_in_data = pickle.loads(tickers_not_in_data_pickle)

In [None]:

# Get the sum of acceptance words per day for all tickers.
# pd.concat accepts the list of per-ticker Series directly, so no copy loop
# is needed. Grouping on index level 0 then sums the entries that share the
# same timestamp across tickers. The `axis` keyword is omitted because it is
# deprecated for Series.groupby in recent pandas; axis=0 is the default.
acceptance_data = pd.concat(acceptance_word_sums)
acceptance = acceptance_data.groupby(level=0).sum()

In [None]:

# Get the sum of rejection words per day for all tickers.
# pd.concat accepts the list of per-ticker Series directly, so no copy loop
# is needed (the previous loop also misspelled `rejection_data`, raising a
# NameError). Grouping on index level 0 then sums the entries that share the
# same timestamp across tickers. The `axis` keyword is omitted because it is
# deprecated for Series.groupby in recent pandas; axis=0 is the default.
rejection_data = pd.concat(rejection_word_sums)
rejection = rejection_data.groupby(level=0).sum()

In [None]:

# Both Series are currently named "description"; give each a distinct name so
# they become separate "acceptance" and "rejection" columns.
acceptance = acceptance.rename("acceptance")
rejection = rejection.rename("rejection")
# Align the two Series side by side once just to obtain their combined index,
# then expose that index as a regular column alongside the counts.
time = pd.concat([acceptance, rejection], axis=1).index.to_series()
df = pd.concat([time, acceptance, rejection], axis=1)

In [None]:

# Convert absolute counts into each category's share of the day's total
# matched words, rounded to two decimals.
df['total'] = df['acceptance'].add(df['rejection'])
df['pct_acceptance'] = df['acceptance'].div(df['total']).round(2)
df['pct_rejection'] = df['rejection'].div(df['total']).round(2)
# Drop the last two rows (treated as incomplete data)
df = df.iloc[:-2]

In [None]:

import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# One line per category: share of acceptance vs. rejection words over time.
trace1 = go.Scatter(
    x=df.time,
    y=df.pct_acceptance,
    name="Acceptance in the News",
    line={'color': 'rgb(138, 185, 211)'},
)

trace2 = go.Scatter(
    x=df.time,
    y=df.pct_rejection,
    name="Rejection in the News",
    line={'color': 'rgb(237, 193, 218)'},
)

# The y-axis is formatted as whole percentages and pinned to [0, 1] because
# both series are fractions of the daily total.
layout = {
    'title': 'S&P500 Pharmaceutical News Valuation Gaps',  # spelling fixed
    'plot_bgcolor': 'rgb(248, 247, 247)',
    'yaxis': {'tickformat': ',.0%', 'range': [0, 1]},
}

fig = dict(data=[trace1, trace2], layout=layout)

iplot(fig)