In [1]:
import requests
import pandas as pd
import re

In [2]:
URL = "https://www.biopharmcatalyst.com/company/ABBV/news"
page = requests.get(URL, headers={'User-Agent': 'Mozilla/5.0'})

from bs4 import BeautifulSoup
soup = BeautifulSoup(page.content, "html.parser")

In [3]:
date_elements = soup.find_all("time", class_="news-item__time")
text_elements = soup.find_all("a", class_="news-item__link")

In [4]:
news = pd.DataFrame(text_elements)

In [5]:
date = pd.DataFrame(date_elements)

In [6]:
df = pd.concat([date,news],1)

In [7]:
df.columns = ['date', 'news']

In [8]:
df['date'] = pd.to_datetime(df['date'])

In [9]:
df

Unnamed: 0,date,news
0,2021-10-15,AbbVie Receives CHMP Positive Opinion for Risa...
1,2021-10-11,Allergan Aesthetics Launches Series of Initiat...
2,2021-10-07,Health Canada Approves the Use of RINVOQ® (upa...
3,2021-10-07,AbbVie's Upadacitinib (RINVOQ®) Met Primary an...
4,2021-10-07,AbbVie's RINVOQ® (upadacitinib) Met Primary an...
...,...,...
262,2019-09-13,"Thinking about buying stock in AbbVie, Aurora ..."
263,2019-09-10,Advances in Biotechnology May Offer Cancer Pat...
264,2019-09-06,AbbVie Declares Quarterly Dividend
265,2019-08-29,AbbVie Discontinues Rovalpituzumab Tesirine (R...


In [10]:
!pip install yfinance



In [11]:
import yfinance as yf

In [12]:
start_date = '2019-08-27'

In [13]:
end_date = '2021-10-15'

In [14]:
ticker = 'ABBV'

In [15]:
data = yf.download(ticker, start_date, end_date)

[*********************100%***********************]  1 of 1 completed


In [16]:
data = data.reset_index()

In [17]:
data.rename(columns = {'Date':'date'}, inplace = True)

In [18]:
df = df.merge(data, on = 'date')

In [19]:
!pip install textblob



In [20]:
!pip install vaderSentiment



In [21]:
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [22]:
def subjectivity(text):
    return TextBlob(text).sentiment.subjectivity 

In [23]:
def polarity(text):
    return TextBlob(text).sentiment.polarity 

In [24]:
df['subjectivity'] = df['news'].apply(subjectivity)

In [25]:
df['polarity'] = df['news'].apply(polarity)

In [26]:
def sentimentscores(text):
    sia = SentimentIntensityAnalyzer()
    sentiment = sia.polarity_scores(text)
    return sentiment

In [27]:
negative = []
neutral = []
positive = []

for i in range(0, len(df['news'])):
    score = sentimentscores(df['news'][i])
    negative.append(score['neg'])
    neutral.append(score['neu'])
    positive.append(score['pos'])

In [28]:
df['negative'] = negative
df['neutral'] = neutral 
df['positive'] = positive

In [29]:
df

Unnamed: 0,date,news,Open,High,Low,Close,Adj Close,Volume,subjectivity,polarity,negative,neutral,positive
0,2021-10-11,Allergan Aesthetics Launches Series of Initiat...,111.139999,111.400002,110.279999,110.339996,109.018311,4613800,0.000000,0.000,0.273,0.559,0.168
1,2021-10-07,Health Canada Approves the Use of RINVOQ® (upa...,109.779999,111.790001,109.559998,110.870003,109.541969,7842600,0.700000,0.000,0.103,0.791,0.107
2,2021-10-07,AbbVie's Upadacitinib (RINVOQ®) Met Primary an...,109.779999,111.790001,109.559998,110.870003,109.541969,7842600,0.433333,0.200,0.000,1.000,0.000
3,2021-10-07,AbbVie's RINVOQ® (upadacitinib) Met Primary an...,109.779999,111.790001,109.559998,110.870003,109.541969,7842600,0.400000,0.050,0.000,1.000,0.000
4,2021-10-06,Psychedelic Medicine Start Up Clairvoyant Ther...,109.300003,109.669998,108.150002,109.320000,108.010536,5710300,0.200000,-0.025,0.000,0.870,0.130
...,...,...,...,...,...,...,...,...,...,...,...,...,...
248,2019-09-13,"Thinking about buying stock in AbbVie, Aurora ...",71.150002,71.510002,70.480003,70.510002,62.837143,8135600,0.000000,0.000,0.000,0.898,0.102
249,2019-09-10,Advances in Biotechnology May Offer Cancer Pat...,66.070000,68.180000,65.650002,68.180000,60.760704,12155700,0.000000,0.000,0.308,0.490,0.203
250,2019-09-06,AbbVie Declares Quarterly Dividend,67.260002,68.120003,67.000000,67.620003,60.261642,7502500,0.000000,0.000,0.000,1.000,0.000
251,2019-08-29,AbbVie Discontinues Rovalpituzumab Tesirine (R...,66.730003,67.029999,65.959999,66.220001,59.013988,6011900,0.000000,0.000,0.000,1.000,0.000


In [30]:
df['change'] = df['Open'] > df ['Close']

In [31]:
df['change'] = df['change'].replace(True,'0')

In [32]:
df['change'] = df['change'].replace(False,'1')

In [57]:
df = df.drop(['date', 'news'], axis = 1)

In [58]:
X = df.drop('change', axis = 1).values
y = df['change'].values

In [59]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [60]:
model = LinearDiscriminantAnalysis().fit(X_train, y_train)

In [62]:
predictions = model.predict(X_test)

In [63]:
predictions

array(['0', '1', '0', '0', '0', '1', '1', '1', '0', '1', '0', '0', '0',
       '0', '0', '0', '0', '0', '1', '0', '0', '0', '1', '0', '0', '0',
       '1', '0', '1', '1', '0', '1', '0', '0', '0', '0', '0', '0', '1',
       '1', '0', '0', '0', '1', '1', '1', '0', '1', '1', '0', '1', '0',
       '0', '1', '0', '0', '1', '0', '0', '1', '0', '1', '1', '0', '0',
       '0', '1', '1', '1', '1', '1', '0', '0', '0', '1', '1'], dtype='<U1')

In [64]:
y_test

array(['0', '1', '0', '0', '0', '0', '1', '1', '0', '1', '0', '0', '0',
       '0', '0', '0', '0', '0', '1', '0', '0', '0', '1', '0', '0', '0',
       '1', '0', '1', '1', '0', '1', '0', '0', '0', '0', '0', '0', '1',
       '1', '1', '0', '0', '1', '1', '1', '0', '1', '1', '1', '1', '1',
       '0', '1', '1', '0', '1', '0', '0', '1', '0', '1', '1', '0', '0',
       '0', '1', '1', '1', '1', '1', '0', '0', '0', '1', '1'],
      dtype=object)

In [65]:
from sklearn.metrics import classification_report

In [67]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.91      0.98      0.94        42
           1       0.97      0.88      0.92        34

    accuracy                           0.93        76
   macro avg       0.94      0.93      0.93        76
weighted avg       0.94      0.93      0.93        76

