In [1]:
import pandas as pd
import pickle
import weighted_sentiment_functions_final as wsff
import stock_price_calculation_trials as spct
from datetime import datetime

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Kevin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [10]:
# Load the raw filings table. The file has no header row, so the column names
# are supplied here; the first field of every row is empty and is named
# 'DELETE' so it can be dropped afterwards.
all_data = pd.read_csv(
    'sample_data_modified.csv',
    # With engine='python' a multi-character separator is interpreted as a
    # regular expression. The dot is escaped so that only the literal two
    # characters ".@" split fields; unescaped, '.' matches ANY character, so
    # text such as "user@example.com" would be split as well.
    # NOTE(review): assumes the file's delimiter is the literal ".@" - confirm.
    sep=r'\.@',
    header=None,
    names=['DELETE', 'CIK', 'TICKER', 'DATE', 'TEXT'],
    # Keep CIK as text rather than letting pandas infer a numeric dtype.
    dtype={'CIK': object},
    engine='python',
    # Column position 3 ('DATE') is parsed into datetimes. The former
    # infer_datetime_format=True argument is deprecated in pandas 2.x and was
    # only a parsing-speed hint, so it is omitted.
    parse_dates=[3],
    encoding='utf-8',
)
all_data.head()

Unnamed: 0,DELETE,CIK,TICKER,DATE,TEXT
0,,2488,AMD,2019-02-08,Our 2018 financial results demonstrate the suc...
1,,1091667,CHTR,2019-01-31,We are the second largest cable operator in th...
2,,1467858,GM,2010-04-07,Over time as Old GM’s market share declined in...


In [12]:
# Work on a copy of the table without the empty placeholder column produced
# by the leading separator in each row; `all_data` itself is left untouched.
new_all_data = all_data.drop(labels=['DELETE'], axis='columns')
new_all_data.head()

Unnamed: 0,CIK,TICKER,DATE,TEXT
0,2488,AMD,2019-02-08,Our 2018 financial results demonstrate the suc...
1,1091667,CHTR,2019-01-31,We are the second largest cable operator in th...
2,1467858,GM,2010-04-07,Over time as Old GM’s market share declined in...


In [13]:
# Load the trained classifier model from disk.
# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file,
# so only load model files that come from a trusted source.
filename = 'finalised_classifier_model.sav'
# The context manager closes the file handle as soon as the model is loaded
# (a bare open() inside the call would leave it open until garbage collection).
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

In [14]:
# Score every filing text with the loaded classifier, once through the
# weighted scorer and once through the unweighted scorer. Both calls are made
# with clustering_model='Agglomerative'. Each list is printed and then stored
# as a new column, in row order.
weighted_sentiments = [
    wsff.get_weighted_sentiment(filing_text, model, clustering_model='Agglomerative')
    for filing_text in new_all_data['TEXT']
]
print(weighted_sentiments)
new_all_data['weighted_sentiments'] = weighted_sentiments

unweighted_sentiments = [
    wsff.get_unweighted_sentiment(filing_text, model, clustering_model='Agglomerative')
    for filing_text in new_all_data['TEXT']
]
print(unweighted_sentiments)
new_all_data['unweighted_sentiments'] = unweighted_sentiments

[0.80000000000000004, 0.90000000000000002, 0.80000000000000004]
[1.0, 1.0, 1.0]


In [15]:
# Preview the table after the two sentiment columns have been added.
new_all_data.head()

Unnamed: 0,CIK,TICKER,DATE,TEXT,weighted_sentiments,unweighted_sentiments
0,2488,AMD,2019-02-08,Our 2018 financial results demonstrate the suc...,0.8,1.0
1,1091667,CHTR,2019-01-31,We are the second largest cable operator in th...,0.9,1.0
2,1467858,GM,2010-04-07,Over time as Old GM’s market share declined in...,0.8,1.0


In [16]:
def get_current_stock_price_date(ticker, date, max_lookahead_days=366):
    """Return the first date on or after `date` that has a price for `ticker`.

    `spct.get_stock_adj_close(ticker, d)` is tried for `d = date`; every time it
    raises `KeyError` (treated here as "no price available for that date") the
    candidate date is advanced with `spct.add_days(d, 1)` and the lookup is
    retried.

    Parameters
    ----------
    ticker : ticker symbol passed through to `spct.get_stock_adj_close`.
    date : first date to try; passed through unchanged to `spct`.
    max_lookahead_days : int, default 366
        Maximum number of one-day steps taken after `date` before giving up.
        Without a bound, a ticker that never yields a price would keep the
        loop running forever.

    Returns
    -------
    tuple
        `(found_date, stock_price)` where `found_date` is the date for which
        the lookup succeeded and `stock_price` is the value it returned.

    Raises
    ------
    LookupError
        If no price is found for `date` or any of the following
        `max_lookahead_days` days.
    """
    candidate_date = date
    # One attempt for `date` itself plus one per allowed look-ahead day.
    for _ in range(max_lookahead_days + 1):
        try:
            return candidate_date, spct.get_stock_adj_close(ticker, candidate_date)
        except KeyError:
            candidate_date = spct.add_days(candidate_date, 1)
    raise LookupError(
        'No stock price found for {!r} within {} days of {}'.format(
            ticker, max_lookahead_days, date
        )
    )

# For every filing, look up the stock price on the filing date, or on the
# first later date for which a price exists. Both the price and the date that
# was actually used are kept, in row order.
stock_prices, given_dates = [], []

for ticker_symbol, filing_date in zip(new_all_data['TICKER'], new_all_data['DATE']):
    found_date, found_price = get_current_stock_price_date(ticker_symbol, filing_date)
    given_dates.append(found_date)
    stock_prices.append(found_price)

print(stock_prices)
print(given_dates)

[23.049999237060547, 331.04998779296875, 26.155519485473633]
[Timestamp('2019-02-08 00:00:00'), Timestamp('2019-01-31 00:00:00'), Timestamp('2010-11-18 00:00:00')]


In [17]:
def _lookup_shifted_prices(tickers, base_dates, shift_date):
    """Look up each ticker's price at `shift_date(base_date)`.

    Tickers and base dates are paired positionally, so the result does not
    depend on the DataFrame's index labels. Returns `(found_dates, prices)`,
    two lists in input order, as produced by `get_current_stock_price_date`.
    """
    found_dates, prices = [], []
    for ticker_symbol, base_date in zip(tickers, base_dates):
        found_date, price = get_current_stock_price_date(ticker_symbol, shift_date(base_date))
        found_dates.append(found_date)
        prices.append(price)
    return found_dates, prices


# `given_dates` comes from an earlier cell; refuse to continue if it no longer
# lines up with the table (for example after re-running cells out of order),
# because zip() would otherwise silently truncate to the shorter input.
if len(given_dates) != len(new_all_data):
    raise ValueError(
        'given_dates has {} entries but new_all_data has {} rows'.format(
            len(given_dates), len(new_all_data)
        )
    )

# Prices one year and three months after the date actually used for the
# baseline price (not the raw filing date).
given_dates_next_year, stock_prices_next_year = _lookup_shifted_prices(
    new_all_data['TICKER'], given_dates, lambda base_date: spct.add_years(base_date, 1)
)
given_dates_three_months, stock_prices_three_months = _lookup_shifted_prices(
    new_all_data['TICKER'], given_dates, lambda base_date: spct.add_months(base_date, 3)
)

print(stock_prices_next_year)
print(given_dates_next_year)
print(stock_prices_three_months)
print(given_dates_three_months)

[52.259998321533203, 517.46002197265625, 16.585308074951172]
[Timestamp('2020-02-10 00:00:00'), Timestamp('2020-01-31 00:00:00'), Timestamp('2011-11-18 00:00:00')]
[27.090000152587891, 371.19000244140625, 27.930322647094727]
[Timestamp('2019-05-08 00:00:00'), Timestamp('2019-04-30 00:00:00'), Timestamp('2011-02-18 00:00:00')]


In [18]:
# Percentage change from the baseline price to the price one year later and
# three months later, computed pairwise with spct.percent_change(old, new)
# and stored as new columns in row order.
percent_changes_one_year = [
    spct.percent_change(start_price, end_price)
    for start_price, end_price in zip(stock_prices, stock_prices_next_year)
]
percent_changes_three_months = [
    spct.percent_change(start_price, end_price)
    for start_price, end_price in zip(stock_prices, stock_prices_three_months)
]

new_all_data['% change 1 year'] = percent_changes_one_year
new_all_data['% change 3 months'] = percent_changes_three_months

print(percent_changes_one_year)
print(percent_changes_three_months)

[126.72451215316249, 56.308727096605168, -36.589643787566928]
[17.527119519516933, 12.125061509907129, 6.7855779450558877]


In [19]:
# Preview the final table, now including both percentage-change columns.
new_all_data.head()

Unnamed: 0,CIK,TICKER,DATE,TEXT,weighted_sentiments,unweighted_sentiments,% change 1 year,% change 3 months
0,2488,AMD,2019-02-08,Our 2018 financial results demonstrate the suc...,0.8,1.0,126.724512,17.52712
1,1091667,CHTR,2019-01-31,We are the second largest cable operator in th...,0.9,1.0,56.308727,12.125062
2,1467858,GM,2010-04-07,Over time as Old GM’s market share declined in...,0.8,1.0,-36.589644,6.785578
