In [1]:
import os
import boto3
import pandas as pd

In [2]:
# Pull the AWS key pair from a local CSV so the secrets are not typed into the notebook.
DB_ACCESS_KEY = pd.read_csv('DB_ACCESS_KEY.csv')
# Only the first row of the file is consulted.
aws_access_key_id = DB_ACCESS_KEY.at[0, 'Access key ID']
aws_secret_access_key = DB_ACCESS_KEY.at[0, 'Secret access key']

In [3]:
dynamodb = boto3.resource(
    'dynamodb',
    region_name='us-east-2',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)
table = dynamodb.Table('StockSentiment')

# Page through the table: repeat the scan from each LastEvaluatedKey until
# a response arrives without one, collecting every page's items.
data = []
scan_kwargs = {}
while True:
    response = table.scan(**scan_kwargs)
    data.extend(response['Items'])
    if 'LastEvaluatedKey' not in response:
        break
    scan_kwargs = {'ExclusiveStartKey': response['LastEvaluatedKey']}

# Build the frame, parse Date into datetimes, and index by it with the
# most recent rows first.
sentiment_ticker_list = (
    pd.DataFrame(data)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .sort_index(ascending=False)
)
sentiment_ticker_list

Unnamed: 0_level_0,ticker_sentiment_score,ticker_sentiment_label,Stock,source,url,relevance_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-27 10:00:00,0.181614,Somewhat-Bullish,RSSS,Benzinga,https://www.benzinga.com/pressreleases/23/03/n...,0.089129
2023-03-27 10:00:00,0.404327,Bullish,RAIL,Benzinga,https://www.benzinga.com/pressreleases/23/03/g...,0.500324
2023-03-27 10:00:00,0.19024,Somewhat-Bullish,GDVM,PennyStocks.com,https://pennystocks.com/featured/2023/03/27/3-...,0.026532
2023-03-27 07:26:12,0.446184,Bullish,FSR,Investing News Network,https://investingnews.com/all-electric-fisker-...,0.862645
2023-03-27 07:26:12,0.317112,Somewhat-Bullish,GOOG,Investing News Network,https://investingnews.com/all-electric-fisker-...,0.090989
...,...,...,...,...,...,...
2023-02-21 07:42:00,0.003875,Neutral,NWG,Reuters,https://www.reuters.com/markets/europe/hsbc-cu...,0.140076
2023-02-21 07:31:30,0.327663,Somewhat-Bullish,NIO,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.730954
2023-02-21 07:31:30,0.086742,Neutral,TSLA,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.472338
2023-02-21 07:30:00,0.082247,Neutral,CVS,GlobeNewswire,https://www.globenewswire.com/news-release/202...,0.010758


In [4]:
# Database login details live in a local CSV; only its first row is used.
rds_key_pass = pd.read_csv('db_key_pass.csv')
username = rds_key_pass.at[0, 'ID']
password = rds_key_pass.at[0, 'PASS']
url = rds_key_pass.at[0, 'URL']

In [5]:
# Align the DynamoDB column names with the ones used by the RDS table.
sentiment_ticker_list = sentiment_ticker_list.rename(
    columns={
        'Stock': 'stock',
        'ticker_sentiment_score': 'sentiment',
        'ticker_sentiment_label': 'sentiment_label',
        'url': 'article_url',
    }
)

In [45]:
# Expose the datetime index as a regular column so it can take part in the key.
sentiment_ticker_list['time_published'] = sentiment_ticker_list.index
# Keep the first row per (stock, time_published), then index by that pair,
# newest first. drop_duplicates already returns a new frame.
new_sents = (
    sentiment_ticker_list
    .drop_duplicates(subset=['stock', 'time_published'])
    .set_index(['time_published', 'stock'])
    .sort_index(ascending=False)
)
new_sents

Unnamed: 0_level_0,Unnamed: 1_level_0,sentiment,sentiment_label,source,article_url,relevance_score
time_published,stock,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-27 10:00:00,RSSS,0.181614,Somewhat-Bullish,Benzinga,https://www.benzinga.com/pressreleases/23/03/n...,0.089129
2023-03-27 10:00:00,RAIL,0.404327,Bullish,Benzinga,https://www.benzinga.com/pressreleases/23/03/g...,0.500324
2023-03-27 10:00:00,GDVM,0.19024,Somewhat-Bullish,PennyStocks.com,https://pennystocks.com/featured/2023/03/27/3-...,0.026532
2023-03-27 07:26:12,META,0.295401,Somewhat-Bullish,Investing News Network,https://investingnews.com/all-electric-fisker-...,0.045569
2023-03-27 07:26:12,GOOG,0.317112,Somewhat-Bullish,Investing News Network,https://investingnews.com/all-electric-fisker-...,0.090989
...,...,...,...,...,...,...
2023-02-21 07:42:00,NWG,0.003875,Neutral,Reuters,https://www.reuters.com/markets/europe/hsbc-cu...,0.140076
2023-02-21 07:31:30,TSLA,0.086742,Neutral,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.472338
2023-02-21 07:31:30,NIO,0.327663,Somewhat-Bullish,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.730954
2023-02-21 07:30:00,CVS,0.082247,Neutral,GlobeNewswire,https://www.globenewswire.com/news-release/202...,0.010758


In [46]:
import pymysql
from sqlalchemy import create_engine
import sqlalchemy

In [84]:
# Column types used when pandas creates the Sentiments table.
# sentiment_label is VARCHAR(20): labels such as 'Somewhat-Bullish' and
# 'Somewhat-Bearish' are 16 characters long and do not fit in VARCHAR(10).
dtypes = {
    'time_published': sqlalchemy.types.DATETIME,
    'relevance_score': sqlalchemy.types.DECIMAL(6,5),
    'sentiment': sqlalchemy.types.DECIMAL(6,5),
    'stock': sqlalchemy.types.VARCHAR(10),
    'sentiment_label': sqlalchemy.types.VARCHAR(20),
    'article_url': sqlalchemy.types.VARCHAR(200),
    'source': sqlalchemy.types.VARCHAR(50),
}
# Establish connection to the MySQL database (used for the ALTER TABLE below)
with pymysql.connect(host=url, user=username, password=password, db='stock_data') as con:
    # Create a SQLAlchemy engine object for pandas to write through
    engine = create_engine(f'mysql+pymysql://{username}:{password}@{url}/stock_data', echo=False)
    try:
        # Recreate the table from every row except the last five of new_sents.
        new_sents.iloc[:-5].to_sql(name='Sentiments', con=engine, if_exists='replace', index=True, index_label=['time_published', 'stock'], dtype=dtypes)
        # 'replace' builds a fresh table, so the composite primary key is added afterwards.
        with con.cursor() as cursor:
            cursor.execute('ALTER TABLE Sentiments ADD PRIMARY KEY (stock, time_published);')
    except Exception as e:
        # Best effort: report the failure without stopping the notebook.
        print(e)
    finally:
        # Release the engine's pooled connections; the engine object stays usable.
        engine.dispose()

In [85]:
# Display the final five rows of new_sents.
new_sents.tail(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,sentiment,sentiment_label,source,article_url,relevance_score
time_published,stock,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-02-21 07:42:00,NWG,0.003875,Neutral,Reuters,https://www.reuters.com/markets/europe/hsbc-cu...,0.140076
2023-02-21 07:31:30,TSLA,0.086742,Neutral,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.472338
2023-02-21 07:31:30,NIO,0.327663,Somewhat-Bullish,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.730954
2023-02-21 07:30:00,CVS,0.082247,Neutral,GlobeNewswire,https://www.globenewswire.com/news-release/202...,0.010758
2023-02-21 07:05:00,DSSMY,0.0,Neutral,Reuters,https://www.reuters.com/markets/europe/resilie...,0.040416


In [86]:
# Column types applied if pandas has to create the Sentiments table.
# sentiment_label is VARCHAR(20): labels such as 'Somewhat-Bullish' and
# 'Somewhat-Bearish' are 16 characters long and do not fit in VARCHAR(10).
dtypes = {
    'time_published': sqlalchemy.types.DATETIME,
    'relevance_score': sqlalchemy.types.DECIMAL(6,5),
    'sentiment': sqlalchemy.types.DECIMAL(6,5),
    'stock': sqlalchemy.types.VARCHAR(10),
    'sentiment_label': sqlalchemy.types.VARCHAR(20),
    'article_url': sqlalchemy.types.VARCHAR(200),
    'source': sqlalchemy.types.VARCHAR(50),
}
# Create the SQLAlchemy engine once instead of once per row; every to_sql
# call below goes through it, so no separate pymysql connection is needed.
engine = create_engine(f'mysql+pymysql://{username}:{password}@{url}/stock_data', echo=False)
# The last ten rows of new_sents are the candidates to append.
rows_to_append = new_sents.iloc[-10:]
try:
    for i in range(len(rows_to_append)):
        try:
            # Insert one row at a time so that a failure on one row
            # (e.g. a duplicate primary key) does not block the others.
            rows_to_append.iloc[i:i+1].to_sql(name='Sentiments', con=engine, if_exists='append', index=True, index_label=['time_published', 'stock'], dtype=dtypes)
        except Exception as e:
            # Best effort: report the failing row and carry on.
            print(e)
finally:
    # Release the engine's pooled connections.
    engine.dispose()

(pymysql.err.IntegrityError) (1062, "Duplicate entry 'FOREX:USD-2023-02-21 07:50:00' for key 'Sentiments.PRIMARY'")
[SQL: INSERT INTO `Sentiments` (time_published, stock, sentiment, sentiment_label, source, article_url, relevance_score) VALUES (%(time_published)s, %(stock)s, %(sentiment)s, %(sentiment_label)s, %(source)s, %(article_url)s, %(relevance_score)s)]
[parameters: {'time_published': datetime.datetime(2023, 2, 21, 7, 50), 'stock': 'FOREX:USD', 'sentiment': '-0.165678', 'sentiment_label': 'Somewhat-Bearish', 'source': 'Reuters', 'article_url': 'https://www.reuters.com/markets/currencies/russian-rouble-weakens-ahead-putin-address-parliament-2023-02-21/', 'relevance_score': '0.233556'}]
(Background on this error at: https://sqlalche.me/e/20/gkpj)
(pymysql.err.IntegrityError) (1062, "Duplicate entry 'FOREX:EUR-2023-02-21 07:50:00' for key 'Sentiments.PRIMARY'")
[SQL: INSERT INTO `Sentiments` (time_published, stock, sentiment, sentiment_label, source, article_url, relevance_score) V

In [78]:
# Hold on to the final ten rows of new_sents for inspection.
temp = new_sents.tail(10)
temp

Unnamed: 0_level_0,Unnamed: 1_level_0,sentiment,sentiment_label,source,article_url,relevance_score
time_published,stock,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-02-21 07:50:00,FOREX:USD,-0.165678,Somewhat-Bearish,Reuters,https://www.reuters.com/markets/currencies/rus...,0.233556
2023-02-21 07:50:00,FOREX:EUR,-0.127204,Neutral,Reuters,https://www.reuters.com/markets/currencies/rus...,0.118064
2023-02-21 07:50:00,FOREX:CNY,-0.127204,Neutral,Reuters,https://www.reuters.com/markets/currencies/rus...,0.118064
2023-02-21 07:49:00,HAITY,0.0,Neutral,Business Standard,https://www.business-standard.com/article/mark...,0.200688
2023-02-21 07:46:01,CRYPTO:BTC,0.013812,Neutral,The Financial Express,https://www.financialexpress.com/blockchain/cr...,0.406745
2023-02-21 07:42:00,NWG,0.003875,Neutral,Reuters,https://www.reuters.com/markets/europe/hsbc-cu...,0.140076
2023-02-21 07:31:30,TSLA,0.086742,Neutral,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.472338
2023-02-21 07:31:30,NIO,0.327663,Somewhat-Bullish,Benzinga,https://www.benzinga.com/news/23/02/30980568/n...,0.730954
2023-02-21 07:30:00,CVS,0.082247,Neutral,GlobeNewswire,https://www.globenewswire.com/news-release/202...,0.010758
2023-02-21 07:05:00,DSSMY,0.0,Neutral,Reuters,https://www.reuters.com/markets/europe/resilie...,0.040416


In [79]:
# Print each row of temp as its own one-row DataFrame.
for row_position in range(len(temp)):
    print(temp.iloc[[row_position]])

                               sentiment   sentiment_label   source  \
time_published      stock                                             
2023-02-21 07:50:00 FOREX:USD  -0.165678  Somewhat-Bearish  Reuters   

                                                                     article_url  \
time_published      stock                                                          
2023-02-21 07:50:00 FOREX:USD  https://www.reuters.com/markets/currencies/rus...   

                              relevance_score  
time_published      stock                      
2023-02-21 07:50:00 FOREX:USD        0.233556  
                               sentiment sentiment_label   source  \
time_published      stock                                           
2023-02-21 07:50:00 FOREX:EUR  -0.127204         Neutral  Reuters   

                                                                     article_url  \
time_published      stock                                                          
2023-02-21 07

In [4]:
# Connect to the database
connection = pymysql.connect(
    host=url,
    user=username,
    password=password,
    db="stock_data"
)
try:
    # load from RDS into pandas
    sentiment_ticker_list = pd.read_sql('SELECT * FROM Sentiments', con=connection)
finally:
    # Always release the connection, including when the query fails
    # (for example when the table does not exist).
    connection.close()

  sentiment_ticker_list = pd.read_sql('SELECT * FROM Sentiments', con=connection)


DatabaseError: Execution failed on sql 'SELECT * FROM Sentiments': (1146, "Table 'stock_data.Sentiments' doesn't exist")

In [8]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
new_sents = sentiment_ticker_list.copy(deep=True)
new_sents

Unnamed: 0,stock,relevance_score,sentiment,sentiment_label,article_url,source,time_published
0,AIMC,0.02,0.00,Neutral,https://www.prnewswire.com/news-releases/regal...,PR Newswire,2023-03-27 12:27:00
1,CURN,0.02,0.05,Neutral,https://www.prnewswire.com/news-releases/regal...,PR Newswire,2023-03-27 12:27:00
2,RRX,0.41,0.32,Somewhat-Bullish,https://www.prnewswire.com/news-releases/regal...,PR Newswire,2023-03-27 12:27:00
3,GE,0.01,0.13,Neutral,https://www.prnewswire.com/news-releases/regal...,PR Newswire,2023-03-27 12:27:00
4,ROK,0.01,-0.01,Neutral,https://www.prnewswire.com/news-releases/regal...,PR Newswire,2023-03-27 12:27:00
...,...,...,...,...,...,...,...
14453,GS,0.12,-0.09,Neutral,https://www.axios.com/2023/03/29/office-real-e...,Axios,2023-03-29 11:20:00
14454,ENOV,0.38,0.32,Somewhat-Bullish,https://www.benzinga.com/trading-ideas/long-id...,Benzinga,2023-03-29 11:28:50
14455,ESPR,0.16,0.19,Somewhat-Bullish,https://www.benzinga.com/trading-ideas/long-id...,Benzinga,2023-03-29 11:28:50
14456,SLS,0.24,0.05,Neutral,https://www.benzinga.com/trading-ideas/long-id...,Benzinga,2023-03-29 11:28:50


In [10]:
# Keep the first row for each (stock, time_published) pair, then use that
# pair as a sorted MultiIndex.
new_sents = (
    new_sents
    .drop_duplicates(subset=['stock', 'time_published'])
    .set_index(['stock', 'time_published'])
    .sort_index()
)
new_sents

Unnamed: 0_level_0,Unnamed: 1_level_0,relevance_score,sentiment,sentiment_label,article_url,source
stock,time_published,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,2023-03-08 14:00:00,0.13,0.14,Neutral,https://www.benzinga.com/pressreleases/23/03/g...,Benzinga
A,2023-03-14 19:30:00,0.05,0.00,Neutral,https://www.benzinga.com/pressreleases/23/03/g...,Benzinga
AA,2023-03-14 22:00:20,0.67,0.26,Somewhat-Bullish,https://www.zacks.com/stock/news/2065773/alcoa...,Zacks Commentary
AABB,2023-03-09 13:00:00,0.75,0.35,Bullish,https://www.benzinga.com/pressreleases/23/03/g...,Benzinga
AADI,2023-03-10 00:25:04,0.21,0.00,Neutral,https://www.zacks.com/stock/news/2063886/carib...,Zacks Commentary
...,...,...,...,...,...,...
ZVRA,2023-03-15 11:26:12,0.08,0.08,Neutral,https://www.benzinga.com/pressreleases/23/03/g...,Benzinga
ZYNE,2023-03-01 12:00:00,0.21,-0.01,Neutral,https://www.benzinga.com/pressreleases/23/03/g...,Benzinga
ZYXI,2023-03-13 14:55:50,0.16,0.53,Bullish,https://www.benzinga.com/news/23/03/31322709/i...,Benzinga
ZYXI,2023-03-15 22:52:00,0.44,0.03,Neutral,https://www.benzinga.com/pressreleases/23/03/g...,Benzinga


In [12]:
# Create a SQLAlchemy engine object; pandas writes through it, so no
# separate pymysql connection is opened here.
engine = create_engine(f'mysql+pymysql://{username}:{password}@{url}/stock_data', echo=False)

try:
    # Convert the pandas DataFrame to a MySQL table.
    # new_sents is indexed by (stock, time_published), so the index must be
    # written out (index=True); with index=False both key columns would be
    # missing from the recreated table.
    new_sents.to_sql(
        name='Sentiments',
        con=engine,
        if_exists='replace',
        index=True,
        index_label=['stock', 'time_published'],
        dtype={
            # A bounded VARCHAR lets MySQL build the index pandas creates on the index columns.
            'stock': sqlalchemy.types.VARCHAR(10),
            'time_published': sqlalchemy.types.DATETIME,
            'relevance_score': sqlalchemy.types.DECIMAL(6,5),
            'sentiment': sqlalchemy.types.DECIMAL(6,5),
        },
    )
finally:
    # Release the engine's pooled connections even if the write fails.
    engine.dispose()