### TABLES

In [1]:
# Load the SQL extension
%reload_ext sql

# Connect to a MariaDB database
%sql mysql+pymysql://csc370:1234@localhost:3306/sprint1

### Creating tables

- From last sprint, we have our tables created
- Implemented relationship tables

In [2]:
%%sql

DROP TABLE IF EXISTS `PortfolioHasAllocation`;
DROP TABLE IF EXISTS `PortfolioHasStock`;
DROP TABLE IF EXISTS `AllocationHasStock`;
DROP TABLE IF EXISTS `StockHasHistory`;
DROP TABLE IF EXISTS `SessionHasPortfolio`;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.


[]

In [3]:
%%sql

DROP TABLE IF EXISTS `PortfolioHasAllocation`;
DROP TABLE IF EXISTS `PortfolioHasStock`;
DROP TABLE IF EXISTS `AllocationHasStock`;
DROP TABLE IF EXISTS `StockHasHistory`;
DROP TABLE IF EXISTS `SessionHasPortfolio`;
DROP TABLE IF EXISTS `Session`;
DROP TABLE IF EXISTS `Portfolio`;
DROP TABLE IF EXISTS `Allocation`;
DROP TABLE IF EXISTS `Stocks`;
DROP TABLE IF EXISTS `History`;

-- Create main tables
CREATE TABLE `Session` (
    `SessionID` INT PRIMARY KEY
);

CREATE TABLE `Portfolio` (
    `PortfolioID` INT PRIMARY KEY,
    `TotalAmt` FLOAT,
    `Risk` VARCHAR(64)
);

CREATE TABLE `Allocation` (
    `AllocID` INT PRIMARY KEY,
    `Ticker` VARCHAR(10),
    `Amount` FLOAT
);

CREATE TABLE `Stocks` (
    `StockID` INT PRIMARY KEY,
    `Ticker` VARCHAR(10),
    `Sector` VARCHAR(64),
    `Price` FLOAT,
    `SD` FLOAT,   -- Standard Deviation
    `ERet` FLOAT  -- Expected Return
);

CREATE TABLE `History` (
    `HistoryID` INT PRIMARY KEY,
    `Ticker` VARCHAR(10),
    `Date` VARCHAR(10),
    `Price` FLOAT
);

-- Create relationship tables

-- Session HAS Portfolio
-- Portfolio HAS Stocks (assuming a portfolio can directly contain stocks)
CREATE TABLE `PortfolioHasStock` (
    `PortfolioID` INT,
    `StockID` INT,
    FOREIGN KEY (`PortfolioID`) REFERENCES `Portfolio`(`PortfolioID`),
    FOREIGN KEY (`StockID`) REFERENCES `Stocks`(`StockID`),
    PRIMARY KEY (`PortfolioID`, `StockID`)
);

-- Allocation HAS Stocks
CREATE TABLE `AllocationHasStock` (
    `AllocID` INT,
    `StockID` INT,
    FOREIGN KEY (`AllocID`) REFERENCES `Allocation`(`AllocID`),
    FOREIGN KEY (`StockID`) REFERENCES `Stocks`(`StockID`),
    PRIMARY KEY (`AllocID`, `StockID`)
);

-- Stocks HAS History
CREATE TABLE `StockHasHistory` (
    `StockID` INT,
    `HistoryID` INT,
    FOREIGN KEY (`StockID`) REFERENCES `Stocks`(`StockID`),
    FOREIGN KEY (`HistoryID`) REFERENCES `History`(`HistoryID`),
    PRIMARY KEY (`StockID`, `HistoryID`)
);

CREATE TABLE `SessionHasPortfolio` (
    `SessionID` INT,
    `PortfolioID` INT,
    FOREIGN KEY (`SessionID`) REFERENCES `Session`(`SessionID`),
    FOREIGN KEY (`PortfolioID`) REFERENCES `Portfolio`(`PortfolioID`),
    PRIMARY KEY (`SessionID`, `PortfolioID`)
);

-- Portfolio HAS Allocation
CREATE TABLE `PortfolioHasAllocation` (
    `PortfolioID` INT,
    `AllocID` INT,
    FOREIGN KEY (`PortfolioID`) REFERENCES `Portfolio`(`PortfolioID`),
    FOREIGN KEY (`AllocID`) REFERENCES `Allocation`(`AllocID`),
    PRIMARY KEY (`PortfolioID`, `AllocID`)
);




 * mysql+pymysql://csc370:***@localhost:3306/sprint1


0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.
0 rows affected.


[]

In [4]:
%%sql

SHOW TABLES;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
10 rows affected.


Tables_in_sprint1
allocation
allocationhasstock
history
portfolio
portfoliohasallocation
portfoliohasstock
session
sessionhasportfolio
stockhashistory
stocks


### DEMO

- User inputs AAPL, MSFT, AMZN, GOOGL, META, TSLA, BRK-A, JNJ, JPM, V, PG, NVDA, DIS, KO, WMT (step-by-step).
- We have two python modules that dynamically populate the Stocks table and the History table.
- Usefullness of the History table: If we need data that yfinance cannot provide we can use the History table to calculate.

In [5]:
import get_stock
import get_histroy

tickers = 'AAPL, MSFT, AMZN, GOOGL, META, TSLA, BRK-A, JNJ, JPM, V, PG, NVDA, DIS, KO, WMT'
#tickers = input("Enter the stock ticker symbols (separated by commas): ").split(',')
sql_insert_statements_stock = get_stock.main(tickers)
print(sql_insert_statements_stock)

sql_insert_statements_history = get_histroy.main(tickers)
print(sql_insert_statements_history)


INSERT INTO stocks (StockID, Ticker, Sector, Price, SD, ERet) VALUES (1, 'AAPL', 'Technology', 192.25, 1.264, 3.761);
INSERT INTO stocks (StockID, Ticker, Sector, Price, SD, ERet) VALUES (2, 'MSFT', 'Technology', 415.13, 0.893, 3.194);
INSERT INTO stocks (StockID, Ticker, Sector, Price, SD, ERet) VALUES (3, 'AMZN', 'Consumer Cyclical', 176.44, 1.155, 3.265);
INSERT INTO stocks (StockID, Ticker, Sector, Price, SD, ERet) VALUES (4, 'GOOGL', 'Communication Services', 172.5, 1.019, 4.98);
INSERT INTO stocks (StockID, Ticker, Sector, Price, SD, ERet) VALUES (5, 'META', 'Communication Services', 466.83, 1.208, 4.944);
INSERT INTO stocks (StockID, Ticker, Sector, Price, SD, ERet) VALUES (6, 'TSLA', 'Consumer Cyclical', 178.08, 2.408, 1.859);
INSERT INTO stocks (StockID, Ticker, Sector, Price, SD, ERet) VALUES (7, 'BRK-A', 'Financial Services', 627400.0, 0.894, 4.715);
INSERT INTO stocks (StockID, Ticker, Sector, Price, SD, ERet) VALUES (8, 'JNJ', 'Healthcare', 146.67, 0.547, 7.459);
INSERT IN

In [6]:
%%sql

DELETE FROM stocks;
DELETE FROM history;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
0 rows affected.
0 rows affected.


[]

Store insert statements in a variable

In [7]:
# Store the SQL insert statements in a variable
%store sql_insert_statements_stock
%store sql_insert_statements_history

Stored 'sql_insert_statements_stock' (str)
Stored 'sql_insert_statements_history' (str)


Reads SQL file

In [8]:
%%sql

{sql_insert_statements_stock}
{sql_insert_statements_history}

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affec

[]

In [9]:
%%sql

-- First, ensure that the StockHasHistory table is empty
DELETE FROM StockHasHistory;

-- Insert data into StockHasHistory by matching StockID with HistoryID based on the Ticker
INSERT INTO StockHasHistory (StockID, HistoryID)
SELECT s.StockID, h.HistoryID
FROM Stocks s
JOIN History h ON s.Ticker = h.Ticker;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
0 rows affected.
3750 rows affected.


[]

In [10]:
%%sql

SELECT * FROM stocks;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
15 rows affected.


StockID,Ticker,Sector,Price,SD,ERet
1,AAPL,Technology,192.25,1.264,3.761
2,MSFT,Technology,415.13,0.893,3.194
3,AMZN,Consumer Cyclical,176.44,1.155,3.265
4,GOOGL,Communication Services,172.5,1.019,4.98
5,META,Communication Services,466.83,1.208,4.944
6,TSLA,Consumer Cyclical,178.08,2.408,1.859
7,BRK-A,Financial Services,627400.0,0.894,4.715
8,JNJ,Healthcare,146.67,0.547,7.459
9,JPM,Financial Services,202.63,1.116,8.177
10,V,Financial Services,272.46,0.953,4.103


In [11]:
%%sql

SELECT * FROM history
LIMIT 10;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
10 rows affected.


HistoryID,Ticker,Date,Price
1,AAPL,2023-06-05,178.631
2,AAPL,2023-06-06,178.263
3,AAPL,2023-06-07,176.881
4,AAPL,2023-06-08,179.616
5,AAPL,2023-06-09,180.004
6,AAPL,2023-06-12,182.819
7,AAPL,2023-06-13,182.342
8,AAPL,2023-06-14,182.978
9,AAPL,2023-06-15,185.027
10,AAPL,2023-06-16,183.943


In the future we will work on joining the History table and the Stocks table through the StocksHasHistory relationship table.

We may want to see which stocks have the highest price.

In [12]:
%%sql

SELECT * FROM stocks
ORDER BY Price DESC;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
15 rows affected.


StockID,Ticker,Sector,Price,SD,ERet
7,BRK-A,Financial Services,627400.0,0.894,4.715
12,NVDA,Technology,1096.33,1.749,3.253
5,META,Communication Services,466.83,1.208,4.944
2,MSFT,Technology,415.13,0.893,3.194
10,V,Financial Services,272.46,0.953,4.103
9,JPM,Financial Services,202.63,1.116,8.177
1,AAPL,Technology,192.25,1.264,3.761
6,TSLA,Consumer Cyclical,178.08,2.408,1.859
3,AMZN,Consumer Cyclical,176.44,1.155,3.265
4,GOOGL,Communication Services,172.5,1.019,4.98


We may want to see how many of these stocks are in the unique sectors. This helps with diversification

In [13]:
%%sql

SELECT Sector, COUNT(*) AS NumberOfStocks
FROM stocks
GROUP BY Sector;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
6 rows affected.


Sector,NumberOfStocks
Technology,3
Consumer Cyclical,2
Communication Services,3
Financial Services,3
Healthcare,1
Consumer Defensive,3


For each sectory we can see the average standard deviation which shows which sectory may be more risky

In [14]:
%%sql

SELECT Sector, ROUND(AVG(SD), 3) AS AverageSD
FROM stocks
GROUP BY Sector;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
6 rows affected.


Sector,AverageSD
Technology,1.302
Consumer Cyclical,1.781
Communication Services,1.207
Financial Services,0.988
Healthcare,0.547
Consumer Defensive,0.497


If the user wants 10 stocks but has interest in 15, we can take the 10 stocks with the highest estimated return. 

In [15]:
%%sql

DELETE FROM stocks
WHERE StockID IN (
    SELECT StockID
    FROM (
        SELECT StockID
        FROM stocks
        ORDER BY ERet ASC
        LIMIT 5
    ) AS subquery
);

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
(pymysql.err.IntegrityError) (1451, 'Cannot delete or update a parent row: a foreign key constraint fails (`sprint1`.`stockhashistory`, CONSTRAINT `stockhashistory_ibfk_1` FOREIGN KEY (`StockID`) REFERENCES `stocks` (`StockID`))')
[SQL: DELETE FROM stocks
WHERE StockID IN (
    SELECT StockID
    FROM (
        SELECT StockID
        FROM stocks
        ORDER BY ERet ASC
        LIMIT 5
    ) AS subquery
);]
(Background on this error at: https://sqlalche.me/e/20/gkpj)


In [16]:
%%sql

SELECT * FROM stocks;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
15 rows affected.


StockID,Ticker,Sector,Price,SD,ERet
1,AAPL,Technology,192.25,1.264,3.761
2,MSFT,Technology,415.13,0.893,3.194
3,AMZN,Consumer Cyclical,176.44,1.155,3.265
4,GOOGL,Communication Services,172.5,1.019,4.98
5,META,Communication Services,466.83,1.208,4.944
6,TSLA,Consumer Cyclical,178.08,2.408,1.859
7,BRK-A,Financial Services,627400.0,0.894,4.715
8,JNJ,Healthcare,146.67,0.547,7.459
9,JPM,Financial Services,202.63,1.116,8.177
10,V,Financial Services,272.46,0.953,4.103


In [17]:
%%sql

-- First, ensure that the StockHasHistory table is empty
DELETE FROM StockHasHistory;

-- Insert data into StockHasHistory by matching StockID with HistoryID based on the Ticker
INSERT INTO StockHasHistory (StockID, HistoryID)
SELECT s.StockID, h.HistoryID
FROM Stocks s
JOIN History h ON s.Ticker = h.Ticker;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
3750 rows affected.
3750 rows affected.


[]

In [23]:
%%sql

SELECT 
    s.StockID,
    s.Ticker AS StockTicker,
    s.Sector,
    s.Price AS CurrentPrice,
    s.SD,
    s.ERet,
    h.HistoryID,
    h.Date,
    h.Price AS HistoricalPrice
FROM 
    Stocks s
JOIN 
    StockHasHistory shh ON s.StockID = shh.StockID
JOIN 
    History h ON shh.HistoryID = h.HistoryID
WHERE 
    s.Ticker = 'AAPL'
ORDER BY 
    h.Date ASC
LIMIT 10;

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
10 rows affected.


StockID,StockTicker,Sector,CurrentPrice,SD,ERet,HistoryID,Date,HistoricalPrice
1,AAPL,Technology,192.25,1.264,3.761,1,2023-06-05,178.631
1,AAPL,Technology,192.25,1.264,3.761,2,2023-06-06,178.263
1,AAPL,Technology,192.25,1.264,3.761,3,2023-06-07,176.881
1,AAPL,Technology,192.25,1.264,3.761,4,2023-06-08,179.616
1,AAPL,Technology,192.25,1.264,3.761,5,2023-06-09,180.004
1,AAPL,Technology,192.25,1.264,3.761,6,2023-06-12,182.819
1,AAPL,Technology,192.25,1.264,3.761,7,2023-06-13,182.342
1,AAPL,Technology,192.25,1.264,3.761,8,2023-06-14,182.978
1,AAPL,Technology,192.25,1.264,3.761,9,2023-06-15,185.027
1,AAPL,Technology,192.25,1.264,3.761,10,2023-06-16,183.943


In [20]:
%%sql

SELECT 
    s.StockID,
    s.Ticker AS StockTicker,
    s.Sector,
    s.Price AS CurrentPrice,
    s.SD,
    s.ERet,
    h.HistoryID,
    h.Date,
    h.Price AS HistoricalPrice
FROM 
    Stocks s
JOIN 
    StockHasHistory shh ON s.StockID = shh.StockID
JOIN 
    History h ON shh.HistoryID = h.HistoryID
WHERE 
    h.Date = '2023-09-20';

 * mysql+pymysql://csc370:***@localhost:3306/sprint1
15 rows affected.


StockID,StockTicker,Sector,CurrentPrice,SD,ERet,HistoryID,Date,HistoricalPrice
1,AAPL,Technology,192.25,1.264,3.761,75,2023-09-20,174.799
2,MSFT,Technology,415.13,0.893,3.194,325,2023-09-20,318.954
3,AMZN,Consumer Cyclical,176.44,1.155,3.265,575,2023-09-20,135.29
4,GOOGL,Communication Services,172.5,1.019,4.98,825,2023-09-20,133.74
5,META,Communication Services,466.83,1.208,4.944,1075,2023-09-20,299.352
6,TSLA,Consumer Cyclical,178.08,2.408,1.859,1325,2023-09-20,262.59
7,BRK-A,Financial Services,627400.0,0.894,4.715,1575,2023-09-20,556580.0
8,JNJ,Healthcare,146.67,0.547,7.459,1825,2023-09-20,159.112
9,JPM,Financial Services,202.63,1.116,8.177,2075,2023-09-20,145.463
10,V,Financial Services,272.46,0.953,4.103,2325,2023-09-20,240.452
