# Requirements setup

In [120]:
import os
import requests
import sqlalchemy
import pymysql
import pandas as pd
import yfinance as yf

# Register PyMySQL as a drop-in replacement for MySQLdb

In [145]:
# Register PyMySQL under the module name ``MySQLdb`` so that code importing
# ``MySQLdb`` gets PyMySQL instead.
# NOTE(review): presumably required because the plain ``mysql://`` URLs used
# in this notebook make SQLAlchemy import MySQLdb — confirm.
pymysql.install_as_MySQLdb()

# Create schemas to store stock data

In [146]:
# Names of the stock indices handled by this notebook
indexes = [
    'Nasdaq',
    'Bovespa',
]

In [147]:
# Read the MySQL root password from the environment
password = os.getenv('PASSWORD')


def schema_creator(index):
    """Create a MySQL schema (database) named ``index``.

    Connects to the MySQL server on ``localhost:3306`` as ``root`` using the
    module-level ``password`` and issues ``CREATE SCHEMA`` for ``index``.

    Args:
        index: Name of the schema to create.

    Raises:
        sqlalchemy.exc.SQLAlchemyError: If connecting or executing the DDL
            fails (e.g. the server rejects the statement).
    """
    # Build the URL from its parts so that special characters in the
    # password ('@', '/', ':', ...) are escaped instead of corrupting the URL.
    url = sqlalchemy.engine.URL.create(
        'mysql',
        username='root',
        password=password,
        host='localhost',
        port=3306,
    )
    engine = sqlalchemy.create_engine(url)
    try:
        # Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
        # run the DDL on an explicit connection inside a transaction instead.
        with engine.begin() as connection:
            connection.execute(sqlalchemy.schema.CreateSchema(index))
    finally:
        # Release the pooled connection; a new engine is built on every call.
        engine.dispose()

In [150]:
# Create one schema per stock index
for schema_name in indexes:
    schema_creator(schema_name)

# Get ticker symbols for the indices (+ exchange suffixes)

In [151]:
# Country slugs used in the disfold.com URLs, one per stock market
countries = ['brazil', 'united-states']

# Fetch the first HTML table on each country's company listing page.
# Only the first page is read; an earlier draft of this cell paged through
# ``?page=1`` .. ``?page=10`` to collect a longer list.
company_tables = {}
for country in countries:
    company_tables[country] = pd.read_html(f'https://disfold.com/{country}/companies/')[0]

# DataFrame.append() is deprecated in pandas 1.4 and removed in 2.0, so the
# tables are assigned directly instead of being appended to empty frames.
bovespa = company_tables['brazil']
nasdaq = company_tables['united-states']


In [160]:
def _ticker_symbols(companies):
    """Return the non-null 'Stock' values of *companies* that do not start with '('."""
    kept = []
    for symbol in companies['Stock'].dropna().to_list():
        if symbol.startswith('('):
            # Entries beginning with '(' are discarded
            continue
        kept.append(symbol)
    return kept


# Ticker symbols per market, taken from the 'Stock' column
bovespa_stock = _ticker_symbols(bovespa)
nasdaq_stock = _ticker_symbols(nasdaq)

In [161]:
# Append the ``.SA`` suffix to Bovespa tickers; Nasdaq tickers need no suffix.
# Symbols that already carry the suffix are left alone so that re-running
# this cell does not produce ``.SA.SA``.
bovespa_stock = [
    stock if stock.endswith('.SA') else f'{stock}.SA'
    for stock in bovespa_stock
]

# Populate the database with stock prices

In [163]:
# Ticker symbols to download, keyed by the index / schema they belong to
stock_exchanges = {'Nasdaq': nasdaq_stock, 'Bovespa': bovespa_stock}

# Tickers for which no price data came back, grouped by index
failed_downloads = {}

for index in indexes:
    # Build the URL from its parts so that special characters in the
    # password are escaped instead of corrupting the URL.
    url = sqlalchemy.engine.URL.create(
        'mysql',
        username='root',
        password=password,
        host='localhost',
        port=3306,
        database=index,
    )
    engine = sqlalchemy.create_engine(url)
    try:
        for ticker in stock_exchanges[index]:
            data = yf.download(ticker, start='2022-05-12').reset_index()
            if data.empty:
                # A failed download yields an empty frame; writing it with
                # if_exists='replace' would overwrite an existing table with
                # nothing, so skip it and report it afterwards.
                failed_downloads.setdefault(index, []).append(ticker)
                continue
            # One table per ticker, named after the lower-cased symbol
            data.to_sql(ticker.lower(), con=engine, if_exists='replace', index=False)
    finally:
        # Release pooled connections before moving on to the next schema
        engine.dispose()

if failed_downloads:
    # NOTE(review): share-class symbols such as 'BRK.B' fail to download;
    # Yahoo Finance presumably expects 'BRK-B' — confirm before normalising.
    print(f'No data downloaded for: {failed_downloads}')
        

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- BRK.B: No timezone found, symbol may be delisted
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%******

In [165]:
# Number of Nasdaq ticker symbols collected
len(nasdaq_stock)

50