# Project Option 1. Trading Strategy of S&P 500 Stocks

## User Interface:
### 1. The user can enter the day range (such as 2020-01-01 to 2021-09-01)
### 2. The user can also choose the industry from the stocks in S&P 500 index
## Within the day range and the industry:
### 3. Your code will find the portfolio of two stocks whose SMA (Simple Moving Average) trading strategies (price vs. SMA) generate the highest profits
### 4. The portfolio has \\$50,000 in each stock for a total of \\$100,000 at the beginning of the day range
### 5. Your code will need to find the best N (the number of days in the N-day SMA) for ~ EACH ~ stock's trading strategy
### 6. At the end, your code displays each stock, SMA days, and the total profit 
### 7. Each transaction is to buy or sell your entire holding of that stock

In [1]:
# Import libraries
import pandas as pd
import numpy as np
from numpy import nan
import yfinance as yf
import datetime as dt

In [2]:
# 1) Download adjusted close price time series from Yahoo! Finance using yfinance library

def downloadData(ticker, begin_day, end_day):
    """Download the adjusted-close price series for one ticker via yfinance.

    Args:
        ticker: Ticker symbol; passed through ``str()`` before the request.
        begin_day: Start date as a ``'YYYY-MM-DD'`` string.
        end_day: End date as a ``'YYYY-MM-DD'`` string.

    Returns:
        The ``'Adj Close'`` column of the frame returned by ``yf.download``,
        or ``None`` (after printing a hint) when either date is not a valid
        ``YYYY-MM-DD`` string.
    """
    # Validate only the dates here, so that download or column-lookup
    # failures are not misreported as a date-format problem.
    try:
        dt.datetime.strptime(begin_day, '%Y-%m-%d')
        dt.datetime.strptime(end_day, '%Y-%m-%d')
    except (TypeError, ValueError):
        print('Date must be entered as YYYY-MM-DD')
        return None

    mydata = yf.download(str(ticker), begin_day, end_day)
    return mydata['Adj Close']

In [3]:
# Bonus) Find smallest industry (only for testing, fewer iterations necessary for smaller industries)

def findSmallestIndustry():
    """Return the name of the industry with the fewest rows in the S&P 500 CSV.

    Non-string entries in the 'Industry' column are skipped. The tally kept
    for each industry is zero-based (first sighting counts as 0), so it is
    one less than the number of rows; this does not affect which industry
    has the smallest tally. Ties go to the industry that appears first in
    the file. If no industry has a tally below 500, the default
    'Communication Services' is returned.
    """
    sp500_csv = pd.read_csv('https://github.com/FormanWilliam/Forman_Portfolio/raw/main/sp500.csv')

    # dict keeps insertion order, so it doubles as the list of unique industries
    tally = {}
    for name in sp500_csv['Industry']:
        if not isinstance(name, str):
            continue
        tally[name] = tally[name] + 1 if name in tally else 0

    # walk the tallies in first-seen order, keeping the strictly smallest
    winner, fewest = 'Communication Services', 500
    for name, seen in tally.items():
        if seen < fewest:
            winner, fewest = name, seen

    return winner

In [4]:
# 2) Find SMA trading net profit for given ticker, given time period, and given SMA

def findSMAProfit(ticker, begin_day, end_day, sma_days):
    """Simulate a price-vs-SMA crossover strategy on one stock; return net profit.

    The simulation starts fully invested: $50,000 of the stock is bought at
    the first available price. Afterwards the whole position is sold when the
    price crosses below its ``sma_days``-day simple moving average, and the
    whole cash balance is reinvested when the price crosses back above it.
    Any position still open on the last day is sold at that day's price.

    Args:
        ticker: Ticker symbol, forwarded to ``downloadData``.
        begin_day: Start date string, forwarded to ``downloadData``.
        end_day: End date string, forwarded to ``downloadData``.
        sma_days: Window length (in rows of the price series) for the SMA.

    Returns:
        Final cash minus the $50,000 starting capital (negative for a loss).

    Raises:
        ValueError: If ``downloadData`` returned no price data.
    """
    starting_capital = 50000

    # get series for Adj. Close and SMA, combine two series into dataframe
    adj_close_series = downloadData(ticker, begin_day, end_day)
    if adj_close_series is None or len(adj_close_series) == 0:
        raise ValueError(
            f'No price data available for {ticker} between {begin_day} and {end_day}'
        )
    sma_series = adj_close_series.rolling(window=sma_days).mean()
    price_sma_df = pd.concat([adj_close_series, sma_series], axis=1)
    price_sma_df.columns = ['Price', 'SMA']

    # start fully invested at the FIRST row's price
    cash = 0
    buy_price = price_sma_df['Price'].iloc[0]
    num_holdings = starting_capital / buy_price
    position = "Holding"
    yesterday_price, yesterday_sma = 0, 99999

    # loop each trading day to determine buy/sell signals
    for counter, eachday in enumerate(price_sma_df.index):
        today_price = price_sma_df.loc[eachday, 'Price']
        today_sma = price_sma_df.loc[eachday, 'SMA']
        # The SMA is NaN for rows 0 .. sma_days - 2, so row sma_days is the
        # first one where both today's and yesterday's SMA are defined.
        if counter >= sma_days:
            crossed_below = today_price < today_sma and yesterday_price >= yesterday_sma
            crossed_above = today_price > today_sma and yesterday_price <= yesterday_sma
            if position == "Holding" and crossed_below:
                # sell signal while holding: liquidate the whole position
                position = "Waiting"
                cash += today_price * num_holdings
                num_holdings = 0
            elif position == "Waiting" and crossed_above:
                # buy signal while waiting: reinvest all cash (fractional shares allowed)
                position = "Holding"
                num_holdings = cash / today_price
                cash = 0
        yesterday_price, yesterday_sma = today_price, today_sma

    # sell holdings if still holding position on last day
    if position == "Holding":
        cash += price_sma_df['Price'].iloc[-1] * num_holdings
        num_holdings = 0

    # cash is the gross final value; subtract the initial stake for net profit
    return cash - starting_capital

In [5]:
# 3) For given stock and time period, find most profitable number of SMA days (5-60, step 5) 

def findBestSMA(ticker, begin_day, end_day, sma_options=None):
    """Find the SMA window that gives the highest ``findSMAProfit`` result.

    Args:
        ticker: Ticker symbol, forwarded to ``findSMAProfit``.
        begin_day: Start date string, forwarded to ``findSMAProfit``.
        end_day: End date string, forwarded to ``findSMAProfit``.
        sma_options: Optional iterable of SMA window lengths to try.
            Defaults to 5, 10, ..., 60.

    Returns:
        ``[best_sma_days, profit_rounded_to_2_decimals]``. If no window gives
        a result greater than 0, the initial ``[0, 0]`` is returned.
    """
    if sma_options is None:
        # start after 0 because 0-day SMA is just current price
        sma_options = range(5, 61, 5)

    best_sma = [0, 0]
    for eachsma in sma_options:
        # evaluate once per window: each call downloads the price history
        profit = findSMAProfit(ticker, begin_day, end_day, eachsma)
        if profit > best_sma[1]:
            best_sma = [eachsma, round(profit, 2)]
    return best_sma

In [6]:
# 4) Find best company/SMA days strategy for given industry
stock_list = pd.read_csv('https://github.com/FormanWilliam/Forman_Portfolio/raw/main/sp500.csv')

def findStrategy(industry, begin_day, end_day):
    """Pick the two tickers in an industry whose best SMA strategies score highest.

    Every row of ``stock_list`` whose 'Industry' equals ``industry`` is
    evaluated with ``findBestSMA``; each ticker is printed as it is processed.

    Args:
        industry: Industry name, matched exactly against the 'Industry' column.
        begin_day: Start date string, forwarded to ``findBestSMA``.
        end_day: End date string, forwarded to ``findBestSMA``.

    Returns:
        ``{'First': [ticker, [sma_days, profit]], 'Second': [ticker, [sma_days, profit]]}``.
        A slot that no stock filled keeps the placeholder ``['', [0, 0]]``
        (for example when the industry matches no rows, or when fewer than
        two stocks have a result above 0).
    """
    industry_stocks = stock_list.loc[stock_list['Industry'] == industry]
    # An unknown industry produces an empty selection rather than an exception.
    if industry_stocks.empty:
        print('Industry name entered incorrectly!')

    # for given industry, collect each stock's best strat [ticker, [SMA days, profit]]
    best_strategies = []
    for ticker in industry_stocks['Ticker']:
        print(ticker)
        best_strategies.append([ticker, findBestSMA(ticker, begin_day, end_day)])

    # Both placeholders carry profit 0 so no fake value leaks into the result.
    two_best = {'First': ['', [0, 0]], 'Second': ['', [0, 0]]}
    # compare each stock's best SMA strategy, update the two frontrunners
    for eachstrategy in best_strategies:
        profit = eachstrategy[1][1]
        if profit > two_best['First'][1][1]:
            # current strat beats the leader: leader moves down to second
            print(eachstrategy, ' replaces first ', two_best['First'])
            two_best['Second'] = two_best['First']
            two_best['First'] = eachstrategy
        elif profit > two_best['Second'][1][1]:
            # current strat only beats second best (a tie with first lands here)
            print(eachstrategy, ' replaces second ', two_best['Second'])
            two_best['Second'] = eachstrategy
    return two_best

## Now that we have the functions to find the 2 strategies, let's set up the main function with user inputs

In [7]:
def main():
    """Prompt for an industry and date range, then report the two best strategies.

    Reads the S&P 500 CSV to list the available industries, asks the user for
    an industry and a begin/end date, runs ``findStrategy`` and builds a
    summary of the two selected tickers, their SMA windows and profits.

    Returns:
        A multi-line summary string.
    """
    print('Given S&P 500 Industry and time period, find 2 best N-day SMA trading strategies\n')
    stock_list = pd.read_csv('https://github.com/FormanWilliam/Forman_Portfolio/raw/main/sp500.csv')
    # unique industries in order of first appearance; missing values (NaN) are
    # dropped so they are not offered as a valid choice
    industry_list = stock_list['Industry'].dropna().unique().tolist()
    print('List of valid industries: ', industry_list)
    # strip stray whitespace so an otherwise correct entry still matches
    industry = input('Enter industry: ').strip()
    begin_day = input('Enter begin date (format YYYY-MM-DD): ').strip()
    end_day = input('Enter end date (format YYYY-MM-DD): ').strip()
    print(f'\nFinding 2 best strategies in {industry} industry between {begin_day} and {end_day}\nThis should take a minute, depending on industry and time period')
    strats = findStrategy(industry, begin_day, end_day)
    first, second = strats['First'], strats['Second']
    # round the sum: adding two 2-decimal floats can leave binary float noise
    total_profit = round(first[1][1] + second[1][1], 2)
    result = f'''
    Best strategy: {first[1][0]} day SMA strategy for ticker {first[0]} yields profits of ${first[1][1]}
    Second best strategy: {second[1][0]} day SMA strategy for ticker {second[0]} yields profits of ${second[1][1]}
    Total Profit: {total_profit}
    '''
    return result


# Run only when executed directly (a notebook cell also runs as "__main__").
if __name__ == '__main__':
    print(main())

Given S&P 500 Industry and time period, find 2 best N-day SMA trading strategies

List of valid industries:  ['Communication Services', 'Consumer Discretionary', 'Consumer Staples', 'Energy', 'Financials', nan, 'Health Care', 'Industrials', 'Information Technology', 'Materials', 'Real Estate', 'Utilities']
Enter industry: Energy
Enter begin date (format YYYY-MM-DD): 2021-01-01
Enter end date (format YYYY-MM-DD): 2021-04-01

Finding 2 best strategies in Energy industry between 2021-01-01 and 2021-04-01
This should take a minute, depending on industry and time period
APA
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
HAL
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%****

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
PSX
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%****