Group 22
Rosolino Mangano, Simran Abbas, Zon Ahmed

In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# Get data from Yahoo Finance
def getStockData(tickerSymbol, startDate, endDate):
    """Download daily closing prices for one ticker from Yahoo Finance.

    Args:
        tickerSymbol: Yahoo Finance ticker symbol, e.g. "AAPL" or "^GSPC".
        startDate: Start of the date range, forwarded to ``yf.download`` as ``start``.
        endDate: End of the date range, forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame restricted to the "Close" and "Adj Close" columns
        (whichever of the two are present), or None when nothing was downloaded.
    """
    # auto_adjust=False is requested explicitly: "Adj Close" is only returned
    # when prices are not auto-adjusted, and relying on the library default
    # makes the column selection below depend on the installed yfinance release.
    stockData = yf.download(
        tickerSymbol,
        start=startDate,
        end=endDate,
        progress=False,
        auto_adjust=False,
    )

    if stockData.empty:
        print(f"No data available for Ticker Symbol <{tickerSymbol}> in the specified date range.")
        return None

    # Just get the daily price (closing or adj closing). Only columns that are
    # actually present are selected, so a missing "Adj Close" cannot raise KeyError.
    wantedColumns = [name for name in ("Close", "Adj Close") if name in stockData.columns]
    return stockData[wantedColumns]

# Get user input
def getUserInput():
    """Prompt for a ticker symbol until one is accepted or the user quits.

    Returns:
        The entered ticker symbol with surrounding whitespace removed, or None
        when the user submits an empty (or whitespace-only) line to exit.
    """
    while True:
        tickerSymbol = input("Enter the ticker symbol: ").strip()
        if not tickerSymbol:
            return None
        # Validate the ticker by checking if info can be retrieved for it
        try:
            yf.Ticker(tickerSymbol).info
        except Exception:
            # Catch Exception rather than using a bare except so that Ctrl+C and
            # interpreter shutdown still propagate instead of being reported
            # as an unknown ticker.
            print(f"Unable to find data for Ticker Symbol: <{tickerSymbol}>. Try again (Example Ticker: AAPL for Apple)")
            print("Press Enter to exit...")
        else:
            return tickerSymbol

# Display table
def displayTable(stockData, count, ticker = 'S&P 500'):
    """Print a labelled preview table followed by the total row count.

    Args:
        stockData: The rows to show; the caller passes the preview it wants
            rendered (``main`` passes ``DataFrame.head()``).
        count: Number printed as the total count.
        ticker: Label used in the heading; it is upper-cased when printed.
    """
    print("--------------------------------------------------------")
    print(f"First Five Day Data for {ticker.upper()}")
    # ``display`` only exists inside IPython/Jupyter sessions. Fall back to
    # ``print`` so the function also works when the file is run as a script.
    try:
        show = display
    except NameError:
        show = print
    show(stockData)
    print(f"Total Count: {count}")

# Calculate daily percentage change
def calculateDailyPercentChange(data):
    """Return the period-over-period fractional change of ``data``.

    The change is computed with ``pct_change`` and entries that come out as
    missing (such as the first one, which has no predecessor) are dropped.
    """
    changes = data.pct_change()
    withoutMissing = changes.dropna()
    return withoutMissing

# Plot daily percentage change
def plotDailyPercentChange(data, tickerSymbol):
    """Show a bar chart of the daily percentage change for one ticker.

    Args:
        data: Daily percentage changes to plot. Its index entries must provide
            ``strftime`` because they are formatted as the x tick labels.
        tickerSymbol: Ticker shown in the chart title.
    """
    print("--------------------------------------------------------")
    totalCount = len(data)

    # Try to dynamically set the size so the x-axis does not look like a complete mess.
    # But a year worth of data still makes the graph hard to read.
    plotWidth = max(10, totalCount * 0.2)
    plotHeight = max(6, plotWidth * 0.5)

    # Create the axes explicitly and hand them to pandas, so the size chosen
    # above is always the one used rather than relying on pandas picking up
    # whichever figure happens to be current.
    _, ax = plt.subplots(figsize=(plotWidth, plotHeight))
    data.plot(kind='bar', ax=ax, title=f'Daily % Change for {tickerSymbol}')

    # Remove the time from the date
    ax.set_xticklabels([x.strftime('%Y-%m-%d') for x in data.index])

    # Change font size of the titles (scaled with the figure size)
    ax.title.set_fontsize(max(14, plotWidth))
    ax.set_xlabel('Date', fontsize=max(12, plotWidth * 0.8))
    ax.set_ylabel('Daily % Change', fontsize=max(12, plotHeight * 0.9))

    # Change the font size of the y axis values
    ax.tick_params(axis='y', labelsize=max(10, plotHeight))

    plt.show()
    
# Display statistics
def displayStatistics(data, label):
    """Print the mean, standard deviation, variance, min and max of ``data``.

    Args:
        data: Object exposing ``mean``, ``std``, ``var``, ``min`` and ``max``
            methods (``main`` passes the daily percentage changes).
        label: Name printed in the heading line.
    """
    print("--------------------------------------------------------")
    print(f"Statistics for {label}:")

    # Caption / method-name pairs, in the order they are reported. Each
    # statistic is computed right before it is printed.
    reportRows = (
        ("Mean:", "mean"),
        ("Standard Deviation:", "std"),
        ("Variance:", "var"),
        ("Min:", "min"),
        ("Max:", "max"),
    )
    for caption, methodName in reportRows:
        print(caption, getattr(data, methodName)())

def performRegressionAnalysis(assetData, spData, tickerSymbol):
    """Regress the S&P 500 daily % change on the asset's daily % change.

    The two inputs are first aligned on their index so that only observations
    present in both are used. A linear model is fitted on an 80% training
    split; the intercept, coefficient, R-squared and mean squared error on the
    remaining 20% are printed, the fitted line is plotted over the test points,
    and the sign of the coefficient is reported.

    Args:
        assetData: Daily % change of the asset (a Series or single-column
            frame indexed by date).
        spData: Daily % change of the S&P 500, indexed the same way.
        tickerSymbol: Asset ticker used in the axis label and messages.

    Returns:
        None. All results are printed or plotted.
    """
    # Keep only the dates present in both inputs. Assets that trade on a
    # different calendar than the S&P 500 would otherwise produce arrays of
    # different lengths (or pair up returns from different days).
    paired = pd.concat([assetData, spData], axis=1, join='inner').dropna()
    if len(paired) < 2:
        print("--------------------------------------------------------")
        print(f'Not enough overlapping data between {tickerSymbol} and the S&P 500 to run a regression.')
        print("--------------------------------------------------------")
        return

    # X should be a 2D array representing the daily % change for the asset
    x = paired.iloc[:, 0].values.reshape(-1, 1)

    # y should be a 1D array representing the daily % change for S&P 500
    y = paired.iloc[:, 1].values

    # Split the data into training and testing sets (fixed seed for repeatability)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

    # Create a linear regression model and fit it to the training data
    model = LinearRegression()
    model.fit(x_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(x_test)
    slope = model.coef_[0]

    # Display regression metrics
    print("--------------------------------------------------------")
    print('Intercept:', model.intercept_)
    print('Coefficient:', slope)
    print('R-squared:', metrics.r2_score(y_test, y_pred))
    print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
    print("--------------------------------------------------------")

    # Plot the regression line
    plt.scatter(x_test, y_test, color='black')
    plt.plot(x_test, y_pred, color='blue', linewidth=3)
    plt.title('Linear Regression Analysis')
    plt.xlabel(f'Daily % Change for {tickerSymbol}')
    plt.ylabel('Daily % Change for S&P 500')
    plt.show()

    # Check if positively or negatively correlated (sign of the fitted slope)
    if slope > 0:
        print(f'{tickerSymbol} is positively correlated with the S&P 500.')
    elif slope < 0:
        print(f'{tickerSymbol} is negatively correlated with the S&P 500.')
    else:
        print(f'{tickerSymbol} is not correlated with the S&P 500.')
    print("--------------------------------------------------------")
    
def main(startDate='2017-01-01', endDate='2017-12-31'):
    """Run the interactive comparison of one ticker against the S&P 500.

    Prompts for a ticker, downloads its prices and those of ``^GSPC`` for the
    date range, shows preview tables, plots the ticker's daily % change,
    prints statistics for both series and runs the regression analysis.

    Args:
        startDate: Start of the date range passed to ``getStockData``.
        endDate: End of the date range passed to ``getStockData``.
    """
    # Get user input
    tickerSymbol = getUserInput()
    # Stop when user decides to exit instead of entering a valid symbol
    if tickerSymbol is None:
        return

    # Get stock data
    userStockData = getStockData(tickerSymbol, startDate, endDate)
    # Stop when the date range has no data
    if userStockData is None:
        return
    spStockData = getStockData("^GSPC", startDate, endDate)
    # The benchmark download can come back empty as well; getStockData has
    # already printed the reason, so just stop instead of failing on None.
    if spStockData is None:
        return

    # Display the tables
    displayTable(userStockData.head(), len(userStockData), tickerSymbol)
    displayTable(spStockData.head(), len(spStockData))

    # Calculate daily percentage change (using only Closing)
    userDailyPercentChange = calculateDailyPercentChange(userStockData['Close'])
    spDailyPercentChange = calculateDailyPercentChange(spStockData['Close'])

    # Plot daily percentage change
    plotDailyPercentChange(userDailyPercentChange, tickerSymbol)

    # Display statistics for both the asset and S&P 500
    displayStatistics(userDailyPercentChange, tickerSymbol)
    displayStatistics(spDailyPercentChange, 'S&P 500')

    # Perform linear regression analysis
    performRegressionAnalysis(userDailyPercentChange, spDailyPercentChange, tickerSymbol)
    
# Run the interactive program only when executed directly (script or notebook
# cell), not when this module is imported by other code.
if __name__ == "__main__":
    main()