In [None]:
!pip install tqdm

In [None]:
!pip install yfinance

In [None]:
import yfinance as yf
import pandas as pd

# Get a list of all equity tickers from the Nasdaq Trader symbol directory.
# keep_default_na=False together with na_values=[''] treats only empty fields
# as missing. With pandas' default NA strings a symbol spelled "NA" or "NULL"
# would be parsed as NaN, and str() below would turn it into the bogus
# ticker 'nan'.
equities = pd.read_csv(
    'http://ftp.nasdaqtrader.com/dynamic/SymDir/nasdaqtraded.txt',
    delimiter='|',
    keep_default_na=False,
    na_values=[''],
)
equities = equities[equities['Test Issue'] == 'N']  # Exclude test issues

# Drop rows that still carry no symbol so no placeholder reaches the download.
ticker_list = equities['NASDAQ Symbol'].dropna().tolist()

# Downstream cells pass this list to yf.download, so force plain strings.
string_list = [str(item) for item in ticker_list]

print(string_list)

In [None]:
import yfinance as yf
import pandas as pd

# Download daily price history for every symbol in string_list over the
# fixed 2021-01-01 .. 2023-05-31 window.
data = yf.download(
    tickers=string_list,
    start='2021-01-01',
    end='2023-05-31',
)

# Spark copy of the downloaded frame.
yahoo_data = spark.createDataFrame(data)

# Show what came back from the download.
print(data)

In [None]:
# Convert the downloaded pandas frame into a Spark DataFrame.
# NOTE(review): the download cell already runs this exact conversion, and
# `yahoo_data` is not read by any other visible cell — one of the two calls
# looks redundant; confirm before removing.
yahoo_data = spark.createDataFrame(data)


In [None]:
print(data)

# Make sure the price index holds timestamps before joining on dates.
data.index = pd.to_datetime(data.index)

# Read the signal table through Spark and bring it into pandas, with its
# 'Date' column parsed as timestamps as well.
signals = spark.read.table('kash.signal').toPandas()
signals['Date'] = pd.to_datetime(signals['Date'])

# `df` is the same object as `data`, so rebuilding the column MultiIndex
# here also changes `data.columns`.
df = data
df.columns = pd.MultiIndex.from_tuples(df.columns)

# Move the second column level into the row index (presumably the ticker
# level of the yfinance frame — confirm), then copy it into a regular column.
df = df.stack(level=1)
df['Symbol'] = df.index.get_level_values(1)

# Keep only dates present on both sides.
all_data = pd.merge(df, signals, on='Date', how='inner')

# Recode signal value 2 as -1.
all_data['Signal'] = all_data['Signal'].replace({2: -1})

# Print the updated DataFrame
print(all_data)

In [None]:
# Open-to-close move as a fraction of the open.
all_data['Change'] = all_data['Close'].sub(all_data['Open']).div(all_data['Open'])

# Move multiplied by the signal, stored as a growth factor (1 + signed move).
all_data['Return'] = all_data['Change'].mul(all_data['Signal']).add(1)

print(all_data)

In [None]:
# Turn the growth factor back into a percentage move and index it by date.
# .assign() produces a new frame, so nothing is written into a column slice
# of all_data (the original assignment raised SettingWithCopyWarning), and
# set_index() without inplace keeps the cell safe to re-run.
daily_return_data = (
    all_data[['Date', 'Symbol', 'Return']]
    .assign(Return=lambda frame: (frame['Return'] - 1) * 100)
    .set_index('Date')
)

# Wide layout: one row per Date, one column per Symbol.
df_pivoted = daily_return_data.pivot(columns='Symbol', values='Return').reset_index()

# Expose the wide frame to SQL as the temp view 'results'.
db_df = spark.createDataFrame(df_pivoted)
db_df.createOrReplaceTempView('results')

print(df_pivoted)

In [None]:
%sql CREATE OR REPLACE TABLE alpha_two.YAHOO_ALL_DAILY_RETURNS AS SELECT * FROM results

In [None]:
# Growth factor of the unsigned open-to-close move.
all_data['Hold_Return'] = all_data['Change'].add(1)

# Order rows by date so the running products below accumulate chronologically.
all_data = all_data.sort_values('Date')

# Running product of both growth factors within each symbol.
cumulative = all_data.groupby('Symbol')[['Return', 'Hold_Return']].cumprod()
all_data['Cumulative_Return'] = cumulative['Return']
all_data['Cumulative_Hold'] = cumulative['Hold_Return']

# Difference between the signal-driven and the hold running products.
all_data['ExcessReturn'] = all_data['Cumulative_Return'].sub(all_data['Cumulative_Hold'])

info = all_data[['Date', 'Symbol', 'Cumulative_Return', 'Cumulative_Hold', 'ExcessReturn']]

print(info)

In [None]:
# Convert the cumulative-return frame `info` to a Spark DataFrame and
# register it as the temp view 'results', which the following %sql cell
# selects from.
db_df = spark.createDataFrame(info)

db_df.createOrReplaceTempView('results')

In [None]:
%sql CREATE OR REPLACE TABLE alpha_two.YAHOO_ALL_DAILY_EXCESS_RETURNS AS SELECT * FROM results

In [None]:
from tqdm import tqdm
def get_stock_details(ticker):
    """Look up basic fundamentals for one ticker through yfinance.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``yf.Ticker``.

    Returns
    -------
    dict
        Record with keys ``Ticker``, ``Industry``, ``Sector``, ``MarketCap``
        and ``Market``. ``Ticker`` echoes the input; the other four come from
        the ``industry``, ``sector``, ``marketCap`` and ``exchange`` entries
        of the ticker's ``info`` mapping and are ``None`` when an entry is
        absent or the lookup fails.

    Notes
    -----
    The lookup is best-effort: any exception is reported on stdout together
    with the ticker and the error, and the all-``None`` record is returned so
    that a bulk run over many tickers keeps going.
    """
    details = {
        'Ticker': ticker,
        'Industry': None,
        'Sector': None,
        'MarketCap': None,
        'Market': None
    }
    try:
        # Read `.info` once and reuse the mapping for all four fields.
        stock_info = yf.Ticker(ticker).info
        details['Industry'] = stock_info.get('industry')
        details['Sector'] = stock_info.get('sector')
        details['MarketCap'] = stock_info.get('marketCap')
        details['Market'] = stock_info.get('exchange')
    except Exception as exc:
        # Say which ticker failed and why instead of a bare "Exception".
        print(f"Exception for {ticker}: {exc!r}")
    return details

tqdm.pandas()  # Enable progress bar for pandas apply

# One lookup per distinct symbol in the return history.
unique_values = info['Symbol'].unique()
unique_df = pd.DataFrame({'Symbol': unique_values})

# Collect the per-ticker records in their own Series so the 'Symbol' column
# keeps holding ticker text instead of being overwritten with dicts.
detail_records = unique_df['Symbol'].progress_apply(get_stock_details)

# Build the detail frame in one constructor call rather than expanding each
# record with apply(pd.Series). Fixing the columns keeps the layout stable
# even when there are no symbols at all.
details_df = pd.DataFrame(
    detail_records.tolist(),
    columns=['Ticker', 'Industry', 'Sector', 'MarketCap', 'Market'],
    index=unique_df.index,
)

merged_df = pd.concat([unique_df, details_df], axis=1)

In [None]:

all = pd.merge(info, merged_df[['Ticker', 'Industry', 'Sector', 'MarketCap', 'Market']], left_on='Symbol', right_on='Ticker', how='inner')

print(all)

In [None]:
db_df = spark.createDataFrame(all)

db_df.createOrReplaceTempView('results')

In [None]:
%sql CREATE OR REPLACE TABLE alpha_two.ALL_SIGNAL_RETURNS_WITH_FUNDAMENTALS AS SELECT * FROM results

In [None]:
#Only needed if column names need to be cleaned

import re
def clean_column_name(column_name):
    """Reduce a column label to ASCII letters, digits and underscores.

    Parameters
    ----------
    column_name : object
        The label to clean. Non-string labels (for example integers or
        tuples) are converted with ``str()`` first instead of raising
        ``TypeError``.

    Returns
    -------
    str
        The label with every character outside ``[a-zA-Z0-9_]`` removed.
        This can be the empty string, and distinct labels can collapse to
        the same result.
    """
    # The character class already removes all whitespace, so no separate
    # strip() step is needed afterwards.
    return re.sub(r'[^a-zA-Z0-9_]', '', str(column_name))

# Run every column label of all_data through clean_column_name, keeping the
# name of the column index as it was.
all_data.columns = pd.Index(
    [clean_column_name(label) for label in all_data.columns],
    name=all_data.columns.name,
)

# Print the DataFrame with cleaned column names
print(all_data)

# Register the cleaned frame as the temp view 'results'.
db_df = spark.createDataFrame(all_data)
db_df.createOrReplaceTempView('results')