# Temporal Data

In [63]:
import pandas as pd

from os import listdir

In [64]:
# Stock market to process ("NASDAQ" or "NYSE"); it selects the input CSV files and names the output file
market = "NYSE"

## Get list of stocks

In [65]:
# Directory holding one CSV per stock, named "<market>_<ticker>_1.csv".
path = "../Temporal_Relational_Stock_Ranking/data/2013-01-01"
prefix = "{}_".format(market)
suffix = "_1.csv"
# Keep only files that really start with the market prefix and end with the
# expected suffix. A plain substring test would also accept unrelated files
# (e.g. "<market>_notes.csv"), for which removeprefix/removesuffix would leave
# part of the file name in the ticker and break the per-stock loading later.
stock_list = sorted(
    f.removeprefix(prefix).removesuffix(suffix)
    for f in listdir(path)
    if f.startswith(prefix) and f.endswith(suffix)
)
print(len(stock_list))
print(stock_list[:5])

1769
['A', 'AAN', 'AAP', 'AAT', 'AB']


## Generate DataFrame

In [66]:
# Per-stock loader
def generate_df(stock):
    """Load the CSV of a single stock into a DataFrame tagged with its ticker.

    Reads "<path>/<market>_<stock>_1.csv" (no header row), using the
    module-level ``path`` and ``market`` values.

    Parameters
    ----------
    stock : str
        Ticker symbol used to build the file name and to fill the "tic" column.

    Returns
    -------
    pandas.DataFrame
        Columns "day", "open", "high", "low", "close", "volume", "tic",
        with "day" cast to int and the last row of the file removed.
    """
    csv_file = "{}/{}_{}_1.csv".format(path, market, stock)
    frame = pd.read_csv(
        csv_file,
        header=None,
        names=["day", "open", "high", "low", "close", "volume"],
    )
    # Cast before trimming so the conversion sees every row of the file.
    frame = frame.astype({"day": int})
    # The final line is discarded (noted in the original as holding negative values).
    frame = frame.drop(index=frame.index[-1:])
    # Tag every remaining row with the ticker it belongs to.
    return frame.assign(tic=stock)

In [67]:
# Build the frame for the first ticker in stock_list and display it
final_df = generate_df(stock=stock_list[0])
final_df

Unnamed: 0,day,open,high,low,close,volume,tic
0,0,0.578496,0.580309,0.567871,0.555112,0.593285,A
1,1,0.581725,0.581215,0.570633,0.557246,0.595410,A
2,2,0.588015,0.584431,0.573608,0.560353,0.607168,A
3,3,0.595722,0.586117,0.576604,0.563522,0.602777,A
4,4,0.599320,0.587760,0.579721,0.566039,0.597960,A
...,...,...,...,...,...,...,...
1239,1239,0.981782,0.982051,0.971172,0.966818,0.973084,A
1240,1240,0.973084,0.978439,0.969663,0.966341,0.938377,A
1241,1241,0.959598,0.972588,0.967970,0.965779,0.932568,A
1242,1242,0.951665,0.966893,0.966298,0.965104,0.933418,A


In [68]:
# Combine all stocks into one frame.
# Every per-stock frame is built first and concatenated in a single call:
# calling pd.concat inside the loop copies the accumulated frame on each
# iteration (quadratic cost), and appending to final_df made this cell
# produce duplicated rows when it was re-run.
stock_frames = [generate_df(stock) for stock in stock_list]
# Order rows by day, then ticker, so each day's stocks are contiguous.
final_df = pd.concat(stock_frames).sort_values(by=["day", "tic"])
final_df

Unnamed: 0,day,open,high,low,close,volume,tic
0,0,0.578496,0.580309,0.567871,0.555112,0.593285,A
0,0,0.586496,0.589924,0.593711,0.598892,0.603702,AAN
0,0,0.357611,0.357296,0.359589,0.367068,0.361763,AAP
0,0,0.602742,0.597432,0.592057,0.594226,0.620892,AAT
0,0,0.535432,0.529228,0.526605,0.533735,0.555247,AB
...,...,...,...,...,...,...,...
1243,1243,0.961859,0.964495,0.965738,0.968085,0.961481,ZB-A
1243,1243,0.864738,0.864625,0.852930,0.857227,0.862424,ZBH
1243,1243,0.798242,0.798556,0.790556,0.791843,0.797866,ZF
1243,1243,0.703997,0.713644,0.680562,0.652618,0.699005,ZNH


## Save DataFrame

In [69]:
# Write the combined table to "<market>_temporal_data.csv"; index=False leaves the row index out of the file
output_file = "../temporal_data/{}_temporal_data.csv".format(market)
final_df.to_csv(output_file, index=False)