# Top ten index constituents - representative and worth investing in

In [1]:
import pandas as pd

def preprocess_stock_data(file_path):
    """Load a stock-price CSV and return it cleaned and sorted by date.

    The price columns ``Close/Last``, ``Open``, ``High`` and ``Low`` have
    every ``$`` and ``,`` character removed and are converted to ``float``.
    The ``Date`` column is parsed with ``pandas.to_datetime`` and the rows
    are sorted by it in ascending order. The original row labels are kept,
    so the returned index is not renumbered.

    Args:
        file_path: Path of the CSV file (anything ``pandas.read_csv``
            accepts as its first argument).

    Returns:
        pandas.DataFrame: the cleaned data, oldest date first.

    Raises:
        KeyError: if one of the price columns or ``Date`` is missing.
        ValueError: if a price value is still not numeric after stripping
            ``$`` and ``,``.
    """
    # Read the CSV file.
    stock_data = pd.read_csv(file_path)

    # Strip the "$" sign and "," separators from the price columns and
    # convert them to numeric values.
    # The pattern is a raw string: "\$" inside a normal string literal is an
    # invalid escape sequence (SyntaxWarning on recent Python versions).
    # Inside a character class "$" is already literal, so no escape is needed.
    price_columns = ['Close/Last', 'Open', 'High', 'Low']
    for col in price_columns:
        stock_data[col] = stock_data[col].replace(r'[$,]', '', regex=True).astype(float)

    # Convert the date column to datetime values.
    stock_data['Date'] = pd.to_datetime(stock_data['Date'])

    # Sort by date in ascending order.
    stock_data = stock_data.sort_values(by='Date')

    # Hand back the preprocessed data.
    return stock_data

           Date  Close/Last    Volume    Open     High     Low
2515 2014-06-24       25.34    243530   25.52    25.98   25.30
2514 2014-06-25       25.27    491864   25.24    25.49   24.57
2513 2014-06-26       25.45    342669   26.00    26.00   25.21
2512 2014-06-27       24.69   1482237   25.24    25.41   24.69
2511 2014-06-30       25.27    648122   24.69    25.39   24.61
...         ...         ...       ...     ...      ...     ...
4    2024-06-14      844.54   6598781  846.93   872.00  825.61
3    2024-06-17      887.41   8911764  857.40   921.91  828.09
2    2024-06-18      920.01   8762073  897.00   938.88  883.31
1    2024-06-20      917.64  13432940  961.29  1014.02  900.32
0    2024-06-21      905.26   7465581  900.10   917.95  863.22

[2516 rows x 6 columns]


## SMCI, Technology

In [5]:
# This file only needs to be put into ascending date order.
smci = pd.read_excel('data/SMCI_merged_data.xlsx')
smci['Date'] = pd.to_datetime(smci['Date'])
# Sort by date in ascending order.
smci_sorted = smci.sort_values(by='Date')

In [6]:
# Preview the first rows of the date-sorted SMCI data.
smci_sorted.head()

Unnamed: 0,Date,DPRIME,Close,Volume,Open,High,Low
2515,2014-06-24,3.25,25.34,243530,25.52,25.98,25.303
2514,2014-06-25,3.25,25.27,491864,25.24,25.49,24.57
2513,2014-06-26,3.25,25.45,342669,26.0,26.0,25.21
2512,2014-06-27,3.25,24.69,1482237,25.24,25.405,24.69
2511,2014-06-30,3.25,25.27,648122,24.69,25.39,24.61


In [7]:
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
smci_sorted.to_csv('data/smci_sorted.csv', index=False)

## MI, technology - data starts 06/25/2019

In [9]:
# Call the preprocessing function with the file path, then preview the first rows.
Microstrategy_sorted = preprocess_stock_data('data/Microstrategy.csv')
Microstrategy_sorted.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1257,2019-06-25,140.16,114721,141.55,141.55,139.1
1256,2019-06-26,139.12,83074,140.02,140.3,138.72
1255,2019-06-27,141.03,81309,139.29,142.1,139.29
1254,2019-06-28,143.31,457484,141.5,143.93,140.03
1253,2019-07-01,140.03,169495,143.0,144.31,139.8


In [10]:
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
Microstrategy_sorted.to_csv('data/Microstrategy_sorted.csv', index=False)

## CSUI, industrial

In [11]:
# Call the preprocessing function with the file path.
Comfort_sorted = preprocess_stock_data('data/Comfort.csv')
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
Comfort_sorted.to_csv('data/Comfort_sorted.csv', index=False)
# Preview the first rows.
Comfort_sorted.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1257,2019-06-25,50.24,170809,49.97,50.64,49.73
1256,2019-06-26,50.13,134936,50.43,50.43,49.84
1255,2019-06-27,50.63,223288,50.21,50.7614,50.19
1254,2019-06-28,50.99,430571,50.83,51.91,50.83
1253,2019-07-01,51.19,178970,51.72,51.92,50.73


## OII, technology

In [12]:
# Call the preprocessing function with the file path.
onto_sorted = preprocess_stock_data('data/Onto.csv')
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
onto_sorted.to_csv('data/onto_sorted.csv', index=False)
# Preview the first rows.
onto_sorted.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1257,2019-06-25,33.51,424234,34.53,34.6,33.38
1256,2019-06-26,33.82,213329,33.92,34.33,33.66
1255,2019-06-27,35.45,680218,33.8,35.81,33.79
1254,2019-06-28,34.71,1197134,35.18,35.695,34.08
1253,2019-07-01,35.27,297535,35.27,35.5,35.02


## CC, consumer discretionary

In [13]:
# Call the preprocessing function with the file path.
Carvana_sorted = preprocess_stock_data('data/Carvana.csv')
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
Carvana_sorted.to_csv('data/Carvana_sorted.csv', index=False)
# Preview the first rows.
Carvana_sorted.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1257,2019-06-25,60.31,1388919,62.52,63.39,59.37
1256,2019-06-26,59.07,958220,60.94,61.67,58.9
1255,2019-06-27,61.98,893364,59.46,62.28,59.46
1254,2019-06-28,62.59,4044301,61.99,63.47,61.6
1253,2019-07-01,65.61,1364706,63.74,65.9,62.175


## EBI, consumer discretionary

In [14]:
# Call the preprocessing function with the file path.
Elf_sorted = preprocess_stock_data('data/Elf.csv')
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
Elf_sorted.to_csv('data/Elf_sorted.csv', index=False)
# Preview the first rows.
Elf_sorted.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1257,2019-06-25,13.89,772829,13.11,14.02,13.02
1256,2019-06-26,13.81,844401,13.85,14.1,13.65
1255,2019-06-27,14.21,824251,13.89,14.39,13.725
1254,2019-06-28,14.1,1737793,14.25,14.44,13.95
1253,2019-07-01,14.25,563042,14.27,14.63,13.81


## Fabrinet, Technology

In [15]:
# Call the preprocessing function with the file path.
Fabrinet_sorted = preprocess_stock_data('data/Fabrinet.csv')
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
Fabrinet_sorted.to_csv('data/Fabrinet_sorted.csv', index=False)
# Preview the first rows.
Fabrinet_sorted.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1257,2019-06-25,47.53,335199,48.66,49.075,47.42
1256,2019-06-26,48.71,576316,48.16,49.49,47.93
1255,2019-06-27,49.91,213334,49.26,49.94,48.61
1254,2019-06-28,49.67,1548922,49.95,50.55,49.31
1253,2019-07-01,51.95,524524,51.8,52.86,51.35


## LNW, consumer discretionary

In [16]:
# Call the preprocessing function with the file path.
LNW_sorted = preprocess_stock_data('data/LNW.csv')
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
LNW_sorted.to_csv('data/LNW_sorted.csv', index=False)
# Preview the first rows.
LNW_sorted.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1257,2019-06-25,18.25,1988809,18.13,18.78,17.91
1256,2019-06-26,18.34,1051399,18.24,18.75,18.02
1255,2019-06-27,18.71,2053872,18.38,18.945,18.27
1254,2019-06-28,19.82,2729475,18.74,19.99,18.66
1253,2019-07-01,19.74,1666701,20.19,20.9,19.66


## WIP, energy - data starts from 01/09/2020 - not selected

## ANF, consumer discretionary

In [17]:
# Call the preprocessing function with the file path.
ANF_sorted = preprocess_stock_data('data/ANF.csv')
# Save the preprocessed data as a CSV file (index=False leaves the row labels out).
ANF_sorted.to_csv('data/ANF_sorted.csv', index=False)
# Preview the first rows.
ANF_sorted.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
1257,2019-06-25,15.36,3591536,16.19,16.22,15.25
1256,2019-06-26,15.82,3024393,15.47,15.975,15.39
1255,2019-06-27,15.85,1715295,15.95,15.95,15.61
1254,2019-06-28,16.04,3292135,15.94,16.3,15.85
1253,2019-07-01,16.07,2519477,16.35,16.53,15.81
