In [9]:

import pandas as pd
# 設定顯示的最大列數
pd.set_option('display.max_rows', None)

In [10]:


# 讀取使用者上傳的CSV檔案
file_path = 'C:/Users/aaa29/台科大/台科大富邦/上市上櫃半導體資本額.csv'
df = pd.read_csv(file_path)

In [11]:
#過濾實收資本額在10e-100e之間
df_filtered_bigsmall = df[df['實收資本額(元)']>=1000000000]
#df_filtered_bigsmall = df_filtered_bigsmall[10000000000>=df_filtered_bigsmall['實收資本額(元)']]

In [12]:
# 讀取2018-2022每季存貨營收筆的資料
file_path = 'C:/Users/aaa29/台科大/台科大富邦/上市上櫃半導體存貨營收比2018_2022.csv'
df_kutsen_2018_2022 = pd.read_csv(file_path)


# Calculating the standard deviation of '季底存貨/營收TSE' for each unique '代號'
df_kutsen_2018_2022 = df_kutsen_2018_2022.groupby('代號')['季底存貨/營收TSE'].agg(['mean', 'std']).reset_index()
df_kutsen_2018_2022.columns = ['代號', '平均','標準差']

# 讀取2023每季存貨營收筆的資料
file_path = 'C:/Users/aaa29/台科大/台科大富邦/上市上櫃半導體存貨營收比2023.csv'
df_kutsen_2023 = pd.read_csv(file_path)

# 合併資料框，基於 "代號"
df_kutsen = pd.merge(df_kutsen_2023, df_kutsen_2018_2022, on='代號', how='left')

# 計算差距，以標準差為單位表示
df_kutsen['差異標準差'] = (df_kutsen['季底存貨/營收TSE'] - df_kutsen['平均']) / df_kutsen['標準差']

df_kutsen

Unnamed: 0,代號,名稱,年/月,季底存貨/營收TSE,平均,標準差,差異標準差
0,1560,中砂,2023/09,117.44,99.384,12.150703,1.486005
1,1560,中砂,2023/06,120.15,99.384,12.150703,1.709037
2,1560,中砂,2023/03,114.37,99.384,12.150703,1.233344
3,2303,聯電,2023/09,64.06,47.2675,5.762904,2.913896
4,2303,聯電,2023/06,61.38,47.2675,5.762904,2.448852
5,2303,聯電,2023/03,60.28,47.2675,5.762904,2.257976
6,2329,華泰,2023/09,33.83,39.8515,6.603154,-0.911913
7,2329,華泰,2023/06,41.98,39.8515,6.603154,0.322346
8,2329,華泰,2023/03,46.78,39.8515,6.603154,1.049271
9,2330,台積電,2023/09,47.94,37.8525,6.88702,1.464712


In [22]:
# 根據提供的規則來標註庫存狀態
def categorize_inventory(diff_std):
    if diff_std >= 2:
        return '庫存過高'
    elif 1 <= diff_std < 2:
        return '庫存偏高'
    elif -1 <= diff_std < 1:
        return '庫存健康'
    elif -2 <= diff_std < -1:
        return '庫存偏低'
    else:
        return '庫存過低'

# 套用庫存狀態分類
df_kutsen['庫存狀態'] = df_kutsen['差異標準差'].apply(categorize_inventory)

# 建立庫存狀態的排序等級
inventory_status_order = {
    '庫存過高': 5,
    '庫存偏高': 4,
    '庫存健康': 3,
    '庫存偏低': 2,
    '庫存過低': 1
}

# 將庫存狀態轉換為數字等級
df_kutsen['庫存狀態等級'] =df_kutsen['庫存狀態'].map(inventory_status_order)

# 撈出 "庫存狀態等級" 遞增
strictly_increasing_stocks =df_kutsen.groupby('代號')['庫存狀態等級'].apply(
    lambda x: x.is_monotonic_increasing and len(x.unique()) > 1
)

# 選取符合條件的股票
strictly_increasing_stocks_df =df_kutsen[df_kutsen['代號'].isin(strictly_increasing_stocks[strictly_increasing_stocks].index)]


# 撈出 "差異標準差" 全部落在庫存健康的範疇
healthy_stocks = df_kutsen.groupby('代號')['庫存狀態'].apply(lambda x: (x == '庫存健康').all())
healthy_stocks_df = df_kutsen[df_kutsen['代號'].isin(healthy_stocks[healthy_stocks].index)]

#合併健康跟緩降
kuratio_final_df = pd.concat([strictly_increasing_stocks_df, healthy_stocks_df])

# 按代號將相同股票資料合併到同一行
kuratio_final_df_pivot = kuratio_final_df.pivot_table(
    index=['代號', '名稱'], 
    columns='年/月', 
    values=['季底存貨/營收TSE', '庫存狀態'],
    aggfunc='first'
).reset_index()

# 調整欄位名稱
kuratio_final_df_pivot.columns = [f"{j}_{i}" if j else i for i, j in kuratio_final_df_pivot.columns]

In [23]:
# 進行合併，基於 "代號" #第一步第二步
first_second_step_df = pd.merge(df_filtered_bigsmall, kuratio_final_df_pivot, on='代號', how='left')
first_second_step_df = first_second_step_df.dropna()
first_second_step_df

Unnamed: 0,代號,名稱_x,實收資本額(元),名稱_y,2023/03_季底存貨/營收TSE,2023/06_季底存貨/營收TSE,2023/09_季底存貨/營收TSE,2023/03_庫存狀態,2023/06_庫存狀態,2023/09_庫存狀態
3,3711,日月光投控,44120640000.0,日月光投控,65.45,56.92,49.92,庫存過高,庫存偏高,庫存健康
6,2337,旺宏,18558260000.0,旺宏,210.58,191.47,184.11,庫存偏高,庫存健康,庫存健康
8,2454,聯發科,16016940000.0,聯發科,72.41,62.23,48.49,庫存過高,庫存偏高,庫存健康
9,2449,京元電子,12227450000.0,京元電子,16.56,14.67,13.43,庫存健康,庫存健康,庫存健康
10,2329,華泰,8305783000.0,華泰,46.78,41.98,33.83,庫存偏高,庫存健康,庫存健康
11,6239,力成,7591466000.0,力成,62.13,50.47,41.22,庫存過高,庫存過高,庫存偏高
13,8150,南茂,7272401000.0,南茂,62.74,52.14,48.44,庫存過高,庫存偏高,庫存健康
15,3034,聯詠,6085115000.0,聯詠,44.05,34.71,31.51,庫存健康,庫存健康,庫存偏低
16,2401,凌陽,5919949000.0,凌陽,168.3,125.11,103.96,庫存過高,庫存偏高,庫存健康
18,2441,超豐,5688459000.0,超豐,41.82,34.32,31.22,庫存偏高,庫存偏高,庫存健康


In [26]:
# Assuming your DataFrame is named 'df'
# Extract the '代號' column into a list
code_list = first_second_step_df['代號'].tolist()

# Display the list
print(code_list)
len(code_list)

[3711, 2337, 2454, 2449, 2329, 6239, 8150, 3034, 2401, 2441, 2388, 2379, 2363, 2451, 3105, 4919, 3707, 2369, 6789, 2458, 3016, 3006, 3374, 3035, 2338, 3545, 6271, 2455, 2351, 2436, 3372, 5471, 6526, 3014, 3227, 3265, 3443, 6525, 4961, 8016, 8261, 4991, 4952, 6451, 5285]


45

In [27]:
# step3
file_path = 'C:/Users/aaa29/台科大/台科大富邦/上市上櫃半導體篩選後毛利2023.csv'
df_moali = pd.read_csv(file_path)
df_moali

from scipy.stats import gmean

# 計算已實現銷貨毛利成長率的幾何平均，並排序
df_moali_geom_avg = df_moali.groupby('代號')['已實現銷貨毛利成長率(%)'].agg(gmean).reset_index()
df_moali_geom_avg = df_moali_geom_avg.sort_values(by='已實現銷貨毛利成長率(%)', ascending=False).head(10)

# 合併原始名稱以便顯示
df_moali_geom_avg = df_moali_geom_avg.merge(df_moali[['代號', '名稱']].drop_duplicates(), on='代號', how='left')

  log_a = np.log(a)


In [28]:
df_moali_geom_avg

Unnamed: 0,代號,已實現銷貨毛利成長率(%),名稱
0,6451,2.90465,訊芯-KY
1,3014,1.249876,聯陽
2,3443,1.100161,創意
3,3227,1.016706,原相
4,2451,0.942099,創見
5,2329,0.941869,華泰
6,2338,0.904973,光罩
7,2351,0.900141,順德
8,3034,0.87165,聯詠
9,2388,0.838922,威盛
