In [1]:
import pandas as pd
import datetime as dt
import csv
import numpy as np

In [2]:
# Read the stock price data from a CSV file
stock_price_all_day = pd.read_csv("/Users/elaine/Desktop/side_project/stock_price/stock_price_all_day.csv",
                     usecols=["Stock_Code","Date","Trade_Volume","Trade_Value","Opening_Price","Highest_Price","Lowest_Price","Closing_Price","Change","Transaction"],
                     encoding='utf-8', dtype={"Trade_Value":int})

In [3]:
# Add moving average columns to the DataFrame
stock_price_all_day['moving_average_5'] = stock_price_all_day.groupby('Stock_Code')['Closing_Price'].transform(lambda x: x.rolling(5).mean())
stock_price_all_day["moving_average_10"] = stock_price_all_day.groupby('Stock_Code')['Closing_Price'].transform(lambda x: x.rolling(10).mean())
stock_price_all_day

Unnamed: 0,Stock_Code,Date,Trade_Volume,Trade_Value,Opening_Price,Highest_Price,Lowest_Price,Closing_Price,Change,Transaction,moving_average_5,moving_average_10
0,2002,2012-1-2,6819530.0,195212661,28.80,28.85,28.50,28.55,-0.25,2737.0,,
1,2002,2012-1-3,22624652.0,654596058,28.80,29.00,28.70,29.00,0.45,6031.0,,
2,2002,2012-1-4,16452869.0,476519401,29.00,29.10,28.80,29.00,0,4604.0,,
3,2002,2012-1-5,17206110.0,497876490,28.90,29.00,28.85,29.00,0,3923.0,,
4,2002,2012-1-6,14229626.0,411580354,29.00,29.00,28.80,29.00,0,3698.0,28.91,
...,...,...,...,...,...,...,...,...,...,...,...,...
225711,2353,2017-1-18,10280315.0,144150610,14.00,14.15,13.90,14.05,0,2752.0,13.70,13.530
225712,2353,2017-1-19,13765714.0,194438697,14.05,14.25,13.95,14.20,0.15,3654.0,13.84,13.615
225713,2353,2017-1-20,19397952.0,281429076,14.20,14.70,14.15,14.50,0.3,5228.0,14.05,13.720
225714,2353,2017-1-23,14742665.0,215420885,14.65,14.70,14.50,14.65,0.15,3768.0,14.29,13.855


In [12]:
#stock_price_all_day[stock_price_all_day["Stock_Code"] == 2412]

# Calculate the KD lines
#Step 1: Calculate RSV - (Today's closing price - Lowest price in the past 9 days) / (Highest price in the past 9 days - Lowest price in the past 9 days)
#Step 2: Calculate K - K = 2/3 * (Yesterday's K value) + 1/3 * (Today's RSV)
#Step 3: Calculate D - D = 2/3 * (Yesterday's D value) + 1/3 * (Today's K)

def KD(data):
    # Create a copy of the data DataFrame
    data_df = data.copy()
    
    # Calculate the minimum and maximum values for the past 9 days
    data_df['min'] = data_df["Lowest_Price"].rolling(9).min()
    data_df['max'] = data_df["Highest_Price"].rolling(9).max()
    
    # Calculate RSV
    data_df['RSV'] = (data_df["Closing_Price"] - data_df['min'])/(data_df['max'] - data_df['min'])
    
    # Drop rows with NaN values
    data_df = data_df.dropna()
    
    # Calculate K values
    # Initialize K with a value of 50
    K_list = [50]
    for num,rsv in enumerate(list(data_df['RSV'])):
        K_yestarday = K_list[num]
        K_today = 2/3 * K_yestarday + 1/3 * rsv
        K_list.append(K_today)
    data_df['K'] = K_list[1:]
    
    # Calculate D values
    # Initialize D with a value of 50
    D_list = [50]
    for num,K in enumerate(list(data_df['K'])):
        D_yestarday = D_list[num]
        D_today = 2/3 * D_yestarday + 1/3 * K
        D_list.append(D_today)
    data_df['D'] = D_list[1:]
    
    # Merge the calculated K and D values with the original DataFrame
    use_df = pd.merge(data,data_df[['K','D','RSV']],left_index=True,right_index=True,how='left')
    return use_df 

In [13]:
stock_indicator_summary = stock_price_all_day.groupby('Stock_Code', group_keys=False).apply(KD)

In [14]:
stock_indicator_summary.head(20)

Unnamed: 0,Stock_Code,Date,Trade_Volume,Trade_Value,Opening_Price,Highest_Price,Lowest_Price,Closing_Price,Change,Transaction,moving_average_5,moving_average_10,K,D,RSV
0,2002,2012-1-2,6819530.0,195212661,28.8,28.85,28.5,28.55,-0.25,2737.0,,,,,
1,2002,2012-1-3,22624652.0,654596058,28.8,29.0,28.7,29.0,0.45,6031.0,,,,,
2,2002,2012-1-4,16452869.0,476519401,29.0,29.1,28.8,29.0,0.0,4604.0,,,,,
3,2002,2012-1-5,17206110.0,497876490,28.9,29.0,28.85,29.0,0.0,3923.0,,,,,
4,2002,2012-1-6,14229626.0,411580354,29.0,29.0,28.8,29.0,0.0,3698.0,28.91,,,,
5,2002,2012-1-9,9747591.0,281650039,29.0,29.0,28.75,29.0,0.0,3379.0,29.0,,,,
6,2002,2012-1-10,22419860.0,649584515,29.0,29.0,28.9,29.0,0.0,6430.0,29.0,,,,
7,2002,2012-1-11,11095428.0,321173962,29.0,29.0,28.9,28.95,-0.05,4032.0,28.99,,,,
8,2002,2012-1-12,24075904.0,696418517,28.95,29.0,28.85,28.95,0.0,5360.0,28.98,,,,
9,2002,2012-1-13,38016945.0,1101096317,28.95,29.05,28.9,28.95,0.0,8825.0,28.97,28.94,33.541667,44.513889,0.625


In [15]:
# Check for null values in the DataFrame
stock_indicator_summary.isnull()

Unnamed: 0,Stock_Code,Date,Trade_Volume,Trade_Value,Opening_Price,Highest_Price,Lowest_Price,Closing_Price,Change,Transaction,moving_average_5,moving_average_10,K,D,RSV
0,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True
1,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True
2,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True
3,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True
4,False,False,False,False,False,False,False,False,False,False,False,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225711,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
225712,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
225713,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
225714,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [16]:
# Print the updated DataFrame
stock_indicator_summary

Unnamed: 0,Stock_Code,Date,Trade_Volume,Trade_Value,Opening_Price,Highest_Price,Lowest_Price,Closing_Price,Change,Transaction,moving_average_5,moving_average_10,K,D,RSV
0,2002,2012-1-2,6819530.0,195212661,28.80,28.85,28.50,28.55,-0.25,2737.0,,,,,
1,2002,2012-1-3,22624652.0,654596058,28.80,29.00,28.70,29.00,0.45,6031.0,,,,,
2,2002,2012-1-4,16452869.0,476519401,29.00,29.10,28.80,29.00,0,4604.0,,,,,
3,2002,2012-1-5,17206110.0,497876490,28.90,29.00,28.85,29.00,0,3923.0,,,,,
4,2002,2012-1-6,14229626.0,411580354,29.00,29.00,28.80,29.00,0,3698.0,28.91,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225711,2353,2017-1-18,10280315.0,144150610,14.00,14.15,13.90,14.05,0,2752.0,13.70,13.530,0.741805,0.531337,0.894737
225712,2353,2017-1-19,13765714.0,194438697,14.05,14.25,13.95,14.20,0.15,3654.0,13.84,13.615,0.811997,0.624891,0.952381
225713,2353,2017-1-20,19397952.0,281429076,14.20,14.70,14.15,14.50,0.3,5228.0,14.05,13.720,0.830220,0.693334,0.866667
225714,2353,2017-1-23,14742665.0,215420885,14.65,14.70,14.50,14.65,0.15,3768.0,14.29,13.855,0.875702,0.754123,0.966667


In [17]:
stock_indicator_summary.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 225716 entries, 0 to 225715
Data columns (total 15 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Stock_Code         225716 non-null  int64  
 1   Date               225716 non-null  object 
 2   Trade_Volume       225716 non-null  float64
 3   Trade_Value        225716 non-null  int64  
 4   Opening_Price      225716 non-null  float64
 5   Highest_Price      225716 non-null  float64
 6   Lowest_Price       225716 non-null  float64
 7   Closing_Price      225716 non-null  float64
 8   Change             225716 non-null  object 
 9   Transaction        225716 non-null  float64
 10  moving_average_5   225376 non-null  float64
 11  moving_average_10  224951 non-null  float64
 12  K                  224951 non-null  float64
 13  D                  224951 non-null  float64
 14  RSV                224951 non-null  float64
dtypes: float64(11), int64(2), object(2)
memory usage: 3

In [18]:
stock_indicator_summary.to_csv("/Users/elaine/Desktop/side_project/stock_price/stock_indicator_summary.csv", index=False)

In [None]:
#grouped_df = stock_price_all_day.groupby('Stock_Code').apply(KD)

In [None]:
#test2002 = stock_price_all_day[stock_price_all_day["Stock_Code"] == 2002].reset_index(drop=True)
#test2002_summary = KD(test2002)
#test2002_summary.head(15)

In [None]:
#stock_price_all_day[stock_price_all_day["Stock_Code"] == 2412]
#test = stock_price_all_day[stock_price_all_day["Stock_Code"] == 2412].reset_index(drop=True)

In [None]:
# Apply the KD function to the stock_price_all_day DataFrame
#stock_indicator_summary = KD(stock_price_all_day)

In [None]:
# Fill any null values with 0
#stock_indicator_summary = stock_indicator_summary.fillna(value=0)