In [None]:
pip install pandas_ta --quiet

In [None]:
import pandas as pd
import datetime as dt
import numpy as np
import pandas_ta as ta

EMA + RSI Technical Indicator

In [None]:
# --- 1. Load Data ---
# Read the OHLCV export into `df` and express its timestamps in Jakarta time.

file_path = 'BBRI.JK_OHLC_Data.csv'

try:
    df = pd.read_csv(
        file_path,
        skiprows=2,  # leading rows to discard; depends on the dataset
        header=0,    # the next row is consumed as the header and replaced by `names`
        names=['Datetime', 'Close', 'High', 'Low', 'Open', 'Volume']  # column naming
    )
except FileNotFoundError:
    print(f"Error: File is not found at: '{file_path}'")
    print("Make sure the CSV is at the correct directory.")
    # Re-raise so execution stops here: every later cell needs `df`, and
    # continuing would either fail with a NameError or silently reuse a stale
    # `df` left in the kernel by an earlier run.
    raise

df['Datetime'] = pd.to_datetime(df['Datetime'])
# Convert Datetime to GMT+7. tz_convert needs timezone-aware timestamps, so
# this step is specific to exports that carry a UTC offset (BBRI_OHLC.csv).
df['Datetime'] = df['Datetime'].dt.tz_convert('Asia/Jakarta')

print("Data successfully Loaded. Here are the first 5 rows: ")
print(df.head())

Data successfully Loaded. Here are the first 5 rows: 
                   Datetime   Close    High     Low    Open    Volume
0 2023-10-09 09:00:00+07:00  5200.0  5225.0  5175.0  5200.0  15199300
1 2023-10-09 10:00:00+07:00  5175.0  5200.0  5125.0  5200.0  33701800
2 2023-10-09 11:00:00+07:00  5150.0  5175.0  5150.0  5150.0   7039400
3 2023-10-09 13:00:00+07:00  5175.0  5175.0  5150.0  5175.0   7545900
4 2023-10-09 14:00:00+07:00  5125.0  5175.0  5125.0  5175.0  29913600


In [None]:
# Add the technical-indicator columns, all derived from the closing price:
# two exponential moving averages (12 and 26 periods) and a 14-period RSI.
close_prices = df['Close']
for span in (12, 26):
    df[f'EMA_{span}'] = ta.ema(close_prices, length=span)
df['RSI_14'] = ta.rsi(close_prices, length=14)

In [None]:
# --- 4. Show the results ---
# Preview the first 20 rows, then write the enriched frame to CSV without the
# positional index so it can be reloaded by the following cells.
output_path = 'BBRI.JK_OHLC_Data_Technical.csv'
print(df.head(n=20))
df.to_csv(output_path, index=False)

                    Datetime   Close    High     Low    Open    Volume  \
0  2023-10-09 09:00:00+07:00  5200.0  5225.0  5175.0  5200.0  15199300   
1  2023-10-09 10:00:00+07:00  5175.0  5200.0  5125.0  5200.0  33701800   
2  2023-10-09 11:00:00+07:00  5150.0  5175.0  5150.0  5150.0   7039400   
3  2023-10-09 13:00:00+07:00  5175.0  5175.0  5150.0  5175.0   7545900   
4  2023-10-09 14:00:00+07:00  5125.0  5175.0  5125.0  5175.0  29913600   
5  2023-10-09 15:00:00+07:00  5150.0  5175.0  5125.0  5125.0  18798200   
6  2023-10-09 16:00:00+07:00  5125.0  5125.0  5125.0  5125.0         0   
7  2023-10-10 09:00:00+07:00  5175.0  5200.0  5125.0  5150.0  22655400   
8  2023-10-10 10:00:00+07:00  5175.0  5200.0  5175.0  5175.0   4942200   
9  2023-10-10 11:00:00+07:00  5200.0  5225.0  5175.0  5200.0  15824500   
10 2023-10-10 13:00:00+07:00  5225.0  5225.0  5175.0  5200.0   6589900   
11 2023-10-10 14:00:00+07:00  5225.0  5250.0  5200.0  5225.0  18885300   
12 2023-10-10 15:00:00+07:00  5200.0  

In [None]:
# Reload the price/indicator table written earlier. CSV stores timestamps as
# text, so the 'Datetime' column is parsed back into datetimes.
df_BBRI_Full = pd.read_csv('BBRI.JK_OHLC_Data_Technical.csv').assign(
    Datetime=lambda frame: pd.to_datetime(frame['Datetime'])
)

# Load the sentiment table and parse its publication timestamps.
df_sentiment_Full = pd.read_csv('BBRI_Sentiment_Final_IndoBERT_Full.csv').assign(
    Publish_date=lambda frame: pd.to_datetime(frame['Publish_date'])
)

In [None]:
# Numeric direction assigned to each sentiment label.
sentiment_map = {
    'Neutral': 0,
    'Bearish': -1,
    'Bullish': 1
}

# Put the publication timestamps on the fixed UTC+07:00 offset. merge_asof
# below needs both key columns to share one dtype; the price bars print with a
# +07:00 offset (NOTE(review): confirm df_BBRI_Full['Datetime'] uses the same
# fixed offset). Branching on the current tz instead of catching every
# exception keeps the cell re-runnable and stops real errors being hidden.
publish_dates = df_sentiment_Full['Publish_date']
if publish_dates.dt.tz is None:
    df_sentiment_Full['Publish_date'] = publish_dates.dt.tz_localize('UTC+07:00')
    print("Berhasil melokalkan timezone data sentimen ke 'Asia/Jakarta'.")
else:
    # Already timezone-aware (for example when this cell is executed twice):
    # convert rather than localize, which would raise on aware timestamps.
    df_sentiment_Full['Publish_date'] = publish_dates.dt.tz_convert('UTC+07:00')
    print("Data sentimen sudah memiliki timezone; dikonversi ke UTC+07:00.")

# Labels missing from sentiment_map become NaN here.
df_sentiment_Full['sentiment_value'] = df_sentiment_Full['sentiment'].map(sentiment_map)

# Signed score: 'sentiment_score' multiplied by the label direction
# (presumably 'sentiment_score' is the classifier confidence; confirm).
df_sentiment_Full['calculated_score'] = df_sentiment_Full['sentiment_score'] * df_sentiment_Full['sentiment_value']

print("Pemrosesan data sentimen awal selesai.")

# By default each row is mapped at its own publication time.
df_sentiment_Full['Mapping_Time'] = df_sentiment_Full['Publish_date']

cutoff_time = dt.time(16, 0, 0)

# Rows published at or after 16:00 (the comparison is inclusive).
is_after_cutoff_BBRI = df_sentiment_Full['Publish_date'].dt.time >= cutoff_time

# Move those rows to 09:00 on the following calendar day: add one day,
# truncate to midnight, then add nine hours.
next_day_normalized = (df_sentiment_Full.loc[is_after_cutoff_BBRI, 'Publish_date'] +
                       pd.Timedelta(days=1)).dt.normalize()

market_open_next_day = next_day_normalized + pd.Timedelta(hours=9)

df_sentiment_Full.loc[is_after_cutoff_BBRI, 'Mapping_Time'] = market_open_next_day

# merge_asof requires both frames to be sorted by their key columns.
df_sentiment_to_merge_BBRI = df_sentiment_Full[['Mapping_Time', 'calculated_score']].copy()
df_sentiment_to_merge_BBRI = df_sentiment_to_merge_BBRI.sort_values(by='Mapping_Time')

# direction='forward' attaches each sentiment row to the first price bar whose
# 'Datetime' is at or after its 'Mapping_Time'. The price frame is sorted
# explicitly rather than relying on the order of the CSV.
merged_data_BBRI = pd.merge_asof(
    df_sentiment_to_merge_BBRI,
    df_BBRI_Full.sort_values(by='Datetime'),
    left_on='Mapping_Time',
    right_on='Datetime',
    direction='forward'
)

# Average the signed scores of all rows that landed on the same price bar.
df_daily_sentiment_BBRI = merged_data_BBRI.groupby('Datetime')['calculated_score'].mean().reset_index()

df_daily_sentiment_BBRI = df_daily_sentiment_BBRI.rename(columns={
    'calculated_score': 'average_score'
})

print("Agregasi sentimen per jam perdagangan selesai.")
print(df_daily_sentiment_BBRI.head(25))

Berhasil melokalkan timezone data sentimen ke 'Asia/Jakarta'.
Pemrosesan data sentimen awal selesai.
Agregasi sentimen per jam perdagangan selesai.
                    Datetime  average_score
0  2023-10-09 09:00:00+07:00       0.000000
1  2023-10-10 09:00:00+07:00       0.000000
2  2023-10-10 13:00:00+07:00       0.000000
3  2023-10-10 14:00:00+07:00      -0.791827
4  2023-10-10 15:00:00+07:00       0.000000
5  2023-10-10 16:00:00+07:00       0.509673
6  2023-10-11 09:00:00+07:00       0.000000
7  2023-10-11 10:00:00+07:00       0.000000
8  2023-10-11 11:00:00+07:00       0.159258
9  2023-10-11 13:00:00+07:00       0.000000
10 2023-10-12 09:00:00+07:00      -0.298258
11 2023-10-12 13:00:00+07:00       0.000000
12 2023-10-13 09:00:00+07:00       0.000000
13 2023-10-13 11:00:00+07:00       0.000000
14 2023-10-13 14:00:00+07:00      -0.037286
15 2023-10-13 15:00:00+07:00      -0.632930
16 2023-10-13 16:00:00+07:00       0.000000
17 2023-10-16 09:00:00+07:00      -0.314417
18 2023-10-16 11

In [None]:
# --- 6. Final merge ---
# Join the price/indicator bars with the aggregated sentiment scores.
df_final_BBRI = pd.merge(
    df_BBRI_Full,
    df_daily_sentiment_BBRI,
    on='Datetime',
    how='left'  # 'left' keeps every trading bar, with or without sentiment
)

# --- 7. Final cleanup ---
# Bars with no mapped sentiment come out of the left join with a NaN
# 'average_score'; fill those with 0 (neutral).
df_final_BBRI['average_score'] = df_final_BBRI['average_score'].fillna(0)
# Direction of the averaged score as -1 / 0 / 1.
df_final_BBRI['Sentiment_value'] = np.sign(df_final_BBRI['average_score']).astype(int)
# Magnitude of the averaged score, independent of direction.
df_final_BBRI['ABS_Average_Score'] = df_final_BBRI['average_score'].abs()


# --- 8. Show the results ---
print("\n" + "=" * 30)
print("HASIL PENGGABUNGAN (5 baris pertama):")
print(df_final_BBRI.head())
print("\n" + "=" * 30)
print("\nInfo Tipe Data Hasil Akhir:")
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df_final_BBRI.info()

df_final_BBRI.to_csv('BBRI_merged_stock_and_sentiment_Full.csv', index=False)


HASIL PENGGABUNGAN (5 baris pertama):
                   Datetime   Close    High     Low    Open    Volume  EMA_12  \
0 2023-10-09 09:00:00+07:00  5200.0  5225.0  5175.0  5200.0  15199300     NaN   
1 2023-10-09 10:00:00+07:00  5175.0  5200.0  5125.0  5200.0  33701800     NaN   
2 2023-10-09 11:00:00+07:00  5150.0  5175.0  5150.0  5150.0   7039400     NaN   
3 2023-10-09 13:00:00+07:00  5175.0  5175.0  5150.0  5175.0   7545900     NaN   
4 2023-10-09 14:00:00+07:00  5125.0  5175.0  5125.0  5175.0  29913600     NaN   

   EMA_26    RSI_14  average_score  Sentiment_value  ABS_Average_Score  
0     NaN       NaN            0.0                0                0.0  
1     NaN  0.000000            0.0                0                0.0  
2     NaN  0.000000            0.0                0                0.0  
3     NaN  7.142857            0.0                0                0.0  
4     NaN  6.190476            0.0                0                0.0  


Info Tipe Data Hasil Akhir:
<class