In [1]:
import os
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the company information dataset into a DataFrame
file_path = '../trainingDataset/company_information.csv'  # Replace with the path to your own file
data = pd.read_csv(file_path)

In [3]:
# Clean the numeric columns: remove ',' and '%' characters from the raw
# values, then cast every listed column to float (in place on `data`).
numerical_features = [
    'Revenue (IDR)',
    'Gross Profit (IDR)',
    'Net Income (IDR)',
    'Market Cap (IDR)',
    'Annual EPS',
    'Return on Equity (%)',
    '1 Year Price Returns (%)',
    '3 Year Price Returns (%)',
    '5 Year Price Returns (%)',
    'Dividend Yield (%)',
    'Payout Ratio (%)',
]

# Pattern -> replacement map, applied as regular expressions.
symbols_to_strip = {',': '', '%': ''}

for col in numerical_features:
    stripped = data[col].replace(symbols_to_strip, regex=True)
    data[col] = stripped.astype(float)

In [4]:
# One-hot encode the 'Sector' column (columns are prefixed with 'Sector_')
sector_encoded = pd.get_dummies(data['Sector'], prefix='Sector')

# Put the sector indicator columns side by side with the numeric features
features = pd.concat(
    objs=[sector_encoded, data[numerical_features]],
    axis='columns',
)

# Fit the scaler on the full feature table, then scale that same table.
# The fitted `scaler` is kept because it is exported later in the notebook.
scaler = MinMaxScaler().fit(features)
features_scaled = scaler.transform(features)

# Keep the stock codes ('Kode Saham') around for later reference
stocks = data.loc[:, 'Kode Saham']

In [5]:
# Seed Python, NumPy and TensorFlow before any layer is created so that
# weight initialisation (and the shuffling done later by model.fit) is
# repeatable across "Restart Kernel -> Run All". Without this the embeddings,
# and therefore the recommendation ranking, change on every run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Input layer: one vector per stock, as wide as the scaled feature table
input_stock = tf.keras.layers.Input(shape=(features_scaled.shape[1],), name="stock_features")

# Encoder: compress the features down to a 32-dimensional embedding.
# The layer is named "embedding" so it can be looked up by name on a
# reloaded model.
x = tf.keras.layers.Dense(128, activation='relu')(input_stock)
x = tf.keras.layers.Dense(64, activation='relu')(x)
embedding = tf.keras.layers.Dense(32, activation='relu', name="embedding")(x)

# Decoder: expand the embedding back to the original feature dimension
x = tf.keras.layers.Dense(64, activation='relu')(embedding)
x = tf.keras.layers.Dense(128, activation='relu')(x)
output_reconstructed = tf.keras.layers.Dense(features_scaled.shape[1], activation='linear')(x)

# Autoencoder: trained to reproduce its own input, using mean squared error
model = tf.keras.Model(inputs=input_stock, outputs=output_reconstructed)
model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError())
model.summary()


In [6]:
# Train the autoencoder: the scaled features are both the input and the
# reconstruction target. Progress output is silenced with verbose=0.
model.fit(
    x=features_scaled,
    y=features_scaled,
    batch_size=2,
    epochs=100,
    verbose=0,
)

<keras.src.callbacks.history.History at 0x1d4b03b9b10>

In [7]:
# Extract embeddings: build a sub-model that goes from the autoencoder's input
# to the output of its "embedding" layer, then run every row of the scaled
# feature table through it.
encoder = tf.keras.Model(inputs=input_stock, outputs=embedding)
embeddings = encoder.predict(features_scaled)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


In [8]:
# Compute the pairwise cosine similarity between all rows of `embeddings`
similarity_matrix = cosine_similarity(embeddings)

In [18]:
def recommend_with_model_and_returns_and_data(target_returns, top_n=69, return_weight=0.1):
    """
    Rank stocks by embedding similarity, penalised by distance to target returns.

    Each stock gets the score
    ``cosine_similarity(mean_embedding, stock_embedding) - return_weight * return_diff``
    where ``return_diff`` is the sum of absolute differences between the
    stock's price returns and the supplied targets.

    Parameters
    ----------
    target_returns : float or sequence of up to three floats
        Target price returns in percent, ordered as
        [1 Year Return, 3 Year Return, 5 Year Return]. Only as many return
        columns as targets supplied are compared, so ``[20]`` compares the
        1-year return only.
    top_n : int, default 69
        Maximum number of recommendations to return.
    return_weight : float, default 0.1
        Penalty subtracted from the similarity per percentage point of
        return distance.

    Returns
    -------
    list of (pandas.Series, float)
        One ``(stock_data, score)`` pair per recommended stock, best score
        first. ``stock_data`` holds 'Kode Saham' plus the numeric columns.
        Stocks whose score is NaN (a compared return is missing) sort last.

    Raises
    ------
    ValueError
        If more than three target returns are supplied.

    Notes
    -----
    Reads the notebook-level ``data``, ``encoder`` and ``features_scaled``.
    """
    return_columns = [
        '1 Year Price Returns (%)',
        '3 Year Price Returns (%)',
        '5 Year Price Returns (%)',
    ]
    output_columns = [
        'Kode Saham', 'Revenue (IDR)', 'Gross Profit (IDR)', 'Net Income (IDR)',
        'Market Cap (IDR)', 'Annual EPS', 'Return on Equity (%)',
        '1 Year Price Returns (%)', '3 Year Price Returns (%)', '5 Year Price Returns (%)',
        'Dividend Yield (%)', 'Payout Ratio (%)',
    ]

    # Accept a bare number as well as a list/array of targets.
    try:
        targets = list(target_returns)
    except TypeError:
        targets = [target_returns]
    if len(targets) > len(return_columns):
        raise ValueError(
            f"target_returns accepts at most {len(return_columns)} values "
            f"(1, 3 and 5 year returns), got {len(targets)}"
        )

    # Sum of absolute distances between each target and its matching return
    # column. Plain NumPy arrays are used so that everything below is
    # positional and does not depend on the index labels of `data`.
    # A missing return propagates as NaN instead of counting as distance 0.
    return_diff = 0.0
    for column, target in zip(return_columns, targets):
        return_diff = return_diff + (data[column] - target).abs().to_numpy()

    # Embed every stock with the trained encoder
    embeddings = encoder.predict(features_scaled)

    # NOTE(review): the reference vector is the mean of ALL stock embeddings
    # and does not depend on target_returns, so the similarity term measures
    # closeness to the "average" stock. Only the return penalty below is
    # target-specific. Kept as in the original; confirm this is intended.
    avg_target_embedding = embeddings.mean(axis=0)

    # Cosine similarity between the reference vector and every stock
    similarity_scores = cosine_similarity([avg_target_embedding], embeddings).flatten()

    # Combined score: embedding similarity minus the weighted return distance
    combined_score = similarity_scores - return_weight * return_diff

    # Sort descending by negating the scores; argsort places NaN at the end,
    # so stocks with an undefined score can never outrank valid ones.
    ranked_positions = (-combined_score).argsort(kind='stable')[:top_n]

    # Collect the stock rows together with their scores
    recommendations = []
    for position in ranked_positions:
        stock_data = data.iloc[position][output_columns]
        recommendations.append((stock_data, combined_score[position]))

    return recommendations

# Example usage: ask for stocks whose 1-year return is close to 20 %
target_returns = [20]
result = recommend_with_model_and_returns_and_data(target_returns, top_n=69)

# Columns shown for every recommended stock
display_columns = [
    'Kode Saham', 'Revenue (IDR)', 'Gross Profit (IDR)', 'Net Income (IDR)',
    'Market Cap (IDR)', 'Annual EPS', 'Return on Equity (%)',
    '1 Year Price Returns (%)', '3 Year Price Returns (%)', '5 Year Price Returns (%)',
    'Dividend Yield (%)', 'Payout Ratio (%)',
]
separator = "-" * 50

# Print each recommendation: a header line, the stock's data, then a separator
for stock_info, score in result:
    print(f"Stock: {stock_info['Kode Saham']}, Similarity Score: {score}")
    print(stock_info[display_columns])
    print(separator)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Stock: IGAR, Similarity Score: 0.7152563514709473
Kode Saham                            IGAR
Revenue (IDR)               875000000000.0
Gross Profit (IDR)          129000000000.0
Net Income (IDR)             51000000000.0
Market Cap (IDR)            530000000000.0
Annual EPS                            55.1
Return on Equity (%)                  8.12
1 Year Price Returns (%)             21.56
3 Year Price Returns (%)             17.78
5 Year Price Returns (%)             54.97
Dividend Yield (%)                     0.0
Payout Ratio (%)                       0.0
Name: 31, dtype: object
--------------------------------------------------
Stock: INDF, Similarity Score: 0.5116128463745117
Kode Saham                               INDF
Revenue (IDR)               114759000000000.0
Gross Profit (IDR)           39561000000000.0
Net Income (IDR)              9825000000000.0
Market Cap (IDR)                68710000000.0
Annual 

In [10]:
# Output directory and full path of the saved autoencoder
folder_name = "../trainingModel"
model_path = os.path.join(folder_name, "stock_recommendation_model.h5")

# Create the directory if it does not exist yet, then write the model there
os.makedirs(folder_name, exist_ok=True)
model.save(model_path)
print(f"Model saved to: {model_path}")



Model saved to: ../trainingModel\stock_recommendation_model.h5


In [15]:
import json
from sklearn.preprocessing import MinMaxScaler

# Export the fitted scaler parameters as JSON.
# 'min' holds scaler.data_min_ (not scaler.min_) and 'scale' holds
# scaler.scale_, one entry per feature column in fit order.
# NOTE(review): a consumer presumably rebuilds the transform as
# (x - min) * scale, which matches this default-range MinMaxScaler - confirm.
scaler_data = dict(
    min=scaler.data_min_.tolist(),
    scale=scaler.scale_.tolist(),
)

with open('scaler.json', 'w') as scaler_file:
    scaler_file.write(json.dumps(scaler_data))


In [11]:
# # Memuat kembali model dari folder
# loaded_model = tf.keras.models.load_model(model_path)
# print("Model loaded successfully.")

# # Ekstrak encoder dari model yang dimuat
# loaded_encoder = tf.keras.Model(inputs=loaded_model.input, outputs=loaded_model.get_layer("embedding").output)

# # Mendapatkan kembali embedding menggunakan encoder yang dimuat
# loaded_embeddings = loaded_encoder.predict(features_scaled)

# # Hitung ulang kemiripan kosinus dengan model yang dimuat
# loaded_similarity_matrix = cosine_similarity(loaded_embeddings)

In [12]:
# # Fungsi rekomendasi menggunakan model yang dimuat
# def recommend_from_loaded_model(stock_name, top_n=3):
#     idx = stocks[stocks == stock_name].index[0]
    
#     # Ambil skor kemiripan untuk saham tersebut
#     similarity_scores = loaded_similarity_matrix[idx]
    
#     # Urutkan berdasarkan skor (kecuali saham itu sendiri)
#     similar_indices = similarity_scores.argsort()[::-1][1:top_n+1]
#     similar_stocks = stocks.iloc[similar_indices]
#     similar_scores = similarity_scores[similar_indices]
    
#     # Gabungkan hasil (nama saham dan skor)
#     recommendations = list(zip(similar_stocks, similar_scores))
#     return recommendations

In [13]:
# # Contoh penggunaan
# result = recommend_from_loaded_model('AALI', top_n=3)
# print(result)

In [14]:
# # Mendapatkan prediksi rekonstruksi dari model
# predicted_features = loaded_model.predict(features_scaled)

# # Menghitung MAE antara input asli dan rekonstruksi
# mae = mean_absolute_error(features_scaled, predicted_features)

# print(f"Mean Absolute Error (MAE) of the model: {mae:.4f}")