In [1]:
import os
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics.pairwise import cosine_similarity

2024-11-30 03:47:34.982813: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-30 03:47:34.985712: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-30 03:47:34.994972: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732913255.011154  301756 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732913255.015307  301756 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-30 03:47:35.033431: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [2]:
# Load the company dataset into a DataFrame
file_path = '../trainingDataset/company_information.csv'  # Replace with the path to your file
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Clean the numeric columns: strip thousands separators and percent signs,
# then cast every listed column to float.
numerical_features = [
    'Revenue (B)',
    'Gross Profit (B)',
    'Net Income (B)',
    'Market Cap (B)',
    'Annual EPS',
    'Return on Equity (%)',
    '1 Year Price Returns (%)',
    '3 Year Price Returns (%)',
    '5 Year Price Returns (%)',
    'Dividend Yield (%)',
    'Payout Ratio (%)',
]
# Regex pattern -> replacement; both symbols are simply removed.
symbols_to_strip = {',': '', '%': ''}
for column_name in numerical_features:
    stripped = data[column_name].replace(symbols_to_strip, regex=True)
    data[column_name] = stripped.astype(float)

In [4]:
# Keep the ticker codes ('Kode Saham') so matrix rows can be mapped back to stocks
stocks = data['Kode Saham']

# One-hot encode the 'Sector' column
sector_encoded = pd.get_dummies(data['Sector'], prefix='Sector')

# Put the sector indicators side by side with the cleaned numeric features
numeric_block = data[numerical_features]
features = pd.concat([sector_encoded, numeric_block], axis=1)

# Min-max normalise every feature column
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)

In [5]:
# Seed Python, NumPy and the Keras backend so that weight initialisation here
# and batch shuffling during the later `fit` call are repeatable on a fresh
# Restart & Run All; without it the embeddings (and therefore the
# recommendations) change on every run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Width of the input / reconstructed output, taken from the scaled feature matrix
n_features = features_scaled.shape[1]

# Input layer
input_stock = tf.keras.layers.Input(shape=(n_features,), name="stock_features")

# Encoder: compress the features down to a 32-dimensional embedding
x = tf.keras.layers.Dense(128, activation='relu')(input_stock)
x = tf.keras.layers.Dense(64, activation='relu')(x)
embedding = tf.keras.layers.Dense(32, activation='relu', name="embedding")(x)

# Decoder: expand the embedding back to the original feature dimension
x = tf.keras.layers.Dense(64, activation='relu')(embedding)
x = tf.keras.layers.Dense(128, activation='relu')(x)
output_reconstructed = tf.keras.layers.Dense(n_features, activation='linear')(x)

# Autoencoder model, trained with mean-squared reconstruction error
model = tf.keras.Model(inputs=input_stock, outputs=output_reconstructed)
model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError())


W0000 00:00:1732913257.189314  301756 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [6]:
# Train the autoencoder to reconstruct its own input
model.fit(
    x=features_scaled,
    y=features_scaled,
    epochs=100,
    batch_size=2,
    verbose=0,
)

<keras.src.callbacks.history.History at 0x7fe70c1b4100>

In [7]:
# Extract the embeddings: a sub-model from the input up to the "embedding" layer
encoder = tf.keras.Model(input_stock, embedding)
embeddings = encoder.predict(x=features_scaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step


In [8]:
# Compute the pairwise cosine similarity between all stock embeddings
similarity_matrix = cosine_similarity(X=embeddings)

In [9]:
# Recommendation function
def recommend(stock_name, top_n=3):
    """Return the ``top_n`` stocks most similar to ``stock_name``.

    Scores are read from the module-level ``similarity_matrix`` and ticker
    codes from the module-level ``stocks`` Series. Row ``i`` of the matrix is
    taken to describe the ``i``-th entry (by position) of ``stocks``.

    Args:
        stock_name: Ticker code to look up in ``stocks``.
        top_n: Maximum number of recommendations to return. Values <= 0
            yield an empty list.

    Returns:
        A list of ``(ticker, score)`` tuples ordered from most to least
        similar. The queried stock itself is never included.

    Raises:
        IndexError: If ``stock_name`` does not occur in ``stocks``.
    """
    # Use the *position* of the match, not its index label: the similarity
    # matrix is positional, so a non-default Series index must not leak in.
    matches = (stocks == stock_name).to_numpy().nonzero()[0]
    if matches.size == 0:
        raise IndexError(f"Stock {stock_name!r} not found in `stocks`.")
    idx = matches[0]

    # Similarity of the queried stock against every stock
    similarity_scores = similarity_matrix[idx]

    # Rank from highest to lowest score. The queried stock is removed by
    # position rather than by assuming it sorts first, because ties (e.g. two
    # stocks with identical embeddings) can otherwise push it into the results.
    ranked = (-similarity_scores).argsort(kind="stable")
    similar_indices = ranked[ranked != idx][:max(top_n, 0)]
    similar_stocks = stocks.iloc[similar_indices]
    similar_scores = similarity_scores[similar_indices]

    # Pair each ticker with its score
    recommendations = list(zip(similar_stocks, similar_scores))
    return recommendations

In [10]:
# Example usage: the three stocks most similar to AALI
result = recommend(stock_name='AALI', top_n=3)
print(result)

[('ADMF', np.float32(0.8187435)), ('ABMM', np.float32(0.7905521)), ('ACES', np.float32(0.76197934))]


In [11]:
# Target directory for the trained model; created if it does not exist yet
folder_name = "../trainingModel"
os.makedirs(name=folder_name, exist_ok=True)

# Full path of the saved model file inside that directory
model_file_name = "stock_recommendation_model.h5"
model_path = os.path.join(folder_name, model_file_name)

# Write the model to disk and report where it went
model.save(model_path)
print(f"Model saved to: {model_path}")



Model saved to: ../trainingModel/stock_recommendation_model.h5


In [12]:
# Reload the saved autoencoder from disk
loaded_model = tf.keras.models.load_model(model_path)
print("Model loaded successfully.")

# Rebuild the encoder from the loaded model: input up to the "embedding" layer
embedding_layer = loaded_model.get_layer("embedding")
loaded_encoder = tf.keras.Model(
    inputs=loaded_model.input,
    outputs=embedding_layer.output,
)

# Recompute the embeddings with the reloaded encoder
loaded_embeddings = loaded_encoder.predict(x=features_scaled)

# Recompute the pairwise cosine similarity from those embeddings
loaded_similarity_matrix = cosine_similarity(X=loaded_embeddings)



Model loaded successfully.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


In [13]:
# Recommendation function backed by the reloaded model
def recommend_from_loaded_model(stock_name, top_n=3):
    """Return the ``top_n`` stocks most similar to ``stock_name``.

    Works like a nearest-neighbour lookup over the module-level
    ``loaded_similarity_matrix``; ticker codes come from the module-level
    ``stocks`` Series. Row ``i`` of the matrix is taken to describe the
    ``i``-th entry (by position) of ``stocks``.

    Args:
        stock_name: Ticker code to look up in ``stocks``.
        top_n: Maximum number of recommendations to return. Values <= 0
            yield an empty list.

    Returns:
        A list of ``(ticker, score)`` tuples ordered from most to least
        similar. The queried stock itself is never included.

    Raises:
        IndexError: If ``stock_name`` does not occur in ``stocks``.
    """
    # Locate the stock by position; the index label may not be a valid
    # row number for the similarity matrix when the Series index is not 0..n-1.
    is_match = (stocks == stock_name).to_numpy()
    positions = is_match.nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"Stock {stock_name!r} not found in `stocks`.")
    idx = positions[0]

    # Similarity of the queried stock against every stock
    similarity_scores = loaded_similarity_matrix[idx]

    # Sort descending (stable, so ties keep their original order) and drop the
    # queried stock explicitly instead of assuming it is ranked first.
    order = (-similarity_scores).argsort(kind="stable")
    limit = max(top_n, 0)
    similar_indices = order[order != idx][:limit]
    similar_stocks = stocks.iloc[similar_indices]
    similar_scores = similarity_scores[similar_indices]

    # Pair each ticker with its score
    recommendations = list(zip(similar_stocks, similar_scores))
    return recommendations

In [14]:
# Example usage: the three stocks most similar to AALI, using the reloaded model
result = recommend_from_loaded_model(stock_name='AALI', top_n=3)
print(result)

[('ADMF', np.float32(0.8187435)), ('ABMM', np.float32(0.7905521)), ('ACES', np.float32(0.76197934))]


In [15]:
# Reconstruct the scaled features with the reloaded autoencoder
predicted_features = loaded_model.predict(x=features_scaled)

# Mean absolute error between the original input and its reconstruction
mae = mean_absolute_error(y_true=features_scaled, y_pred=predicted_features)

print(f"Mean Absolute Error (MAE) of the model: {mae:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Mean Absolute Error (MAE) of the model: 0.0085
