# **Library Installation**

In [21]:
# Library
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder

In [22]:
# Set seed
np.random.seed(42)

# **Read Data**

In [23]:
# Load dataset
df = pd.read_excel('XCleanCryptocurrencyDataset.xlsx', index_col=0)
display(df)

Unnamed: 0,full_text,processed_text,vader_sentiment
0,'Token: $GROK24 - Grok 2024 Network: Ethereum ...,token grok grok network ethereum contract xccc...,Positive
1,@metaversejoji Let's check @SolanaMono $SOL #W...,let check sol,Negative
2,"Day's DCA: $BTC, $ATOM, $DVPN, $AXL, $JKL, $HU...",day dca btc atom dvpn axl jkl huahua,Negative
3,@BorkSOL @Cerita_Crypto @solana @aeyakovenko Y...,project really amazing thats followed send please,Positive
4,👉 WL FOR .0 SOL MINT 👈 👉40 HOURS TILL SNAPSHOT...,sol mint hour till snapshot requirement join d...,Positive
...,...,...,...
9879,CyberKong VX #11328 was adopted for 0.18 $ETH...,cyberkong adopted eth blur,Negative
9880,BULLISH ON SOLANA BULLISH ON JUP BULLISH ON MA...,bullish solana bullish jup bullish madlads,Negative
9881,@naija_bitcoin 🍿🍿🍿🍿🍿 rd to 3k before valentine...,valentine,Negative
9882,Binance Futures #KLAY/ #USDT Take-Profit targe...,binance future takeprofit target profit period...,Positive


# **Data Splitting**

In [24]:
# Asumsikan df adalah DataFrame Anda yang sudah dimuat
X = df['processed_text']  # Kolom teks yang sudah diproses
y = df['vader_sentiment']  # Target/Label

# Encoding target
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Tokenisasi dan pembuatan sequences
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(sequences, maxlen=100)  # Sesuaikan maxlen sesuai dengan kebutuhan

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(X_padded, y_categorical, test_size=0.2, random_state=42)

# Batasan feature selection
dim = X_train.shape[1]  # Jumlah token maksimal dalam sequences
lb = [0] * dim
ub = [1] * dim

# **Feature Selection**

In [25]:
def objective_function(weights):
    # Memilih fitur berdasarkan bobot feature selection
    # Dalam konteks ini, weights akan menentukan embedding tokens yang akan digunakan
    selected_indices = np.where(weights > 0.5)[0]
    X_train_selected = X_train[:, selected_indices]
    X_test_selected = X_test[:, selected_indices]

    # Definisikan dan latih model LSTM di sini
    model = Sequential([
        Embedding(input_dim=5000, output_dim=300, input_length=len(selected_indices)),  # Sesuaikan parameter
        LSTM(256, dropout=0.2, recurrent_dropout=0.2),
        Dense(y_train.shape[1], activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    start_time = time.time()
    model.fit(X_train_selected, y_train, epochs=10, batch_size=128, verbose=0)  # Kurangi epoch untuk kecepatan
    end_time = time.time()
    time_execution = end_time - start_time

    # Evaluasi model
    loss, accuracy = model.evaluate(X_test_selected, y_test, verbose=0)
    return -accuracy, loss, time_execution  # Negatif karena kita ingin memaksimalkan akurasi

# Particle Swarm Optimization (PSO)

In [28]:
def pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={}, swarmsize=100, omega=0.5, phip=0.5, phig=0.5, maxiter=100, minstep=1e-8, minfunc=1e-8, debug=False):
    """
    Perform a particle swarm optimization (PSO)

    Parameters:
    - func: function to be minimized
    - lb: lower bounds of the design variables
    - ub: upper bounds of the design variables
    - ieqcons: list of inequality constraint functions (optional)
    - f_ieqcons: function returning a list of inequality constraints (optional)
    - args: additional arguments passed to func and f_ieqcons
    - kwargs: additional keyword arguments passed to func and f_ieqcons
    - swarmsize: number of particles in the swarm
    - omega: particle velocity scaling factor
    - phip: scaling factor to search away from the particle's best known position
    - phig: scaling factor to search away from the swarm's best known position
    - maxiter: maximum number of iterations
    - minstep: minimum step size of swarm's best position before the search terminates
    - minfunc: minimum change of swarm's best objective value before the search terminates
    - debug: if True, progress statements will be displayed every iteration

    Returns:
    - g: the swarm's best known position (optimal design)
    - f: the objective value at g
    """
    dim = len(lb)
    # Initialize the particle positions and their velocities
    positions = np.random.uniform(low=lb, high=ub, size=(swarmsize, dim))
    velocities = np.zeros((swarmsize, dim))
    # Initialize the global and local best positions
    personal_best_positions = positions.copy()
    personal_best_values = np.array([np.inf for _ in range(swarmsize)])
    global_best_value = np.inf
    global_best_position = None

    for iteration in range(maxiter):
        # Update velocities and positions
        for i in range(swarmsize):
            r_p, r_g = np.random.rand(dim), np.random.rand(dim)
            velocities[i] = omega * velocities[i] + \
                            phip * r_p * (personal_best_positions[i] - positions[i]) + \
                            phig * r_g * (global_best_position - positions[i]) if global_best_position is not None else 0
            positions[i] += velocities[i]
            positions[i] = np.clip(positions[i], lb, ub)  # Keep within bounds

            # Evaluate the fitness
            value = func(positions[i], *args, **kwargs)
            # Update the personal best
            if value[0] < personal_best_values[i]:
                personal_best_positions[i] = positions[i]
                personal_best_values[i] = value[0]
            # Update the global best
            if value[0] < global_best_value:
                global_best_position = positions[i]
                global_best_value = value[0]

        if debug:
            print(f"Iteration {iteration}: Best Value = {global_best_value}")

        # Check for early stopping criteria
        if np.abs(global_best_value - personal_best_values.min()) < minfunc or np.linalg.norm(velocities.max()) < minstep:
            break

    return global_best_position, global_best_value, value[1], value[2] #value 1 loss, value 2 time exec

In [29]:
# Jalankan PSO
best_solution_pso, best_score_pso, loss_pso, times_pso = pso(func=objective_function, lb=lb, ub=ub, swarmsize=15, maxiter=50)  # Kurangi untuk kecepatan


In [42]:
print(f"Best PSO-LSTM Solution: {best_solution_pso},\nBest Score PSO-LSTM: {-best_score_pso},\nLoss PSO-LSTM: {loss_pso},\nTime Execution PSO-LSTM: {times_pso}")

Best PSO-LSTM Solution: [0.56667511 0.06194734 0.6740345  0.31378824 0.09530371 0.51737825
 0.11498634 0.27555207 0.68306099 0.24752921 0.70062825 0.06438129
 0.33472305 0.50956819 0.33610333 0.35151045 0.36165921 0.31491021
 0.55142646 0.33400724 0.70102802 0.49601811 0.44299027 0.78433151
 0.34930156 0.94452619 0.15605049 0.72189799 0.47133042 0.59198462
 0.62300931 0.73323702 0.04437994 0.17035785 0.7979885  0.61472971
 0.77087226 0.74304677 0.79352928 0.4602397  0.38548844 0.13293633
 0.7747818  0.59460562 0.1376481  0.33959276 0.7251865  0.3661044
 0.34708587 0.44456307 0.22333062 0.42093343 0.42862375 0.22072376
 0.74478143 0.40235544 0.78745897 0.73573729 0.28671633 0.63667443
 0.5779031  0.87693183 0.29292816 0.64982927 0.59746477 0.38506628
 0.15522687 0.612436   0.70130927 0.39226945 0.55649169 0.52824235
 0.71447824 0.20936418 0.82306321 0.49708883 0.26284929 0.08765392
 0.14243725 0.64019651 0.26892317 0.83405132 0.0681965  0.32164437
 0.38151656 0.43053671 0.19016537 0.473

# Ant Colony Optimization (ACO)

In [8]:
def aco(func, lb, ub, ants=100, maxiter=100, alpha=1.0, beta=2.0, evaporation_rate=0.5, pheromone_deposit=0.1, debug=False):
    """
    Perform an Ant Colony Optimization (ACO)

    Parameters:
    - func: The function to be minimized
    - lb: The lower bounds of the design variable(s)
    - ub: The upper bounds of the design variable(s)
    - ants: The number of ants in the colony (Default: 100)
    - maxiter: The maximum number of iterations (Default: 100)
    - alpha: Relative importance of pheromone (Default: 1.0)
    - beta: Relative importance of heuristic information (Default: 2.0)
    - evaporation_rate: Rate at which pheromone evaporates (Default: 0.5)
    - pheromone_deposit: Amount of pheromone deposited by ants (Default: 0.1)
    - debug: If True, progress statements will be displayed (Default: False)

    Returns:
    - The best known position and objective value
    """
    # Initialize variables
    dim = len(lb)
    pheromone_levels = np.ones((ants, dim))
    best_val = np.inf
    best_pos = None

    # Main ACO loop
    for iteration in range(maxiter):
        positions = np.random.uniform(low=lb, high=ub, size=(ants, dim))
        for ant in range(ants):
            val = func(positions[ant])
            if val < best_val:
                best_val = val
                best_pos = positions[ant]

            # Update pheromones
            pheromone_levels[ant] += pheromone_deposit

        # Evaporate pheromones
        pheromone_levels *= (1 - evaporation_rate)

        if debug:
            print(f"Iteration {iteration}: Best Value = {best_val}")

    return best_pos, best_val

In [9]:
# Jalankan ACO
optimizer_results_aco = aco(func=objective_function, lb=lb, ub=ub, ants=10, maxiter=50)  # Kurangi untuk kecepatan

best_weights_aco = optimizer_results_aco[0]

# Cat Swarm Optimization (CSO)

In [10]:
def cso(func, lb, ub, cats=100, maxiter=100, mix_rate=0.5, seeking_memory_pool=5, seeking_range_of_selected_dimension=0.2, counts_of_dimension_to_change=2, debug=False):
    """
    Perform a Cat Swarm Optimization (CSO)

    Parameters:
    - func: The function to be minimized
    - lb: The lower bounds of the design variable(s)
    - ub: The upper bounds of the design variable(s)
    - cats: The number of cats in the swarm (Default: 100)
    - maxiter: The maximum number of iterations (Default: 100)
    - mix_rate: Mixture rate to switch between seeking and tracing modes (Default: 0.5)
    - seeking_memory_pool: Size of memory pool in seeking mode (Default: 5)
    - seeking_range_of_selected_dimension: Range of selected dimension in seeking mode (Default: 0.2)
    - counts_of_dimension_to_change: Number of dimensions to change in seeking mode (Default: 2)
    - debug: If True, progress statements will be displayed (Default: False)

    Returns:
    - The best known position and objective value
    """
    # Initialize variables
    dim = len(lb)
    best_val = np.inf
    best_pos = None
    positions = np.random.uniform(low=lb, high=ub, size=(cats, dim))

    # Main CSO loop
    for iteration in range(maxiter):
        for cat in range(cats):
            if np.random.rand() < mix_rate:
                # Seeking mode
                for _ in range(seeking_memory_pool):
                    candidate_position = positions[cat] + np.random.uniform(-1, 1, size=dim) * seeking_range_of_selected_dimension
                    candidate_position = np.clip(candidate_position, lb, ub)
                    val = func(candidate_position)
                    if val < best_val:
                        best_val = val
                        best_pos = candidate_position
            else:
                # Tracing mode (simplified as random walk in this example)
                positions[cat] += np.random.uniform(-1, 1, size=dim)
                positions[cat] = np.clip(positions[cat], lb, ub)
                val = func(positions[cat])
                if val < best_val:
                    best_val = val
                    best_pos = positions[cat]

        if debug:
            print(f"Iteration {iteration}: Best Value = {best_val}")

    return best_pos, best_val

In [11]:
# Jalankan CSO
optimizer_results_cso = cso(func=objective_function, lb=lb, ub=ub, cats=10, maxiter=50)  # Kurangi untuk kecepatan

best_weights_cso = optimizer_results_cso[0]

# **Modeling**

In [53]:
def create_lstm_model(input_length, num_classes):
    model = Sequential([
        Embedding(input_dim=5000, output_dim=50, input_length=input_length),
        LSTM(50, dropout=0.2, recurrent_dropout=0.2, kernel_regularizer=l1_l2(l1=0.01, l2=0.01)),
        Dense(num_classes, activation='softmax' if num_classes > 2 else 'sigmoid') # Gunakan 'sigmoid' untuk binary, 'softmax' untuk multiclass
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy' if num_classes > 2 else 'binary_crossentropy', metrics=['accuracy']) # Sesuaikan loss function
    return model

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min', restore_best_weights=True)

# LSTM

In [54]:
model_lstm = create_lstm_model(X_train.shape[1], y_train.shape[1])
history_lstm = model_lstm.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=64, callbacks=[early_stopping])

# Evaluasi model
loss, accuracy = model_lstm.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 7: early stopping
Test Loss: 0.7887192368507385
Test Accuracy: 0.7211782336235046


# PSO-LSTM

In [55]:
# Asumsikan best_weights sudah ada
selected_indices_pso = np.where(best_weights_pso > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_pso = X_train[:, selected_indices_pso]
X_test_selected_pso = X_test[:, selected_indices_pso]

model_pso_lstm = create_lstm_model(X_train_selected_pso.shape[1], y_train.shape[1])
history_pso_lstm = model_pso_lstm.fit(X_train_selected_pso, y_train, validation_split=0.2, epochs=10, batch_size=64, callbacks=[early_stopping])

# Evaluasi model
loss, accuracy = model_pso_lstm.evaluate(X_test_selected_pso, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 7: early stopping
Test Loss: 0.8284865021705627
Test Accuracy: 0.7034027576446533


# ACO-LSTM

In [56]:
# Asumsikan best_weights sudah ada
selected_indices_aco = np.where(best_weights_aco > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_aco = X_train[:, selected_indices_aco]
X_test_selected_aco = X_test[:, selected_indices_aco]

model_aco_lstm = create_lstm_model(X_train_selected_aco.shape[1], y_train.shape[1])
history_aco_lstm = model_aco_lstm.fit(X_train_selected_aco, y_train, validation_split=0.2, epochs=10, batch_size=64, callbacks=[early_stopping])

# Evaluasi model
loss, accuracy = model_aco_lstm.evaluate(X_test_selected_aco, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 8: early stopping
Test Loss: 0.837474524974823
Test Accuracy: 0.7181310057640076


# CSO-LSTM

In [57]:
# Asumsikan best_weights sudah ada
selected_indices_cso = np.where(best_weights_cso > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_cso = X_train[:, selected_indices_cso]
X_test_selected_cso = X_test[:, selected_indices_cso]

model_cso_lstm = create_lstm_model(X_train_selected_cso.shape[1], y_train.shape[1])
history_cso_lstm = model_cso_lstm.fit(X_train_selected_cso, y_train, validation_split=0.2, epochs=10, batch_size=64, callbacks=[early_stopping])

# Evaluasi model
loss, accuracy = model_cso_lstm.evaluate(X_test_selected_cso, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 8: early stopping
Test Loss: 0.8900994062423706
Test Accuracy: 0.679024875164032
