# **Library Installation**

In [1]:
# Library
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, roc_auc_score




In [2]:
# Set seed
np.random.seed(42)

# **Read Data**

In [3]:
# Load dataset
df = pd.read_excel('XCleanCryptocurrencyDataset.xlsx', index_col=0)
display(df)

Unnamed: 0,full_text,processed_text,vader_sentiment
0,'Token: $GROK24 - Grok 2024 Network: Ethereum ...,token grok grok network ethereum contract xccc...,Positive
1,@metaversejoji Let's check @SolanaMono $SOL #W...,let check sol,Neutral
2,"Day's DCA: $BTC, $ATOM, $DVPN, $AXL, $JKL, $HU...",day dca btc atom dvpn axl jkl huahua,Neutral
3,@BorkSOL @Cerita_Crypto @solana @aeyakovenko Y...,project really amazing thats followed send please,Positive
4,👉 WL FOR .0 SOL MINT 👈 👉40 HOURS TILL SNAPSHOT...,sol mint hour till snapshot requirement join d...,Positive
...,...,...,...
9879,CyberKong VX #11328 was adopted for 0.18 $ETH...,cyberkong adopted eth blur,Neutral
9880,BULLISH ON SOLANA BULLISH ON JUP BULLISH ON MA...,bullish solana bullish jup bullish madlads,Neutral
9881,@naija_bitcoin 🍿🍿🍿🍿🍿 rd to 3k before valentine...,valentine,Neutral
9882,Binance Futures #KLAY/ #USDT Take-Profit targe...,binance future takeprofit target profit period...,Positive


# **Data Splitting**

In [4]:
# Asumsikan df adalah DataFrame Anda yang sudah dimuat
X = df['processed_text']  # Kolom teks yang sudah diproses
y = df['vader_sentiment']  # Target/Label

# Encoding target
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Tokenisasi dan pembuatan sequences
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(sequences, maxlen=1000)  # Sesuaikan maxlen sesuai dengan kebutuhan default:100

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(X_padded, y_categorical, test_size=0.2, random_state=42)

# Batasan feature selection
dim = X_train.shape[1]  # Jumlah token maksimal dalam sequences
lb = [0] * dim
ub = [1] * dim

In [5]:
# # Assuming df is your loaded DataFrame
# X = df['processed_text']  # Preprocessed text column
# y = df['vader_sentiment']  # Target/Label

# # Encoding the target
# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(y)
# y_categorical = to_categorical(y_encoded)

# # Apply TF-IDF encoding instead of tokenization and padding
# tfidf_vectorizer = TfidfVectorizer(max_features=100)  # Adjust max_features as needed
# X_tfidf = tfidf_vectorizer.fit_transform(X).toarray()

# # Splitting data
# X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y_categorical, test_size=0.2, random_state=42)

# # Since TF-IDF produces a dense matrix of features, we adjust the feature selection bounds accordingly
# dim = X_train.shape[1]  # The new dimension is the number of features from TF-IDF
# lb = [0] * dim  # Lower bound for each feature
# ub = [1] * dim  # Upper bound for each feature

# **Feature Selection**

# Particle Swarm Optimization (PSO)

In [6]:
def pso(lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={}, swarmsize=100, omega=0.5, phip=0.5, phig=0.5, maxiter=100, minstep=1e-8, minfunc=1e-8, debug=False):
    """
    Perform a particle swarm optimization (PSO) without an external objective function.

    Parameters:
    - lb: lower bounds of the design variables
    - ub: upper bounds of the design variables
    - ieqcons: list of inequality constraint functions (optional)
    - f_ieqcons: function returning a list of inequality constraints (optional)
    - args: additional arguments passed to the internal evaluation function
    - kwargs: additional keyword arguments passed to the internal evaluation function
    - swarmsize: number of particles in the swarm
    - omega: particle velocity scaling factor
    - phip: scaling factor to search away from the particle's best known position
    - phig: scaling factor to search away from the swarm's best known position
    - maxiter: maximum number of iterations
    - minstep: minimum step size of swarm's best position before the search terminates
    - minfunc: minimum change of swarm's best objective value before the search terminates
    - debug: if True, progress statements will be displayed every iteration

    Returns:
    - g: the swarm's best known position (optimal design)
    - f: the objective value at g
    """
    dim = len(lb)
    # Initialize the particle positions and their velocities
    positions = np.random.uniform(low=lb, high=ub, size=(swarmsize, dim))
    velocities = np.zeros((swarmsize, dim))
    # Initialize the global and local best positions
    personal_best_positions = positions.copy()
    personal_best_values = np.array([np.inf for _ in range(swarmsize)])
    global_best_value = np.inf
    global_best_position = None

    def evaluate(position):
        # Define the objective directly inside PSO. For demonstration, using sum of squares.
        return np.sum(position**2)

    for iteration in range(maxiter):
        # Update velocities and positions
        for i in range(swarmsize):
            r_p, r_g = np.random.rand(dim), np.random.rand(dim)
            velocities[i] = omega * velocities[i] + \
                            phip * r_p * (personal_best_positions[i] - positions[i]) + \
                            phig * r_g * (global_best_position - positions[i]) if global_best_position is not None else 0
            positions[i] += velocities[i]
            positions[i] = np.clip(positions[i], lb, ub)  # Keep within bounds

            # Evaluate the fitness directly without an external function
            value = evaluate(positions[i])
            # Update the personal best
            if value < personal_best_values[i]:
                personal_best_positions[i] = positions[i]
                personal_best_values[i] = value
            # Update the global best
            if value < global_best_value:
                global_best_position = positions[i]
                global_best_value = value

        if debug:
            print(f"Iteration {iteration}: Best Value = {global_best_value}")

        # Check for early stopping criteria
        if np.abs(global_best_value - personal_best_values.min()) < minfunc or np.linalg.norm(velocities.max()) < minstep:
            break

    return global_best_position, global_best_value

In [7]:
# Calling the modified PSO function
best_pos_pso, best_val_pso = pso(lb=lb, ub=ub, swarmsize=100, maxiter=100)  # Kurangi untuk kecepatan

print(f"Optimal Position: {best_pos_pso}")
print(f"Objective Value at Optimal Position: {best_val_pso}")

Optimal Position: [0.35991506 0.25832553 0.17457657 0.70890614 0.70901387 0.12253228
 0.16202495 0.86759261 0.67683665 0.73486348 0.14925183 0.07399279
 0.39945882 0.46360679 0.45862317 0.54375208 0.63640685 0.44840585
 0.17753393 0.49907648 0.38063109 0.40731705 0.15675811 0.4992129
 0.70485381 0.57937239 0.73671191 0.84708222 0.67493368 0.24590818
 0.52458943 0.26237659 0.35745392 0.39749861 0.53717026 0.60887031
 0.61984983 0.45545886 0.28010228 0.59271557 0.7315377  0.58581156
 0.37169813 0.92119942 0.32553578 0.55686064 0.18565107 0.75142074
 0.65531181 0.41182608 0.68810899 0.29917674 0.24166755 0.45391906
 0.63243145 0.19378382 0.60617622 0.60868018 0.04524871 0.49343289
 0.49698172 0.663065   0.77628783 0.29755796 0.61969487 0.21372087
 0.41357171 0.91158501 0.78872063 0.50445342 0.23618572 0.25458398
 0.7147852  0.52496208 0.69921876 0.71141705 0.3217466  0.2632762
 0.05243295 0.18992464 0.31200702 0.24415049 0.42788616 0.78257428
 0.68743916 0.51442654 0.52487002 0.69400317 0

# Ant Colony Optimization (ACO)

In [8]:
def aco(lb, ub, ants=100, maxiter=100, alpha=1.0, beta=2.0, evaporation_rate=0.5, pheromone_deposit=0.1, debug=False):
    """
    Perform an Ant Colony Optimization (ACO) without an external objective function.

    Parameters:
    - lb: The lower bounds of the design variable(s)
    - ub: The upper bounds of the design variable(s)
    - ants: The number of ants in the colony (Default: 100)
    - maxiter: The maximum number of iterations (Default: 100)
    - alpha: Relative importance of pheromone (Default: 1.0)
    - beta: Relative importance of heuristic information (Default: 2.0)
    - evaporation_rate: Rate at which pheromone evaporates (Default: 0.5)
    - pheromone_deposit: Amount of pheromone deposited by ants (Default: 0.1)
    - debug: If True, progress statements will be displayed (Default: False)

    Returns:
    - The best known position and objective value
    """
    dim = len(lb)
    pheromone_levels = np.ones((ants, dim))
    best_val = np.inf
    best_pos = None

    def evaluate(position):
        # Define the objective directly inside ACO. For demonstration, using sum of squares.
        return np.sum(position**2)

    # Main ACO loop
    for iteration in range(maxiter):
        positions = np.random.uniform(low=lb, high=ub, size=(ants, dim))
        for ant in range(ants):
            val = evaluate(positions[ant])  # Use the internal evaluate function
            if val < best_val:
                best_val = val
                best_pos = positions[ant]

            # Update pheromones
            pheromone_levels[ant] += pheromone_deposit

        # Evaporate pheromones
        pheromone_levels *= (1 - evaporation_rate)

        if debug:
            print(f"Iteration {iteration}: Best Value = {best_val}")

    return best_pos, best_val

In [9]:
# Calling the modified PSO function
best_pos_aco, best_val_aco = aco(lb=lb, ub=ub, ants=100, maxiter=100)

print(f"Optimal Position: {best_pos_aco}")
print(f"Objective Value at Optimal Position: {best_val_aco}")

Optimal Position: [0.19691547 0.35158993 0.92117734 0.10252191 0.57493406 0.7361747
 0.37994098 0.24434645 0.99141828 0.15037959 0.3136146  0.59097791
 0.78702276 0.6272616  0.04126047 0.01877709 0.00535634 0.10991951
 0.95273278 0.9614997  0.2143631  0.54187743 0.38206646 0.97725769
 0.38436827 0.41866795 0.16300202 0.13162775 0.73165949 0.6958299
 0.75791223 0.85832678 0.43024306 0.25406855 0.08122921 0.34723733
 0.84320706 0.98252415 0.24880773 0.65356582 0.74249317 0.89658656
 0.91523492 0.9717555  0.4605464  0.7539407  0.20766435 0.53487099
 0.19281738 0.03925469 0.98723849 0.80787787 0.06489214 0.49246565
 0.64510767 0.86824849 0.10859142 0.99688986 0.42033827 0.59750946
 0.16532794 0.95865765 0.24444413 0.08199649 0.44766087 0.40689459
 0.45658645 0.83158254 0.6432925  0.7342336  0.60746273 0.2864221
 0.16155206 0.54702809 0.48266984 0.31626299 0.25024589 0.57891597
 0.98821114 0.97657622 0.7346792  0.44841906 0.35016734 0.00784013
 0.62124943 0.5756385  0.03968691 0.9109175  0.

# Cat Swarm Optimization (CSO)

In [10]:
def cso(lb, ub, cats=100, maxiter=100, mix_rate=0.5, seeking_memory_pool=5, seeking_range_of_selected_dimension=0.2, counts_of_dimension_to_change=2, debug=False):
    """
    Perform a Cat Swarm Optimization (CSO) without an external objective function.

    Parameters:
    - lb: The lower bounds of the design variable(s)
    - ub: The upper bounds of the design variable(s)
    - cats: The number of cats in the swarm (Default: 100)
    - maxiter: The maximum number of iterations (Default: 100)
    - mix_rate: Mixture rate to switch between seeking and tracing modes (Default: 0.5)
    - seeking_memory_pool: Size of memory pool in seeking mode (Default: 5)
    - seeking_range_of_selected_dimension: Range of selected dimension in seeking mode (Default: 0.2)
    - counts_of_dimension_to_change: Number of dimensions to change in seeking mode (Default: 2)
    - debug: If True, progress statements will be displayed (Default: False)

    Returns:
    - The best known position and objective value
    """
    dim = len(lb)
    best_val = np.inf
    best_pos = None
    positions = np.random.uniform(low=lb, high=ub, size=(cats, dim))

    def evaluate(position):
        # Define the objective directly inside CSO. For demonstration, using sum of squares.
        return np.sum(position**2)

    # Main CSO loop
    for iteration in range(maxiter):
        for cat in range(cats):
            if np.random.rand() < mix_rate:
                # Seeking mode
                for _ in range(seeking_memory_pool):
                    candidate_position = positions[cat] + np.random.uniform(-1, 1, size=dim) * seeking_range_of_selected_dimension
                    candidate_position = np.clip(candidate_position, lb, ub)
                    val = evaluate(candidate_position)  # Use the internal evaluate function
                    if val < best_val:
                        best_val = val
                        best_pos = candidate_position
            else:
                # Tracing mode (simplified as random walk in this example)
                positions[cat] += np.random.uniform(-1, 1, size=dim)
                positions[cat] = np.clip(positions[cat], lb, ub)
                val = evaluate(positions[cat])  # Use the internal evaluate function
                if val < best_val:
                    best_val = val
                    best_pos = positions[cat]

        if debug:
            print(f"Iteration {iteration}: Best Value = {best_val}")

    return best_pos, best_val

In [11]:
best_pos_cso, best_val_cso = cso(lb=lb, ub=ub, cats=100, maxiter=100)

print(f"Optimal Position: {best_pos_cso}")
print(f"Objective Value at Optimal Position: {best_val_cso}")

Optimal Position: [0.81826583 0.33427775 0.76507779 0.69836002 0.71104877 0.06134806
 0.72190407 0.02812705 0.59048766 0.5311182  0.8286445  0.96202035
 0.         0.23776484 0.21333852 0.20215863 0.15841281 0.97497728
 0.13985121 0.21306554 0.1188011  0.         0.73563864 0.96608398
 0.99702987 0.51055158 0.77976659 0.00240278 0.83366392 0.2998048
 0.6317847  0.20310213 0.74116226 0.24279657 1.         0.47581504
 0.05831978 0.83810079 0.70356862 0.51149586 0.65635207 0.29865283
 0.85147655 0.62933294 0.29672228 0.         0.48366284 0.09133543
 1.         0.21271731 0.50029436 0.94217084 0.88682571 0.95421887
 0.28358025 0.76887235 0.8546622  0.25101675 0.63626442 0.11053425
 0.84242426 0.88474396 0.62344376 0.34270617 0.21397351 0.
 0.44352359 0.81147033 0.4579802  0.69511    0.89369629 0.55384003
 0.23238559 0.30755496 0.60403851 0.51970023 0.38308325 0.02870732
 0.26185649 0.37394818 0.62139755 0.01230802 0.89304189 0.20136246
 0.64748951 0.68894493 0.21342344 0.21140755 0.697904

# **Modeling**

In [12]:
def create_lstm_model(input_length, num_classes):
    model = Sequential([
        Embedding(input_dim=5000, output_dim=50, input_length=input_length),
        LSTM(50, dropout=0.4, recurrent_dropout=0.4),
        Dense(num_classes, activation='softmax' if num_classes > 2 else 'sigmoid') # Gunakan 'sigmoid' untuk binary, 'softmax' untuk multiclass
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy' if num_classes > 2 else 'binary_crossentropy', metrics=['accuracy']) # Sesuaikan loss function
    return model

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min', restore_best_weights=True)

# LSTM

In [13]:
model_lstm = create_lstm_model(X_train.shape[1], y_train.shape[1])

start_time_lstm = time.time()
history_lstm = model_lstm.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=64, callbacks=[early_stopping])
end_time_lstm = time.time()
time_execution_lstm = end_time_lstm - start_time_lstm

# Evaluasi model
loss_lstm, accuracy_lstm = model_lstm.evaluate(X_test, y_test)
print(f"Test Loss: {loss_lstm}")
print(f"Test Accuracy: {accuracy_lstm}")
print(f"Time Execution: {time_execution_lstm}")



Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Test Loss: 0.5759505033493042
Test Accuracy: 0.797359049320221
Time Execution: 354.71002864837646


# PSO-LSTM

In [14]:
# Asumsikan best_weights sudah ada
selected_indices_pso = np.where(best_pos_pso > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_pso = X_train[:, selected_indices_pso]
X_test_selected_pso = X_test[:, selected_indices_pso]

model_pso_lstm = create_lstm_model(X_train_selected_pso.shape[1], y_train.shape[1])

start_time_pso_lstm = time.time()
history_pso_lstm = model_pso_lstm.fit(X_train_selected_pso, y_train, validation_split=0.2, epochs=10, batch_size=64, callbacks=[early_stopping])
end_time_pso_lstm = time.time()
time_execution_pso_lstm = end_time_pso_lstm - start_time_pso_lstm

# Evaluasi model
loss_pso_lstm, accuracy_pso_lstm = model_pso_lstm.evaluate(X_test_selected_pso, y_test)
print(f"Test Loss: {loss_pso_lstm}")
print(f"Test Accuracy: {accuracy_pso_lstm}")
print(f"Time Execution: {time_execution_pso_lstm}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Test Loss: 0.7377488613128662
Test Accuracy: 0.6754697561264038
Time Execution: 132.95318913459778


# ACO-LSTM

In [15]:
# Asumsikan best_weights sudah ada
selected_indices_aco = np.where(best_pos_aco > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_aco = X_train[:, selected_indices_aco]
X_test_selected_aco = X_test[:, selected_indices_aco]

model_aco_lstm = create_lstm_model(X_train_selected_aco.shape[1], y_train.shape[1])

start_time_aco_lstm = time.time()
history_aco_lstm = model_aco_lstm.fit(X_train_selected_aco, y_train, validation_split=0.2, epochs=10, batch_size=64, callbacks=[early_stopping])
end_time_aco_lstm = time.time()
time_execution_aco_lstm = end_time_aco_lstm - start_time_aco_lstm

# Evaluasi model
loss_aco_lstm, accuracy_aco_lstm = model_aco_lstm.evaluate(X_test_selected_aco, y_test)
print(f"Test Loss: {loss_aco_lstm}")
print(f"Test Accuracy: {accuracy_aco_lstm}")
print(f"Time Execution: {time_execution_aco_lstm}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Test Loss: 0.6664141416549683
Test Accuracy: 0.7465718388557434
Time Execution: 126.53417634963989


# CSO-LSTM

In [16]:
# Asumsikan best_weights sudah ada
selected_indices_cso = np.where(best_pos_cso > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_cso = X_train[:, selected_indices_cso]
X_test_selected_cso = X_test[:, selected_indices_cso]

model_cso_lstm = create_lstm_model(X_train_selected_cso.shape[1], y_train.shape[1])

start_time_cso_lstm = time.time()
history_cso_lstm = model_cso_lstm.fit(X_train_selected_cso, y_train, validation_split=0.2, epochs=10, batch_size=64, callbacks=[early_stopping])
end_time_cso_lstm = time.time()
time_execution_cso_lstm = end_time_cso_lstm - start_time_cso_lstm

# Evaluasi model
loss_cso_lstm, accuracy_cso_lstm = model_cso_lstm.evaluate(X_test_selected_cso, y_test)
print(f"Test Loss: {loss_cso_lstm}")
print(f"Test Accuracy: {accuracy_cso_lstm}")
print(f"Time Execution: {time_execution_cso_lstm}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Test Loss: 0.6623703241348267
Test Accuracy: 0.7389537692070007
Time Execution: 132.79375791549683
