# **Library Installation**

In [11]:
# Library
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder

In [2]:
# Set seed
np.random.seed(42)

# **Read Data**

In [3]:
# Load dataset
df = pd.read_excel('XCleanCryptocurrencyDataset.xlsx', index_col=0)
display(df)

Unnamed: 0,full_text,processed_text,vader_sentiment
0,'Token: $GROK24 - Grok 2024 Network: Ethereum ...,token grok grok network ethereum contract xccc...,Positive
1,@metaversejoji Let's check @SolanaMono $SOL #W...,let check sol,Neutral
2,"Day's DCA: $BTC, $ATOM, $DVPN, $AXL, $JKL, $HU...",day dca btc atom dvpn axl jkl huahua,Neutral
3,@BorkSOL @Cerita_Crypto @solana @aeyakovenko Y...,project really amazing thats followed send please,Positive
4,👉 WL FOR .0 SOL MINT 👈 👉40 HOURS TILL SNAPSHOT...,sol mint hour till snapshot requirement join d...,Positive
...,...,...,...
9879,CyberKong VX #11328 was adopted for 0.18 $ETH...,cyberkong adopted eth blur,Neutral
9880,BULLISH ON SOLANA BULLISH ON JUP BULLISH ON MA...,bullish solana bullish jup bullish madlads,Neutral
9881,@naija_bitcoin 🍿🍿🍿🍿🍿 rd to 3k before valentine...,valentine,Neutral
9882,Binance Futures #KLAY/ #USDT Take-Profit targe...,binance future takeprofit target profit period...,Positive


# **Data Splitting**

In [46]:
# Asumsikan df adalah DataFrame Anda yang sudah dimuat
X = df['processed_text']  # Kolom teks yang sudah diproses
y = df['vader_sentiment']  # Target/Label

# Encoding target
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Tokenisasi dan pembuatan sequences
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(sequences, maxlen=100)  # Sesuaikan maxlen sesuai dengan kebutuhan

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(X_padded, y_categorical, test_size=0.2, random_state=42)

# Batasan feature selection
dim = X_train.shape[1]  # Jumlah token maksimal dalam sequences
lb = [0] * dim
ub = [1] * dim

# **Feature Selection**

In [45]:
def objective_function(weights):
    # Memilih fitur berdasarkan bobot feature selection
    # Dalam konteks ini, weights akan menentukan embedding tokens yang akan digunakan
    selected_indices = np.where(weights > 0.5)[0]
    X_train_selected = X_train[:, selected_indices]
    X_test_selected = X_test[:, selected_indices]

    # Definisikan dan latih model LSTM di sini
    model = Sequential([
        Embedding(input_dim=5000, output_dim=50, input_length=len(selected_indices)),  # Sesuaikan parameter
        LSTM(50, dropout=0.2, recurrent_dropout=0.2),
        Dense(y_train.shape[1], activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train_selected, y_train, epochs=3, batch_size=64, verbose=0)  # Kurangi epoch untuk kecepatan

    # Evaluasi model
    loss, accuracy = model.evaluate(X_test_selected, y_test, verbose=0)
    return -accuracy  # Negatif karena kita ingin memaksimalkan akurasi

# Particle Swarm Optimization (PSO)

In [26]:
def pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={}, swarmsize=100, omega=0.5, phip=0.5, phig=0.5, maxiter=100, minstep=1e-8, minfunc=1e-8, debug=False):
    """
    Perform a particle swarm optimization (PSO)

    Parameters:
    - func: function to be minimized
    - lb: lower bounds of the design variables
    - ub: upper bounds of the design variables
    - ieqcons: list of inequality constraint functions (optional)
    - f_ieqcons: function returning a list of inequality constraints (optional)
    - args: additional arguments passed to func and f_ieqcons
    - kwargs: additional keyword arguments passed to func and f_ieqcons
    - swarmsize: number of particles in the swarm
    - omega: particle velocity scaling factor
    - phip: scaling factor to search away from the particle's best known position
    - phig: scaling factor to search away from the swarm's best known position
    - maxiter: maximum number of iterations
    - minstep: minimum step size of swarm's best position before the search terminates
    - minfunc: minimum change of swarm's best objective value before the search terminates
    - debug: if True, progress statements will be displayed every iteration

    Returns:
    - g: the swarm's best known position (optimal design)
    - f: the objective value at g
    """
    dim = len(lb)
    # Initialize the particle positions and their velocities
    positions = np.random.uniform(low=lb, high=ub, size=(swarmsize, dim))
    velocities = np.zeros((swarmsize, dim))
    # Initialize the global and local best positions
    personal_best_positions = positions.copy()
    personal_best_values = np.array([np.inf for _ in range(swarmsize)])
    global_best_value = np.inf
    global_best_position = None

    for iteration in range(maxiter):
        # Update velocities and positions
        for i in range(swarmsize):
            r_p, r_g = np.random.rand(dim), np.random.rand(dim)
            velocities[i] = omega * velocities[i] + \
                            phip * r_p * (personal_best_positions[i] - positions[i]) + \
                            phig * r_g * (global_best_position - positions[i]) if global_best_position is not None else 0
            positions[i] += velocities[i]
            positions[i] = np.clip(positions[i], lb, ub)  # Keep within bounds

            # Evaluate the fitness
            value = func(positions[i], *args, **kwargs)
            # Update the personal best
            if value < personal_best_values[i]:
                personal_best_positions[i] = positions[i]
                personal_best_values[i] = value
            # Update the global best
            if value < global_best_value:
                global_best_position = positions[i]
                global_best_value = value

        if debug:
            print(f"Iteration {iteration}: Best Value = {global_best_value}")

        # Check for early stopping criteria
        if np.abs(global_best_value - personal_best_values.min()) < minfunc or np.linalg.norm(velocities.max()) < minstep:
            break

    return global_best_position, global_best_value

In [47]:
# Jalankan PSO
optimizer_results_pso = pso(func=objective_function, lb=lb, ub=ub, swarmsize=10, maxiter=50)  # Kurangi untuk kecepatan
# 10-20 partikel dan 50-100 iterasi 
best_weights_pso = optimizer_results_pso[0]

# Ant Colony Optimization (ACO)

In [8]:
def aco(func, lb, ub, ants=100, maxiter=100, alpha=1.0, beta=2.0, evaporation_rate=0.5, pheromone_deposit=0.1, debug=False):
    """
    Perform an Ant Colony Optimization (ACO)

    Parameters:
    - func: The function to be minimized
    - lb: The lower bounds of the design variable(s)
    - ub: The upper bounds of the design variable(s)
    - ants: The number of ants in the colony (Default: 100)
    - maxiter: The maximum number of iterations (Default: 100)
    - alpha: Relative importance of pheromone (Default: 1.0)
    - beta: Relative importance of heuristic information (Default: 2.0)
    - evaporation_rate: Rate at which pheromone evaporates (Default: 0.5)
    - pheromone_deposit: Amount of pheromone deposited by ants (Default: 0.1)
    - debug: If True, progress statements will be displayed (Default: False)

    Returns:
    - The best known position and objective value
    """
    # Initialize variables
    dim = len(lb)
    pheromone_levels = np.ones((ants, dim))
    best_val = np.inf
    best_pos = None

    # Main ACO loop
    for iteration in range(maxiter):
        positions = np.random.uniform(low=lb, high=ub, size=(ants, dim))
        for ant in range(ants):
            val = func(positions[ant])
            if val < best_val:
                best_val = val
                best_pos = positions[ant]

            # Update pheromones
            pheromone_levels[ant] += pheromone_deposit

        # Evaporate pheromones
        pheromone_levels *= (1 - evaporation_rate)

        if debug:
            print(f"Iteration {iteration}: Best Value = {best_val}")

    return best_pos, best_val

In [None]:
# Jalankan ACO
optimizer_results_aco = aco(func=objective_function, lb=lb, ub=ub, ants=50, maxiter=50)  # Kurangi untuk kecepatan

best_weights_aco = optimizer_results_aco[0]

# Cat Swarm Optimization (CSO)

In [None]:
def cso(func, lb, ub, cats=100, maxiter=100, mix_rate=0.5, seeking_memory_pool=5, seeking_range_of_selected_dimension=0.2, counts_of_dimension_to_change=2, debug=False):
    """
    Perform a Cat Swarm Optimization (CSO)

    Parameters:
    - func: The function to be minimized
    - lb: The lower bounds of the design variable(s)
    - ub: The upper bounds of the design variable(s)
    - cats: The number of cats in the swarm (Default: 100)
    - maxiter: The maximum number of iterations (Default: 100)
    - mix_rate: Mixture rate to switch between seeking and tracing modes (Default: 0.5)
    - seeking_memory_pool: Size of memory pool in seeking mode (Default: 5)
    - seeking_range_of_selected_dimension: Range of selected dimension in seeking mode (Default: 0.2)
    - counts_of_dimension_to_change: Number of dimensions to change in seeking mode (Default: 2)
    - debug: If True, progress statements will be displayed (Default: False)

    Returns:
    - The best known position and objective value
    """
    # Initialize variables
    dim = len(lb)
    best_val = np.inf
    best_pos = None
    positions = np.random.uniform(low=lb, high=ub, size=(cats, dim))

    # Main CSO loop
    for iteration in range(maxiter):
        for cat in range(cats):
            if np.random.rand() < mix_rate:
                # Seeking mode
                for _ in range(seeking_memory_pool):
                    candidate_position = positions[cat] + np.random.uniform(-1, 1, size=dim) * seeking_range_of_selected_dimension
                    candidate_position = np.clip(candidate_position, lb, ub)
                    val = func(candidate_position)
                    if val < best_val:
                        best_val = val
                        best_pos = candidate_position
            else:
                # Tracing mode (simplified as random walk in this example)
                positions[cat] += np.random.uniform(-1, 1, size=dim)
                positions[cat] = np.clip(positions[cat], lb, ub)
                val = func(positions[cat])
                if val < best_val:
                    best_val = val
                    best_pos = positions[cat]

        if debug:
            print(f"Iteration {iteration}: Best Value = {best_val}")

    return best_pos, best_val

In [None]:
# Jalankan CSO
optimizer_results_cso = cso(func=objective_function, lb=lb, ub=ub, cats=50, maxiter=50)  # Kurangi untuk kecepatan

best_weights_cso = optimizer_results_cso[0]

# **Modeling**

# LSTM

In [48]:
def create_lstm_model(input_length, num_classes):
    model = Sequential([
        Embedding(input_dim=5000, output_dim=50, input_length=input_length),
        LSTM(50, dropout=0.2, recurrent_dropout=0.2),
        Dense(num_classes, activation='softmax' if num_classes > 2 else 'sigmoid')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy' if num_classes > 2 else 'binary_crossentropy', metrics=['accuracy'])
    return model

In [49]:
model_lstm = create_lstm_model(X_train.shape[1], y_train.shape[1])
history_lstm = model_lstm.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [50]:
# Evaluasi model
loss, accuracy = model_lstm.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Test Loss: 0.931121289730072
Test Accuracy: 0.8136109709739685


In [31]:
# max_length = max([len(x) for x in X_train_seq])
# X_train_pad = pad_sequences(X_train_seq, maxlen=max_length, padding='post')
# X_test_pad = pad_sequences(X_test_seq, maxlen=max_length, padding='post')

# # Definisikan model
# model_pso_lstm = Sequential([
#     # Sesuaikan input_dim dengan ukuran vocabulary atau num_words dan output_dim dengan dimensi embedding
#     Embedding(input_dim=5000, output_dim=50, input_length=X_train_selected_pso.shape[1]),
#     LSTM(50, dropout=0.2, recurrent_dropout=0.2),
#     Dense(y_train.shape[1], activation='softmax')  # Gunakan 'sigmoid' untuk binary, 'softmax' untuk multiclass
# ])

# model_pso_lstm.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Sesuaikan loss function

# # Pelatihan model
# history = model_pso_lstm.fit(X_train_selected_pso, y_train, epochs=10, batch_size=64, validation_split=0.2)

# # Evaluasi model
# loss, accuracy = model_pso_lstm.evaluate(X_test_selected_pso, y_test)
# print(f"Test Loss: {loss}")
# print(f"Test Accuracy: {accuracy}")

NameError: name 'X_train_seq' is not defined

# PSO-LSTM

In [51]:
# Asumsikan best_weights sudah ada
selected_indices_pso = np.where(best_weights_pso > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_pso = X_train[:, selected_indices_pso]
X_test_selected_pso = X_test[:, selected_indices_pso]

# Definisikan model
model_pso_lstm = Sequential([
    # Sesuaikan input_dim dengan ukuran vocabulary atau num_words dan output_dim dengan dimensi embedding
    Embedding(input_dim=5000, output_dim=50, input_length=X_train_selected_pso.shape[1]),
    LSTM(50, dropout=0.2, recurrent_dropout=0.2),
    Dense(y_train.shape[1], activation='softmax')  # Gunakan 'sigmoid' untuk binary, 'softmax' untuk multiclass
])

model_pso_lstm.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Sesuaikan loss function

# Pelatihan model
history = model_pso_lstm.fit(X_train_selected_pso, y_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluasi model
loss, accuracy = model_pso_lstm.evaluate(X_test_selected_pso, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 1.1294374465942383
Test Accuracy: 0.7953276038169861


# ACO-LSTM

In [None]:
# Asumsikan best_weights sudah ada
selected_indices_aco = np.where(best_weights_aco > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_aco = X_train[:, selected_indices_aco]
X_test_selected_aco = X_test[:, selected_indices_aco]

# Definisikan model
model_aco_lstm = Sequential([
    # Sesuaikan input_dim dengan ukuran vocabulary atau num_words dan output_dim dengan dimensi embedding
    Embedding(input_dim=5000, output_dim=128, input_length=X_train_selected_aco.shape[1]),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(y_train.shape[1], activation='softmax')  # Gunakan 'sigmoid' untuk binary, 'softmax' untuk multiclass
])

model_aco_lstm.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Sesuaikan loss function

# Pelatihan model
history = model_aco_lstm.fit(X_train_selected_aco, y_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluasi model
loss, accuracy = model_aco_lstm.evaluate(X_test_selected_aco, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# CSO-LSTM

In [None]:
# Asumsikan best_weights sudah ada
selected_indices_cso = np.where(best_weights_cso > 0.5)[0]  # Ambil indeks dengan bobot > 0.5

# Memfilter X_train dan X_test berdasarkan fitur terpilih
X_train_selected_cso = X_train[:, selected_indices_cso]
X_test_selected_cso = X_test[:, selected_indices_cso]

# Definisikan model
model_cso_lstm = Sequential([
    # Sesuaikan input_dim dengan ukuran vocabulary atau num_words dan output_dim dengan dimensi embedding
    Embedding(input_dim=5000, output_dim=128, input_length=X_train_selected_cso.shape[1]),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(y_train.shape[1], activation='softmax')  # Gunakan 'sigmoid' untuk binary, 'softmax' untuk multiclass
])

model_cso_lstm.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Sesuaikan loss function

# Pelatihan model
history = model_cso_lstm.fit(X_train_selected_cso, y_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluasi model
loss, accuracy = model_cso_lstm.evaluate(X_test_selected_cso, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")