In [15]:
import numpy as np 
import pandas as pd

# Data Preparation

In [30]:
# Read the two source CSV files into separate frames; the next cell
# concatenates them into a single dataset.
hd, bd = (pd.read_csv(csv_path) for csv_path in ("data_1.csv", "data_2.csv"))

In [31]:
# Stack both frames row-wise and renumber the rows with a fresh 0..n-1 index.
data = pd.concat(objs=[hd, bd], axis=0, ignore_index=True)

In [32]:
# Display the column labels of the combined frame.
data.columns

Index(['_id', 'time_taken', 'typing_speed', 'mouse_distance', 'country',
       'city', 'is_proxy', 'IsBot'],
      dtype='object')

In [33]:
# Overwrite `is_proxy` with the same string "false" on every row, so the
# column holds a single constant value from here on.
# NOTE(review): whatever per-row proxy values were loaded from the CSVs are
# discarded by this assignment — confirm that this is intended.
data["is_proxy"]="false"

In [34]:
# Print per-column dtypes and non-null counts for the combined frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 329 entries, 0 to 328
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   _id             329 non-null    object 
 1   time_taken      329 non-null    float64
 2   typing_speed    329 non-null    float64
 3   mouse_distance  329 non-null    float64
 4   country         329 non-null    object 
 5   city            329 non-null    object 
 6   is_proxy        329 non-null    object 
 7   IsBot           329 non-null    object 
dtypes: float64(3), object(5)
memory usage: 20.7+ KB


In [35]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow.keras import layers
import joblib
# Encode the target first so that unexpected labels (or an accidental re-run
# of this cell on already-encoded data) fail loudly *before* any encoder
# file on disk is overwritten. `Series.map` would otherwise turn every
# unmapped value into NaN without any warning.
is_bot_encoded = data['IsBot'].map({'Yes': 1, 'No': 0})
if is_bot_encoded.isna().any():
    unexpected_labels = data.loc[is_bot_encoded.isna(), 'IsBot'].unique().tolist()
    raise ValueError(
        "IsBot must contain only 'Yes'/'No'; "
        f"found unexpected values: {unexpected_labels}"
    )

# Integer-encode each categorical column with its own LabelEncoder and
# persist the fitted encoder with joblib so the mapping can be reloaded.
# NOTE(review): the encoders are fitted on the full dataset, i.e. before the
# train/test split performed later in the notebook.
categorical_encoder_files = {
    'country': 'country_label_encoder.joblib',
    'city': 'city_label_encoder.joblib',
    'is_proxy': 'proxy_label_encoder.joblib',
}
for column, encoder_file in categorical_encoder_files.items():
    label_enc = LabelEncoder()
    data[column] = label_enc.fit_transform(data[column])
    joblib.dump(label_enc, encoder_file)

data['IsBot'] = is_bot_encoded

# Feature matrix = every column except the target; target = IsBot (1 = 'Yes').
X = data.drop('IsBot', axis=1)
y = data['IsBot']

In [22]:
# Summarise the feature frame, then remove the `_id` column from it.
X.info()
# errors="ignore" keeps this cell safe to re-run: once `_id` has been
# removed, a plain drop would raise KeyError on the second execution.
X = X.drop(columns=["_id"], errors="ignore")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 329 entries, 0 to 328
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   _id             329 non-null    object 
 1   time_taken      329 non-null    float64
 2   typing_speed    329 non-null    float64
 3   mouse_distance  329 non-null    float64
 4   country         329 non-null    int64  
 5   city            329 non-null    int64  
 6   is_proxy        329 non-null    int64  
dtypes: float64(3), int64(3), object(1)
memory usage: 18.1+ KB


# Splitting

In [23]:
# Hold out 25% of the rows for evaluation. The split is stratified on y and
# uses a fixed random_state so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# XGBoost Training

In [24]:
# Fit an XGBoost classifier on the training split and save the fitted model
# to disk.
xgb_model = xgb.XGBClassifier(eval_metric='logloss', use_label_encoder=False)
xgb_model.fit(X_train, y_train)
xgb_model.save_model('demo_weights_xgboost.json')

# Score the held-out split: overall accuracy plus the per-class report.
xgb_preds = xgb_model.predict(X_test)
xgb_accuracy = accuracy_score(y_test, xgb_preds)
xgb_report = classification_report(y_test, xgb_preds)
print("XGBoost Accuracy:", xgb_accuracy)
print(xgb_report)

XGBoost Accuracy: 0.963855421686747
              precision    recall  f1-score   support

           0       0.97      0.94      0.95        31
           1       0.96      0.98      0.97        52

    accuracy                           0.96        83
   macro avg       0.96      0.96      0.96        83
weighted avg       0.96      0.96      0.96        83



# CatBoost Training

In [25]:
# Fit a CatBoost classifier (only `verbose=0` is set; everything else is
# left at the library defaults) on the training split.
catboost_model = CatBoostClassifier(verbose=0)
catboost_model.fit(X_train, y_train)

# NOTE(review): no `format` argument is passed here, so the on-disk format is
# whatever CatBoost's default is, regardless of the ".json" suffix — confirm
# that whatever loads this file expects that.
catboost_model.save_model('demo_weights_catboost.json')

# Score the held-out split: overall accuracy plus the per-class report.
catboost_preds = catboost_model.predict(X_test)
catboost_accuracy = accuracy_score(y_test, catboost_preds)
catboost_report = classification_report(y_test, catboost_preds)
print("CatBoost Accuracy:", catboost_accuracy)
print(catboost_report)

CatBoost Accuracy: 0.9759036144578314
              precision    recall  f1-score   support

           0       1.00      0.94      0.97        31
           1       0.96      1.00      0.98        52

    accuracy                           0.98        83
   macro avg       0.98      0.97      0.97        83
weighted avg       0.98      0.98      0.98        83



# Neural Network-Based Approach

In [26]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across kernel restarts; without this every run
# trains a different network and reports a different accuracy.
tf.keras.utils.set_random_seed(42)

# Small fully connected binary classifier over the feature columns of X.
# `layers.Input(shape=...)` replaces `InputLayer(input_shape=...)`, whose
# `input_shape` argument is deprecated in Keras 3.
# NOTE(review): Dense(32) and Dense(8) have no activation, so they are purely
# linear layers, and the features are fed in without any scaling — both are
# worth revisiting given how far this model trails the tree models.
nueral_model = tf.keras.Sequential([
    layers.Input(shape=(X.shape[1],)),
    layers.Dense(32),
    layers.Dense(16, activation='relu'),
    layers.Dense(8),
    layers.Dense(1, activation='sigmoid'),
])
nueral_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
nueral_model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)
nueral_model.save_weights("demo_weights_neural.weights.h5")

# The sigmoid output is a probability; threshold at 0.5 to get hard labels.
nueral_model_preds = nueral_model.predict(X_test)
nueral_model_preds_test = (nueral_model_preds > 0.5).astype(int)

print("Neural Network Accuracy:", accuracy_score(y_test, nueral_model_preds_test))
print(classification_report(y_test, nueral_model_preds_test))



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Neural Network Accuracy: 0.7108433734939759
              precision    recall  f1-score   support

           0       0.57      0.94      0.71        31
           1       0.94      0.58      0.71        52

    accuracy                           0.71        83
   macro avg       0.75      0.76      0.71        83
weighted avg       0.80      0.71      0.71        83



# Mixture of Experts (MoE) Approach

In [27]:
# Build the gate model's inputs: one column per expert (XGBoost, CatBoost,
# neural network), one row per sample. For the two tree models column 1 of
# predict_proba is taken; the network has a single sigmoid output, so its
# only column (index 0) is used.

# Training split.
# NOTE(review): these are predictions on the same X_train the experts were
# fitted on, so they may look more confident than predictions on unseen rows.
xgb_preds_proba_train = xgb_model.predict_proba(X_train)[:, 1]
catboost_preds_proba_train = catboost_model.predict_proba(X_train)[:, 1]
nueral_model_preds_proba_train = nueral_model.predict(X_train)[:, 0]
train_expert_predictions = np.stack(
    [
        xgb_preds_proba_train,
        catboost_preds_proba_train,
        nueral_model_preds_proba_train,
    ],
    axis=1,
)

# Held-out split.
xgb_preds_proba_test = xgb_model.predict_proba(X_test)[:, 1]
catboost_preds_proba_test = catboost_model.predict_proba(X_test)[:, 1]
nueral_model_preds_proba_test = nueral_model.predict(X_test)[:, 0]
test_expert_predictions = np.stack(
    [
        xgb_preds_proba_test,
        catboost_preds_proba_test,
        nueral_model_preds_proba_test,
    ],
    axis=1,
)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [28]:
def build_gate_model(n_experts=3):
    """Build and compile the gate network that combines expert probabilities.

    Args:
        n_experts: Number of expert probability columns fed to the gate.
            Defaults to 3, matching the XGBoost / CatBoost / neural-network
            trio used in this notebook.

    Returns:
        A compiled ``tf.keras.Sequential`` binary classifier with a single
        sigmoid output, using the Adam optimizer and binary cross-entropy.
    """
    # `layers.Input(shape=...)` replaces `InputLayer(input_shape=...)`, whose
    # `input_shape` argument is deprecated in Keras 3.
    model = tf.keras.Sequential([
        layers.Input(shape=(n_experts,)),
        layers.Dense(32),
        layers.Dense(16, activation='relu'),
        layers.Dense(8),
        layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Seed Python, NumPy and TensorFlow so the gate's initial weights and batch
# order (and therefore the reported metrics) are repeatable.
tf.keras.utils.set_random_seed(42)

# Size the gate from the stacked expert matrix instead of a hard-coded 3.
gate_model = build_gate_model(n_experts=train_expert_predictions.shape[1])

gate_model.fit(train_expert_predictions, y_train, epochs=20, batch_size=32, verbose=0)
gate_model.save_weights("demo_weights_gate.weights.h5")

# Threshold the gate's sigmoid output at 0.5 to obtain hard labels.
gate_preds_proba_test = gate_model.predict(test_expert_predictions)
gate_preds_test = (gate_preds_proba_test > 0.5).astype(int)

print("Neural Network Gate Model (MoE) Accuracy:", accuracy_score(y_test, gate_preds_test))
print(classification_report(y_test, gate_preds_test))



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Neural Network Gate Model (MoE) Accuracy: 0.963855421686747
              precision    recall  f1-score   support

           0       1.00      0.90      0.95        31
           1       0.95      1.00      0.97        52

    accuracy                           0.96        83
   macro avg       0.97      0.95      0.96        83
weighted avg       0.97      0.96      0.96        83

