In [52]:
import pandas as pd
df = pd.read_csv("code_dataset.csv")

In [53]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['label_enc'] = le.fit_transform(df['label'])  # New column with numeric labels


In [54]:
print(df['label_enc'],df['label'])

0      0
1      8
2      8
3      6
4      8
      ..
895    1
896    2
897    2
898    2
899    7
Name: label_enc, Length: 900, dtype: int32 0       0
1       s
2       s
3       3
4       s
       ..
895    0s
896     1
897     1
898     1
899    3f
Name: label, Length: 900, dtype: object


In [55]:
X = df.drop(['label', 'label_enc'], axis=1).values  # Feature columns
y = df['label_enc'].values  # Encoded target


In [56]:
print(df['label'].value_counts())


label
s     198
2     132
3     108
1     108
3f     90
0      84
2s     78
1f     66
0s     36
Name: count, dtype: int64


In [57]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [58]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    Dense(128, activation='relu', ),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(le.classes_), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [59]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Numeric labels (already encoded with LabelEncoder)
class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weights_dict = dict(enumerate(class_weights))

# Then pass during training:
model.fit(X_train, y_train, epochs=100, validation_split=0.2,
          batch_size=16, class_weight=class_weights_dict)


Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.1284 - loss: 7.9555 - val_accuracy: 0.1181 - val_loss: 2.5462
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1310 - loss: 2.5154 - val_accuracy: 0.1667 - val_loss: 2.0881
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1696 - loss: 2.1461 - val_accuracy: 0.1667 - val_loss: 2.0734
Epoch 4/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1466 - loss: 2.0486 - val_accuracy: 0.1944 - val_loss: 2.0675
Epoch 5/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1377 - loss: 2.1352 - val_accuracy: 0.1597 - val_loss: 2.0893
Epoch 6/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1812 - loss: 2.0017 - val_accuracy: 0.1736 - val_loss: 2.0573
Epoch 7/100
[1m36/36[0m [32m━━━

<keras.src.callbacks.history.History at 0x298abaa6810>

In [60]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2f}")


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6440 - loss: 0.8506  
Test Accuracy: 0.64


In [61]:
import numpy as np

sample = np.array([[5,2,7,7,7,1,1,1,4,12]])  # Example feature vector
pred = model.predict(sample)
print("Predicted flag:", le.inverse_transform([pred.argmax()])[0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
Predicted flag: s


In [62]:
df['label'].value_counts()


label
s     198
2     132
3     108
1     108
3f     90
0      84
2s     78
1f     66
0s     36
Name: count, dtype: int64

In [63]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load your dataset
df = pd.read_csv("code_dataset.csv")

# Simplify label (convert '3f' → '3', '2s' → '2', etc.)
df['label_simple'] = df['label'].str[0]

# Encode simplified labels
le = LabelEncoder()
df['label_enc'] = le.fit_transform(df['label_simple'])

# Features and target
X = df.drop(columns=['label', 'label_simple', 'label_enc']).values
y = df['label_enc'].values

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Evaluate
y_pred = rf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))

Accuracy: 0.9777777777777777

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        22
           1       1.00      1.00      1.00        35
           2       1.00      0.90      0.95        40
           3       1.00      1.00      1.00        38
           s       0.92      1.00      0.96        45

    accuracy                           0.98       180
   macro avg       0.98      0.98      0.98       180
weighted avg       0.98      0.98      0.98       180

