In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [3]:
%pip install "tensorflow==2.16.1"

Collecting tensorflow==2.16.1
  Downloading tensorflow-2.16.1-cp312-cp312-win_amd64.whl.metadata (3.5 kB)
Collecting tensorflow-intel==2.16.1 (from tensorflow==2.16.1)
  Downloading tensorflow_intel-2.16.1-cp312-cp312-win_amd64.whl.metadata (5.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.16.1->tensorflow==2.16.1)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.16.1->tensorflow==2.16.1)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow-intel==2.16.1->tensorflow==2.16.1)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.16.1->tensorflow==2.16.1)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.16.1->tensorflow==2.16.1)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metada

In [4]:
from tensorflow import keras

In [7]:
p_train = r"C:\Users\TSJ20\xwechat_files\wxid_c8919vr6p4qg22_429f\msg\file\2025-10\scale_train_data.csv"
p_test  = r"C:\Users\TSJ20\xwechat_files\wxid_c8919vr6p4qg22_429f\msg\file\2025-10\scale_test_data.csv"
train = pd.read_csv(p_train)
test  = pd.read_csv(p_test)

In [8]:
target = 'NSP' if 'NSP' in train.columns else 'CLASS'

In [20]:
y_train = train[target].to_numpy(dtype=int)
X_train = train.loc[:, train.columns != target].to_numpy(dtype='float32')

y_test  = test[target].to_numpy(dtype=int)
X_test  = test.loc[:,  test.columns  != target].to_numpy(dtype='float32')

In [22]:
y_train = y_train - 1
y_test  = y_test  - 1

In [24]:
vals, cnts = np.unique(y_train, return_counts=True)
class_weight = {int(v): float(cnts.max()/c) for v,c in zip(vals, cnts)}
print("class_weight:", class_weight)

class_weight: {0: 1.0, 1: 5.974226804123711, 2: 8.585185185185185}


In [28]:
num_classes = int(np.unique(y_train).size)
print("num_classes =", num_classes, " labels in train:", np.unique(y_train))

num_classes = 3  labels in train: [0 1 2]


In [30]:
inputs = keras.Input(shape=(X_train.shape[1],))
x = keras.layers.BatchNormalization()(inputs)
x = keras.layers.Dense(128, activation="relu")(x)
x = keras.layers.Dropout(0.3)(x)
x = keras.layers.Dense(64, activation="relu")(x)
x = keras.layers.Dropout(0.2)(x)
outputs = keras.layers.Dense(num_classes, activation="softmax")(x)
model = keras.Model(inputs, outputs)

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

In [32]:
es = keras.callbacks.EarlyStopping(
    monitor="val_accuracy", patience=10, restore_best_weights=True
)
hist = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=200, batch_size=32,
    class_weight=class_weight,
    callbacks=[es], verbose=1
)


Epoch 1/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4882 - loss: 2.1512 - val_accuracy: 0.1275 - val_loss: 2.5602
Epoch 2/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8706 - loss: 1.3268 - val_accuracy: 0.1309 - val_loss: 2.7095
Epoch 3/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8445 - loss: 1.1088 - val_accuracy: 0.3121 - val_loss: 1.5427
Epoch 4/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9025 - loss: 0.9116 - val_accuracy: 0.4161 - val_loss: 1.3846
Epoch 5/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9109 - loss: 0.8831 - val_accuracy: 0.6242 - val_loss: 0.8527
Epoch 6/200
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9059 - loss: 0.8138 - val_accuracy: 0.7752 - val_loss: 0.4331
Epoch 7/200
[1m38/38[0m [32m━━━

In [34]:
proba = model.predict(X_test)
pred = proba.argmax(axis=1)

print("Accuracy:", accuracy_score(y_test, pred))
print(classification_report(y_test, pred, digits=2))
print("Confusion matrix:\n", confusion_matrix(y_test, pred))

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Accuracy: 0.9310344827586207
              precision    recall  f1-score   support

           0       0.99      0.96      0.97       496
           1       0.85      0.82      0.83       101
           2       0.60      0.90      0.72        41

    accuracy                           0.93       638
   macro avg       0.81      0.89      0.84       638
weighted avg       0.94      0.93      0.93       638

Confusion matrix:
 [[474  12  10]
 [  3  83  15]
 [  1   3  37]]
