In [1]:
import pandas as pd

train = pd.read_csv("/kaggle/input/scc-dataset/train.csv")
test = pd.read_csv("/kaggle/input/scc-dataset/test.csv")

train.head(), test.head()


(   sample_id  signal_strength  response_level category
 0        901       722.566585      153.933763  Group_C
 1       1799       210.432525      454.613761  Group_B
 2       1129       152.569777      431.993189  Group_B
 3        965       670.294068       -6.559720  Group_C
 4        395        49.418875      444.775273  Group_B,
    sample_id  signal_strength  response_level
 0       1369       565.801728      444.857592
 1         66       629.499478       68.269946
 2        701        88.635053      309.727322
 3        939       333.633206      125.831212
 4       1622       667.619312      -17.018523)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = train[["signal_strength", "response_level"]]
y = train["category"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
from sklearn.metrics import accuracy_score, classification_report

def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(model.__class__.__name__, "Accuracy:", acc)
    print(classification_report(y_test, preds))
    return acc


In [4]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression(multi_class="multinomial", max_iter=1000)
acc_lr = evaluate_model(log_reg, X_train, X_test, y_train, y_test)


LogisticRegression Accuracy: 0.8961937716262975
              precision    recall  f1-score   support

     Group_A       0.77      0.72      0.75        61
     Group_B       0.96      0.93      0.95       141
     Group_C       0.88      0.97      0.92        87

    accuracy                           0.90       289
   macro avg       0.87      0.87      0.87       289
weighted avg       0.90      0.90      0.90       289



In [5]:
test_scaled = scaler.transform(test[["signal_strength", "response_level"]])


In [6]:
test_preds = log_reg.predict(test_scaled)


In [7]:
submission = pd.DataFrame({
    "sample_id": test["sample_id"],
    "category": test_preds
})

submission.to_csv("submission.csv", index=False)
