### 3. Simple/Basic approach  (Logistic Regression)

3.1 Read and load the data.  

In [8]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read full_data.csv into a DataFrame. The path is relative, so it is resolved
# against the kernel's current working directory.
csv_path = '../data/manipulated_data_set/full_data.csv'
data = pd.read_csv(csv_path)

3.2 Prepare the data and split it into training and test sets.  

In [9]:
# Feature matrix: every column except NSP (the target used below) and CLASS
# (presumably an alternative label column -- confirm against the data set docs).
X = data.drop(["NSP", "CLASS"], axis=1)

# Target: NSP shifted down by one so that the class ids start from 0.
y_nsp = data["NSP"].sub(1)

# Hold out 30% of the rows for testing. Stratifying on the target keeps the
# class proportions the same in both splits; the fixed seed makes the split
# reproducible.
split_parts = train_test_split(
    X,
    y_nsp,
    test_size=0.3,
    random_state=42,
    stratify=y_nsp,
)
X_train, X_test, y_train, y_test = split_parts


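3.3 Standardization of data (feature scaling).  
* Unregularized logistic regression is largely insensitive to feature scale, but scikit-learn's `LogisticRegression` applies an L2 penalty by default, so features on very different scales are penalized unevenly; scaling also helps the solver converge.  
* Standardizing each feature to zero mean and unit variance therefore usually gives a more stable and more accurate model. The scaler is fitted on the training set only, so no information from the test set leaks into training.  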

In [10]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so the test data
# never influences the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


3.4 __Train and test the Model__  

In [11]:
# Fit a logistic regression on the scaled training data.
# `multi_class` is intentionally not passed: the parameter is deprecated since
# scikit-learn 1.5 (it raises a FutureWarning and is scheduled for removal).
# With the default lbfgs solver, a target with more than two classes is already
# fitted with the multinomial (softmax) formulation, so the model is unchanged.
# max_iter is raised above the default so the solver has room to converge.
log_reg_nsp = LogisticRegression(max_iter=1000)
log_reg_nsp.fit(X_train_scaled, y_train)

# Predict class ids for the held-out test split.
y_pred = log_reg_nsp.predict(X_test_scaled)

print("=== NSP classification Result. ===")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# The report above rounds to two decimals; print the F1 averages at full
# precision as well. 'macro' weights every class equally, 'weighted' weights
# each class by its support.
f1_macro = f1_score(y_test, y_pred, average='macro')
f1_weighted = f1_score(y_test, y_pred, average='weighted')

print("F1-score (macro):", f1_macro)
print("F1-score (weighted):", f1_weighted)

=== NSP classification Result. ===
Accuracy: 0.8934169278996865
              precision    recall  f1-score   support

         0.0       0.94      0.95      0.94       497
         1.0       0.64      0.72      0.67        88
         2.0       0.92      0.68      0.78        53

    accuracy                           0.89       638
   macro avg       0.83      0.78      0.80       638
weighted avg       0.90      0.89      0.89       638

F1-score (macro): 0.8004133302021853
F1-score (weighted): 0.8939735529174895


