In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [2]:
file_path = r'C:\Users\sjh\Desktop\JUPITER\Earthquake Damage Data111.xlsx'

df = pd.read_excel(file_path)

# View data structure
print("The first few rows of data:")
print(df.head())
print("\nData information:")
print(df.info())

The first few rows of data:
   ID        P1        P2        P3        P4    P5        P6       BC1  BC2  \
0   1  0.733800  0.588313  0.824660  0.420993  0.00  0.599542  0.257812    0   
1   2  0.738113  0.604497  0.477239  0.252442  0.75  0.544622  0.148438    0   
2   3  0.742815  0.602485  0.755395  0.434462  0.75  0.560641  0.093750    1   
3   4  0.739635  0.602389  0.768629  0.484940  0.75  0.556064  0.148438    0   
4   5  0.740052  0.605121  0.783227  0.486592  0.75  0.542334  0.148438    0   

   BC3  BC4  BC5  BC6       BC7       BC8  DS  
0  0.0    0  0.8  1.0  0.000000  0.827586   4  
1  0.0    1  0.8  1.0  0.666667  0.931034   1  
2  1.0    1  0.8  1.0  0.000000  0.655172   1  
3  0.0    0  0.8  1.0  0.000000  0.965517   1  
4  0.0    1  0.8  0.5  0.666667  0.931034   1  

Data information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30590 entries, 0 to 30589
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 

In [3]:
# Extracting features and labels
X = df.iloc[1:, :-1].values
y = df.iloc[1:, -1].values

# Check data type
print("\nCheck data type:")
print("Data type of X:", X.dtype)
print("Data type of y:", y.dtype)

# Label conversion
y = y - 1

# Data Normalization
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch Tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Creating a Data Loader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


Check data type:
Data type of X: float64
Data type of y: int64


In [4]:
# Defining the ANN model
class ANN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super(ANN, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.layer3 = nn.Linear(hidden_size2, num_classes)
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu1(x)
        x = self.layer2(x)
        x = self.relu2(x)
        x = self.layer3(x)
        return x

# Initialize the model
input_size = X_train.shape[1]
hidden_size1 = 64
hidden_size2 = 32
num_classes = 4
model = ANN(input_size, hidden_size1, hidden_size2, num_classes)

# Defining loss function and optimizer
class_counts = np.bincount(y_train) 
class_weights = 1.0 / class_counts  
class_weights = torch.tensor(class_weights, dtype=torch.float32)
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [5]:
# Training
num_epochs = 100
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.7543
Epoch [20/100], Loss: 0.5439
Epoch [30/100], Loss: 0.4519
Epoch [40/100], Loss: 0.6145
Epoch [50/100], Loss: 0.5625
Epoch [60/100], Loss: 0.5582
Epoch [70/100], Loss: 0.4049
Epoch [80/100], Loss: 1.3003
Epoch [90/100], Loss: 0.4836
Epoch [100/100], Loss: 0.2011


In [6]:
# Evaluate the model on the test set
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)
    y_pred = predicted.numpy()

# Restore original label range
y_pred_original = y_pred + 1
y_test_original = y_test + 1

# Calculating evaluation metrics
conf_matrix = confusion_matrix(y_test_original, y_pred_original)
accuracy = accuracy_score(y_test_original, y_pred_original)
precision = precision_score(y_test_original, y_pred_original, average='macro')
recall = recall_score(y_test_original, y_pred_original, average='macro')
f1 = f1_score(y_test_original, y_pred_original, average='macro')


print("Confusion Matrix:")
print(conf_matrix)
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Confusion Matrix:
[[3900  117  252 1278]
 [   9    8    0    0]
 [  19    2    3    8]
 [ 148    2   17  355]]
Accuracy: 0.6972866949983655
Precision: 0.3115492082473091
Recall: 0.48687440276941296
F1 Score: 0.3170329313027475


In [7]:
print("\nPrecision (recall) for each category:")
for i in range(1, 5):
    class_idx = i - 1
    tp = conf_matrix[class_idx, class_idx]
    actual = conf_matrix[class_idx, :].sum()
    accuracy = tp / actual if actual != 0 else 0.0
    print(f"DS={i} 's accuracy: {accuracy:.4f}")


Precision (recall) for each category:
DS=1 's accuracy: 0.7031
DS=2 's accuracy: 0.4706
DS=3 's accuracy: 0.0938
DS=4 's accuracy: 0.6801
