In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Reading Data
file_path = r'C:\Users\sjh\Desktop\JUPITER\Earthquake Damage Data111.xlsx'
df = pd.read_excel(file_path)

# View data structure
print("The first few rows of data:")
print(df.head())
print("\nData information:")
print(df.info())

The first few rows of data:
   ID        P1        P2        P3        P4    P5        P6       BC1  BC2  \
0   1  0.733800  0.588313  0.824660  0.420993  0.00  0.599542  0.257812    0   
1   2  0.738113  0.604497  0.477239  0.252442  0.75  0.544622  0.148438    0   
2   3  0.742815  0.602485  0.755395  0.434462  0.75  0.560641  0.093750    1   
3   4  0.739635  0.602389  0.768629  0.484940  0.75  0.556064  0.148438    0   
4   5  0.740052  0.605121  0.783227  0.486592  0.75  0.542334  0.148438    0   

   BC3  BC4  BC5  BC6       BC7       BC8  DS  
0  0.0    0  0.8  1.0  0.000000  0.827586   4  
1  0.0    1  0.8  1.0  0.666667  0.931034   1  
2  1.0    1  0.8  1.0  0.000000  0.655172   1  
3  0.0    0  0.8  1.0  0.000000  0.965517   1  
4  0.0    1  0.8  0.5  0.666667  0.931034   1  

Data information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30590 entries, 0 to 30589
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 

In [3]:
# Extracting features and labels
X = df.iloc[1:, :-1].values
y = df.iloc[1:, -1].values

# Checking Data Types
print("\nCheck data type:")
print("Data type of X:", X.dtype)
print("Data type of y:", y.dtype)


Check data type:
Data type of X: float64
Data type of y: int64


In [4]:
# Label conversion
y = y - 1

# Data Standardization
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [5]:
# Divide the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Convert to PyTorch Tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Creating a DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [7]:
# Defining the ANN model
class ANN(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super(ANN, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.layer3 = nn.Linear(hidden_size2, num_classes)
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu1(x)
        x = self.layer2(x)
        x = self.relu2(x)
        x = self.layer3(x)
        return x

In [8]:
# Initialize the model
input_size = X_train.shape[1]
hidden_size1 = 64
hidden_size2 = 32
num_classes = 4
model = ANN(input_size, hidden_size1, hidden_size2, num_classes)

# Defining loss function and optimizer
class_counts = np.bincount(y_train) 
class_weights = 1.0 / class_counts  
class_weights = torch.tensor(class_weights, dtype=torch.float32)
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:
# Training the model
num_epochs = 100
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.7377
Epoch [20/100], Loss: 0.5767
Epoch [30/100], Loss: 0.5676
Epoch [40/100], Loss: 0.2392
Epoch [50/100], Loss: 0.5560
Epoch [60/100], Loss: 0.6896
Epoch [70/100], Loss: 0.7379
Epoch [80/100], Loss: 0.4333
Epoch [90/100], Loss: 0.7769
Epoch [100/100], Loss: 0.6711


In [10]:
# Evaluating the Model
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)
    y_pred = predicted.numpy()

# Restore original label range
y_pred_original = y_pred + 1
y_test_original = y_test + 1

# Calculating evaluation metrics
conf_matrix = confusion_matrix(y_test_original, y_pred_original)
accuracy = accuracy_score(y_test_original, y_pred_original)
precision = precision_score(y_test_original, y_pred_original, average='macro')
recall = recall_score(y_test_original, y_pred_original, average='macro')
f1 = f1_score(y_test_original, y_pred_original, average='macro')

print("Confusion Matrix:")
print(conf_matrix)
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Confusion Matrix:
[[3891   55  358 1243]
 [  11    6    0    0]
 [  21    1    1    9]
 [ 141    3   24  354]]
Accuracy: 0.6949983654789147
Precision: 0.3181932932328931
Recall: 0.440953086198486
F1 Score: 0.3233911823433907


In [11]:
# The precision of each category
print("\nPrecision (recall) for each category:")
for i in range(1, 5):
    class_idx = i - 1
    tp = conf_matrix[class_idx, class_idx]
    actual = conf_matrix[class_idx, :].sum()
    accuracy = tp / actual if actual != 0 else 0.0
    print(f"DS={i} 's accuracy: {accuracy:.4f}")


Precision (recall) for each category:
DS=1 's accuracy: 0.7015
DS=2 's accuracy: 0.3529
DS=3 's accuracy: 0.0312
DS=4 's accuracy: 0.6782


In [12]:
# Define a function to run the model and return the evaluation results
def run_model_with_seed(seed):
    # Setting the random seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    
    # Re-partition the training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    
    # Convert to PyTorch Tensor
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)
    
    # Creating a DataLoader
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    
    # Initialize the model
    model = ANN(input_size, hidden_size1, hidden_size2, num_classes)
    
    # Defining loss function and optimizer
    class_counts = np.bincount(y_train)
    class_weights = 1.0 / class_counts
    class_weights = torch.tensor(class_weights, dtype=torch.float32)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Training the model
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
    # Evaluating the Model
    with torch.no_grad():
        outputs = model(X_test_tensor)
        _, predicted = torch.max(outputs, 1)
        y_pred = predicted.numpy()
    
    # Restore original label range
    y_pred_original = y_pred + 1
    y_test_original = y_test + 1
    
    # Calculating evaluation metrics
    accuracy = accuracy_score(y_test_original, y_pred_original)
    precision = precision_score(y_test_original, y_pred_original, average='macro')
    recall = recall_score(y_test_original, y_pred_original, average='macro')
    f1 = f1_score(y_test_original, y_pred_original, average='macro')
    
    return accuracy, precision, recall, f1

In [13]:
# Generate five random seeds and run the model
num_runs = 5
results = []

for i in range(num_runs):
    seed = np.random.randint(1000)
    print(f"\nRunning with seed {seed}")
    accuracy, precision, recall, f1 = run_model_with_seed(seed)
    results.append((accuracy, precision, recall, f1))
    print(f"Run {i+1} - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1: {f1}")

# Calculate the average result
avg_accuracy = np.mean([res[0] for res in results])
avg_precision = np.mean([res[1] for res in results])
avg_recall = np.mean([res[2] for res in results])
avg_f1 = np.mean([res[3] for res in results])

print("\nAverage Results Over 5 Runs:")
print(f"Average Accuracy: {avg_accuracy:.4f}")
print(f"Average Precision: {avg_precision:.4f}")
print(f"Average Recall: {avg_recall:.4f}")
print(f"Average F1 Score: {avg_f1:.4f}")


Running with seed 565
Run 1 - Accuracy: 0.7052958483164433, Precision: 0.3204610497470962, Recall: 0.5282315119408327, F1: 0.3342312762544939

Running with seed 260
Run 2 - Accuracy: 0.6696632886564237, Precision: 0.3189022582016385, Recall: 0.5057292752414704, F1: 0.3236281164694247

Running with seed 10
Run 3 - Accuracy: 0.6605099705786205, Precision: 0.3090696794995485, Recall: 0.4986520169314402, F1: 0.3085548228200709

Running with seed 265
Run 4 - Accuracy: 0.6655769859431186, Precision: 0.3164185844546312, Recall: 0.5244288639759069, F1: 0.3197456414789542

Running with seed 53
Run 5 - Accuracy: 0.7069303694017652, Precision: 0.34980493379423155, Recall: 0.48167394278459186, F1: 0.36286229360702765

Average Results Over 5 Runs:
Average Accuracy: 0.6816
Average Precision: 0.3229
Average Recall: 0.5077
Average F1 Score: 0.3298
