In [1]:
import numpy as np
import torch 
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

In [2]:
data_dict=torch.load('data_for_classificationV2.pt')
data=data_dict['reshaped_set']

In [3]:
# Support Vector Machine
x=data[:,:-1].numpy()
y=data[:,-1].numpy()
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=4500)
print(f'Training set size:{X_train.shape[0]}')
print(f'Testing set size:{X_test.shape[0]}')

print(f'Non-zero in Training:{np.count_nonzero(y_train)}')
print(f'Zero  in Training:{y_train.size-np.count_nonzero(y_train)}')

print(f'Non-zero in test:{np.count_nonzero(y_test)}')
print(f'Zero  in test:{y_test.size-np.count_nonzero(y_test)}')



Training set size:4200
Testing set size:1800
Non-zero in Training:2052
Zero  in Training:2148
Non-zero in test:948
Zero  in test:852


In [4]:
param_grid = {'C': [0.01,0.1, 1, 10, 100], 'gamma': [10,1, 0.1, 0.01, 0.001, 0.0001, 0.00001], 'kernel': ['rbf']}

grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
grid.fit(X_train, y_train)

print(grid.best_params_)
print(grid.best_estimator_)

y_pred = grid.predict(X_test)

Fitting 5 folds for each of 35 candidates, totalling 175 fits
[CV] END .......................C=0.01, gamma=10, kernel=rbf; total time=   0.8s
[CV] END .......................C=0.01, gamma=10, kernel=rbf; total time=   0.8s
[CV] END .......................C=0.01, gamma=10, kernel=rbf; total time=   0.8s
[CV] END .......................C=0.01, gamma=10, kernel=rbf; total time=   0.8s
[CV] END .......................C=0.01, gamma=10, kernel=rbf; total time=   0.8s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=   0.5s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=   0.5s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=   0.5s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=   0.5s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=   0.5s
[CV] END ......................C=0.01, gamma=0.1, kernel=rbf; total time=   0.4s
[CV] END ......................C=0.01, gamma=0.

In [9]:
clf = SVC(kernel='rbf', C=10, gamma=0.01)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print(f'Training Accuracy: {train_accuracy}')
print(f'Test Accuracy: {test_accuracy}')

[[814  38]
 [ 97 851]]
              precision    recall  f1-score   support

         0.0       0.89      0.96      0.92       852
         1.0       0.96      0.90      0.93       948

    accuracy                           0.93      1800
   macro avg       0.93      0.93      0.92      1800
weighted avg       0.93      0.93      0.93      1800

Training Accuracy: 0.9452380952380952
Test Accuracy: 0.925


# Train SVM by Individual BC set

In [10]:
data_org=data_dict['original_set']

In [11]:
data_org[0].shape
ut_data=data_org[0]
uc_data=data_org[1]
sh_data=data_org[-1]
N,T,D=sh_data.shape
ut_data=ut_data.reshape(N*T,D)
uc_data=uc_data.reshape(N*T,D)
sh_data=sh_data.reshape(N*T,D)

In [12]:
xut,yut=[ut_data[:,:-1].numpy(),ut_data[:,-1].numpy()]
X_train_ut, X_test_ut, y_train_ut, y_test_ut = train_test_split(xut, yut, test_size=0.20, random_state=4500)

In [13]:
xuc,yuc=[uc_data[:,:-1].numpy(),uc_data[:,-1].numpy()]
X_train_uc, X_test_uc, y_train_uc, y_test_uc = train_test_split(xuc, yuc, test_size=0.20, random_state=4500)

In [14]:
xsh,ysh=[sh_data[:,:-1].numpy(),sh_data[:,-1].numpy()]
X_train_sh, X_test_sh, y_train_sh, y_test_sh = train_test_split(xsh, ysh, test_size=0.20, random_state=4500)

In [15]:
def trainSVC(X_train,X_test,y_train,y_test):
    clf = SVC(kernel='rbf', C=10, gamma=0.01)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    #print(confusion_matrix(y_test, y_pred))
    #print(classification_report(y_test, y_pred))
    train_accuracy = clf.score(X_train, y_train)
    test_accuracy = clf.score(X_test, y_test)
    print(f'Training Accuracy: {train_accuracy}')
    print(f'Test Accuracy: {test_accuracy}')
    return clf

In [16]:
clf1=trainSVC(X_train_ut,X_test_ut,y_train_ut,y_test_ut)
clf2=trainSVC(X_train_uc,X_test_uc,y_train_uc,y_test_uc)
clf3=trainSVC(X_train_sh,X_test_sh,y_train_sh,y_test_sh)

Training Accuracy: 0.9625
Test Accuracy: 0.945
Training Accuracy: 0.9475
Test Accuracy: 0.9225
Training Accuracy: 0.966875
Test Accuracy: 0.955


In [17]:
print('Accuracy of Training set for SVM trained by individual BC:')
print(f'Training Accuracy of UT_SVM: UT:{clf1.score(X_train_ut, y_train_ut)}, UC:{clf1.score(X_train_uc, y_train_uc)}, PS:{clf1.score(X_train_sh, y_train_sh)}')
print(f'Training Accuracy of UC_SVM: UT:{clf2.score(X_train_ut, y_train_ut)}, UC:{clf2.score(X_train_uc, y_train_uc)}, PS:{clf2.score(X_train_sh, y_train_sh)}')
print(f'Training Accuracy of PS_SVM: UT:{clf3.score(X_train_ut, y_train_ut)}, UC:{clf3.score(X_train_uc, y_train_uc)}, PS:{clf3.score(X_train_sh, y_train_sh)}')

Accuracy of Training set for SVM trained by individual BC:
Training Accuracy of UT_SVM: UT:0.9625, UC:0.73125, PS:0.6175
Training Accuracy of UC_SVM: UT:0.4675, UC:0.9475, PS:0.88125
Training Accuracy of PS_SVM: UT:0.6275, UC:0.809375, PS:0.966875


In [18]:
print('Accuracy of Testing set for SVM trained by individual BC:')
print(f'Testing Accuracy of UT_SVM: UT:{clf1.score(X_test_ut, y_test_ut)}, UC:{clf1.score(X_test_uc, y_test_uc)}, PS:{clf1.score(X_test_sh, y_test_sh)}')
print(f'Testing Accuracy of UC_SVM: UT:{clf2.score(X_test_ut, y_test_ut)}, UC:{clf2.score(X_test_uc, y_test_uc)}, PS:{clf2.score(X_test_sh, y_test_sh)}')
print(f'Testing Accuracy of PS_SVM: UT:{clf3.score(X_test_ut, y_test_ut)}, UC:{clf3.score(X_test_uc, y_test_uc)}, PS:{clf3.score(X_test_sh, y_test_sh)}')

Accuracy of Testing set for SVM trained by individual BC:
Testing Accuracy of UT_SVM: UT:0.945, UC:0.7125, PS:0.6125
Testing Accuracy of UC_SVM: UT:0.45, UC:0.9225, PS:0.8825
Testing Accuracy of PS_SVM: UT:0.64, UC:0.835, PS:0.955


In [83]:
fabrics_name=[r'Nodal Connection Degree Skewness, $\alpha_{N_d}$',
        r'Nodal Connection Degree Kurtosis, $k_{N_d}$',
        r'Nodal Connection Degree Entropy, $H_{N_d}$',
        
        r'Nodal Clustering Coefficient Skewness, $\alpha_{C_v}$',
        r'Nodal Clustering Coefficient Kurtosis, $k_{C_v}$',
        r'Nodal Clustering Coefficient Entropy, $H_{C_v}$',

        r'Area Strength Skewness, $\alpha_{S}$',
        r'Area Strength Kurtosis, $k_{S}$',
        r'Area Strength Entropy, $H_{S}$',

        r'Area Betweenness Centrality Skewness, $\alpha_{g_A}$',
        r'Area Betweenness Centrality Kurtosis, $k_{g_A}$',
        r'Area Betweenness Centrality Entropy, $H_{g_A}$',

        r'Eccentrincity Skewness, $\alpha_{E}$',
        r'Eccentrincity Kurtosis, $k_{E}$',
        r'Eccentrincity Entropy, $H_{E}$', 

        r'Closeness Centrality Skewness, $\alpha_{C}$',
        r'Closeness Centrality Kurtosis, $k_{C}$',
        r'Closeness Centrality Entropy, $H_{C}$', ]

In [86]:
from sklearn.feature_selection import SelectKBest, f_classif

# Example data setup
# X: feature matrix with shape (n_samples, n_features)
# y: target vector with shape (n_samples,)

# Number of features to select
k = 10

# Create the SelectKBest with the ANOVA F-test
selector = SelectKBest(score_func=f_classif, k=k)

# Fit to the training data
selector.fit(x, y)

# Get the scores and p-values of the features
scores = selector.scores_
p_values = selector.pvalues_

# Print the scores and p-values for each feature
for i in range(x.shape[1]):
    print(f"Feature {fabrics_name[i]}: Score = {scores[i]}, p-value = {p_values[i]}")

# Get the mask of the selected features
features_selected_mask = selector.get_support()
print("\nSelected features mask:", features_selected_mask)

# If you want to see which features are selected, and their corresponding scores and p-values:
print("\nSelected features:")
for i, (score, p_value, is_selected) in enumerate(zip(scores, p_values, features_selected_mask)):
    if is_selected:
        print(f"Feature {i+1}: Score = {score}, p-value = {p_value}, Selected = {is_selected}")

Feature Nodal Connection Degree Skewness, $\alpha_{N_d}$: Score = 82.71120876072689, p-value = 1.2682734662223597e-19
Feature Nodal Connection Degree Kurtosis, $k_{N_d}$: Score = 1002.6151150782578, p-value = 1.2953816000267733e-203
Feature Nodal Connection Degree Entropy, $H_{N_d}$: Score = 462.21289426606074, p-value = 7.867209043163438e-99
Feature Nodal Clustering Coefficient Skewness, $\alpha_{C_v}$: Score = 1083.19746774805, p-value = 1.5544857624036107e-218
Feature Nodal Clustering Coefficient Kurtosis, $k_{C_v}$: Score = 907.6615711977605, p-value = 8.275473869847075e-186
Feature Nodal Clustering Coefficient Entropy, $H_{C_v}$: Score = 1737.8720794792864, p-value = 0.0
Feature Area Strength Skewness, $\alpha_{S}$: Score = 1376.0145812640994, p-value = 2.367281760217134e-271
Feature Area Strength Kurtosis, $k_{S}$: Score = 777.0695425553807, p-value = 6.5104077183726155e-161
Feature Area Strength Entropy, $H_{S}$: Score = 1645.6447455803434, p-value = 3.70965e-318
Feature Area Be

In [80]:
x_selec=x[:,features_selected_mask]
y_selec=y

In [81]:
Xs_train, Xs_test, ys_train, ys_test = train_test_split(x_selec, y_selec, test_size=0.30, random_state=2355)

In [82]:
clf = SVC(kernel='rbf', C=60, gamma=0.01)
clf.fit(Xs_train, ys_train)
ys_pred = clf.predict(Xs_test)
print(confusion_matrix(ys_test, ys_pred))
print(classification_report(ys_test, ys_pred))
train_accuracys = clf.score(Xs_train, ys_train)
test_accuracys = clf.score(Xs_test, ys_test)
print(f'Training Accuracy: {train_accuracys}')
print(f'Test Accuracy: {test_accuracys}')

[[836  46]
 [ 95 823]]
              precision    recall  f1-score   support

         0.0       0.90      0.95      0.92       882
         1.0       0.95      0.90      0.92       918

    accuracy                           0.92      1800
   macro avg       0.92      0.92      0.92      1800
weighted avg       0.92      0.92      0.92      1800

Training Accuracy: 0.9314285714285714
Test Accuracy: 0.9216666666666666
