Code implementation of - **S. Dua, J. Singh, and H. Parthasarathy, “Image forgery detection based on statistical features of block dct coefficients,” Procedia Computer Science, vol. 171, pp. 369–378, 2020, third International Conference on Computing and Network Communications (CoCoNet’19). [Online]. 
Available: https://www.sciencedirect.com/science/article/pii/S1877050920310048**

In [2]:
import cv2
import pandas as pd
import numpy as np
import os
from scipy.fftpack import fft, dct
from sklearn import svm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from skimage.util import view_as_windows

In [3]:
def get_patches(image_mat):
    """
    Extract patches rom an image
    :param image_mat: The image as a matrix
    :param stride: The stride of the patch extraction process
    :returns: The patches
    """
    stride=8 #stride is same as window's breadth so that it gives non-overlapping blocks.
    window_shape = (8, 8)
    image_mat=np.array(image_mat)
    
    windows = view_as_windows(image_mat, window_shape, step=stride)
#     print('windows shape:',windows.shape)

    patches = []
    for m in range(windows.shape[0]):
        for n in range(windows.shape[1]):
#             print("window shape: ",windows[m][n].shape)
            patches += [windows[m][n]]
    return patches

In [4]:
def std_and_ones(type_of_sub_image_blocks):
    ac_dct_stack=[]
    number_of_ones=[]

    for block in type_of_sub_image_blocks:
        dct_block = dct(block, type=2, n=None, axis=-1, norm=None, overwrite_x=False)
        dct_block_row = dct_block.flatten() # 2d dct array to 1d row array.
        ac_dct = dct_block_row[1:] # only AC component, removing the first DC comp.
        ac_dct_stack.append(ac_dct)

    ac_dct_stack=np.asarray(ac_dct_stack) #1536X63
    ac_dct_stack=ac_dct_stack.T # 63X1536

#     print("AC stacked shape: ", ac_dct_stack.shape)

    ac_dct_std = np.std(ac_dct_stack, axis=1) # row wise standard-deviation.

    for i in range(ac_dct_stack.shape[0]):
        count_one=0
        for j in range(ac_dct_stack.shape[1]):
            if(ac_dct_stack[i][j]>0):   # row wise counting number of ones.
                count_one+=1
        number_of_ones.append(count_one)

    number_of_ones=np.asarray(number_of_ones)
    
    return(ac_dct_std, number_of_ones)

In [5]:
def feature_sub_image(sub_image):
    sub_image_blocks = get_patches(sub_image) #Gives the 8x8 patches/blocks of sub_image.

    sub_image_cropped = sub_image[4:,4:] #removing 4 rows and 4 cols.
    sub_image_cropped_blocks = get_patches(sub_image_cropped)

    STD_full_image, ONE_full_image = std_and_ones(sub_image_blocks)
    STD_cropped_image, ONE_cropped_image = std_and_ones(sub_image_cropped_blocks)

    #             print("STD_full image shape: ",STD_full_image.shape)
    #             print("one_full image shape: ",ONE_full_image.shape)
    #             print("STD_crop image shape: ",STD_cropped_image.shape)
    #             print("One_crop image shape: ",ONE_cropped_image.shape)
    
    #63x4 stacked F-sub-image
    F_sub_image=np.column_stack((STD_full_image, ONE_full_image, STD_cropped_image, ONE_cropped_image))
    
    F_sub_image_flat=F_sub_image.T.flatten() #column wise flattening, 63*4=252 features
    return(F_sub_image_flat)

In [6]:
#main function to extract the features.
def feature_extraction(path_to_folder, class_label):
    data_list=[]
    for file_name in os.listdir(path_to_folder):
        path_to_img = os.path.join(path_to_folder,file_name)
        img = cv2.imread(path_to_img)
        
        if np.shape(img) == ():
            continue
        
        img = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb) #changing to YCrCb color space.
        img_y = img[:,:,0] # the Y channel only.
        img_cr = img[:,:,1] # the Cr channel only.
        img_cb = img[:,:,2] # the Cb channel only.
        
        Fy = feature_sub_image(img_y)
        Fcr = feature_sub_image(img_cr)
        Fcb = feature_sub_image(img_cb)
#         print("fy shape: ",Fy.shape)
#         print("fcr shape: ",Fcr.shape)
#         print("fcb shape: ",Fcb.shape)
        
        final_feature = np.concatenate((Fy, Fcb, Fcr), axis=None) #63*4*3=756 flattened features.
#         print("final feature shape: ",final_feature.shape)
        
        final_feature=list(final_feature)
        final_feature.insert(0,file_name)
        final_feature.insert(1,class_label)
        data_list.append(final_feature)
        
        
    return(data_list)

In [50]:
# #CASIA V1.0 dataset
# au_path = "YOUR_PATH/CASIA 1.0 dataset/Au"
# tp_path1 = "YOUR_PATH/CASIA 1.0 dataset/Modified Tp/Tp/CM"
# tp_path2 = "YOUR_PATH/CASIA 1.0 dataset/Modified Tp/Tp/Sp"
# output_name='CASIA1_features.csv'

# data_list1 = feature_extraction(au_path, 0)
# data_list2 = feature_extraction(tp_path1, 1)
# data_list3 = feature_extraction(tp_path2, 1)

# df = pd.DataFrame(data_list1)
# df = df.append(pd.DataFrame(data_list2), ignore_index=True)
# df = df.append(pd.DataFrame(data_list3), ignore_index=True)
#  --------------------------------------------------------------------------------------

# CASIA V2.0 Dataset
au_path="/kaggle/input/casia-20-image-tampering-detection-dataset/CASIA2/Au"
tp_path="/kaggle/input/casia-20-image-tampering-detection-dataset/CASIA2/Tp"
output_name='CASIA2_features.csv'

data_list1 = feature_extraction(au_path, 0)
data_list2 = feature_extraction(tp_path, 1)

df = pd.DataFrame(data_list1)
df = df.append(pd.DataFrame(data_list2), ignore_index=True)

#  --------------------------------------------------------------------------------------

# Keep this as it is
df.rename(columns = {0: "image_names", 1: "label"}, inplace = True)

scaler_norm = MinMaxScaler(feature_range=(0, 1), *, copy=True, clip=False) 
df.iloc[:,2:] = scaler_norm.fit_transform(df.iloc[:,2:].to_numpy()) # Normalising the values in dataframe.

df.to_csv(output_name, index=False)

In [51]:
df.shape

(12614, 758)

## SVM CLASSIFICATION CASIA 2.0

In [7]:
df=pd.read_csv('/Users/andre/Desktop/conda/grad/CASIA3_features.csv')
df

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,...,748,749,750,751,752,753,754,755,756,757
0,Au_pla_30322.jpg,0,0.203030,0.114708,0.091051,0.071826,0.061546,0.051228,0.040522,0.381164,...,0.137913,0.175814,0.117100,0.132071,0.121396,0.130337,0.085628,0.153904,0.128289,0.180267
1,Au_ani_10197.jpg,0,0.153429,0.099287,0.123705,0.130321,0.138604,0.127693,0.166308,0.380715,...,0.135115,0.161243,0.316789,0.230002,0.134294,0.220743,0.041452,0.219506,0.135881,0.163205
2,Au_nat_00098.jpg,0,0.075379,0.058618,0.055116,0.050434,0.045391,0.043785,0.042841,0.510323,...,0.054707,0.106605,0.117100,0.090954,0.054628,0.088147,0.041755,0.087304,0.060476,0.108803
3,Au_nat_30407.jpg,0,0.320666,0.322521,0.298455,0.272717,0.269061,0.261953,0.242288,0.416608,...,0.138931,0.174114,0.117100,0.126838,0.129995,0.136364,0.110741,0.141432,0.139423,0.168398
4,Au_arc_20034.jpg,0,0.298256,0.227842,0.216022,0.196337,0.181617,0.196589,0.182347,0.488406,...,0.425445,0.338514,0.747706,0.544231,0.421345,0.487443,0.247201,0.510352,0.409919,0.336301
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14330,Au_txt_0096.jpg,0,0.737459,0.788444,0.730644,0.605657,0.520775,0.621279,0.482222,0.478789,...,0.422056,0.480904,0.017669,0.398091,0.413878,0.385524,0.300337,0.400253,0.417836,0.446856
14331,Au_txt_0097.jpg,0,0.365549,0.491332,0.494500,0.448025,0.376877,0.439603,0.321650,0.216863,...,0.543118,0.539361,0.017669,0.408998,0.552653,0.423191,0.607424,0.459151,0.540875,0.501497
14332,Au_txt_0098.jpg,0,0.229045,0.217076,0.171976,0.127476,0.077273,0.076050,0.055724,0.403313,...,0.417910,0.477007,0.017669,0.423995,0.439184,0.395864,0.429696,0.432552,0.439306,0.493263
14333,Au_txt_0099.jpg,0,0.406170,0.497929,0.483714,0.448247,0.398195,0.450239,0.341832,0.483110,...,0.540630,0.632112,0.017669,0.439673,0.535510,0.433530,0.525309,0.478784,0.526012,0.588323


In [7]:
newpicst = "/Users/andre/Desktop/conda/grad/tp"
newpicsa = "/Users/andre/Desktop/conda/grad/au"
newimt = feature_extraction(newpicst,1)
newima = feature_extraction(newpicsa,0)
ndf = pd.DataFrame(newimt)
ndf = ndf.append(pd.DataFrame(newima), ignore_index=True)
ndf.rename(columns = {0: "image_names", 1: "label"}, inplace = True)
scaler_norm = MinMaxScaler(feature_range=(0, 1),copy=True, clip=True) 
ndf.iloc[:,2:] = scaler_norm.fit_transform(ndf.iloc[:,2:].to_numpy())


In [8]:
output_name='CASIA1_features.csv'
ndf.to_csv(output_name, index=False)

In [26]:
new = df.append(ndf, ignore_index=True)
new.reset_index()
new

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,...,748,749,750,751,752,753,754,755,756,757
0,Au_pla_30322.jpg,0,0.203030,0.114708,0.091051,0.071826,0.061546,0.051228,0.040522,0.381164,...,,,,,,,,,,
1,Au_ani_10197.jpg,0,0.153429,0.099287,0.123705,0.130321,0.138604,0.127693,0.166308,0.380715,...,,,,,,,,,,
2,Au_nat_00098.jpg,0,0.075379,0.058618,0.055116,0.050434,0.045391,0.043785,0.042841,0.510323,...,,,,,,,,,,
3,Au_nat_30407.jpg,0,0.320666,0.322521,0.298455,0.272717,0.269061,0.261953,0.242288,0.416608,...,,,,,,,,,,
4,Au_arc_20034.jpg,0,0.298256,0.227842,0.216022,0.196337,0.181617,0.196589,0.182347,0.488406,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1716,Au_txt_0096.jpg,0,,,,,,,,,...,0.422056,0.480904,0.017669,0.398091,0.413878,0.385524,0.300337,0.400253,0.417836,0.446856
1717,Au_txt_0097.jpg,0,,,,,,,,,...,0.543118,0.539361,0.017669,0.408998,0.552653,0.423191,0.607424,0.459151,0.540875,0.501497
1718,Au_txt_0098.jpg,0,,,,,,,,,...,0.417910,0.477007,0.017669,0.423995,0.439184,0.395864,0.429696,0.432552,0.439306,0.493263
1719,Au_txt_0099.jpg,0,,,,,,,,,...,0.540630,0.632112,0.017669,0.439673,0.535510,0.433530,0.525309,0.478784,0.526012,0.588323


In [8]:
trainset = df.drop("image_names", axis=1)

In [9]:
array=trainset.values
x_feature=array[:,1:]
y_label=array[:,0].astype('int')
print(x_feature.shape)
print(y_label.shape)

(14335, 756)
(14335,)


In [10]:
x_feature

array([[0.2030304 , 0.11470844, 0.09105144, ..., 0.15390372, 0.12828947,
        0.18026706],
       [0.15342884, 0.0992869 , 0.12370499, ..., 0.21950611, 0.13588057,
        0.16320475],
       [0.07537936, 0.05861842, 0.05511573, ..., 0.08730357, 0.06047571,
        0.10880316],
       ...,
       [0.22904502, 0.21707648, 0.17197567, ..., 0.43255225, 0.43930636,
        0.49326347],
       [0.40616975, 0.4979294 , 0.48371449, ..., 0.47878404, 0.52601156,
        0.58832335],
       [0.4481609 , 0.54358333, 0.57104341, ..., 0.31792274, 0.26837324,
        0.33008982]])

In [11]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=7)

In [12]:
X_train

array([[0.28053323, 0.27715118, 0.26767285, ..., 0.16762285, 0.15460526,
        0.19114738],
       [0.21305225, 0.19484471, 0.20423936, ..., 0.10152158, 0.07135627,
        0.11275964],
       [0.18857999, 0.14335036, 0.14375774, ..., 0.07782489, 0.08147773,
        0.0615727 ],
       ...,
       [0.25852926, 0.20422386, 0.17813738, ..., 0.61486655, 0.58881579,
        0.39070227],
       [0.26715803, 0.23526538, 0.23992976, ..., 0.09753056, 0.10374494,
        0.07739862],
       [0.3292156 , 0.34775   , 0.35261075, ..., 0.12222499, 0.09741903,
        0.10583581]])

In [13]:

model_SVC=svm.SVC(kernel='rbf',C=100,gamma=0.001)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.946199 (0.007796)


In [13]:
# SVM hypertuning using GridSeachCV
model_SVC=svm.SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.915 total time=  23.9s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.900 total time=  23.6s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.900 total time=  23.6s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.909 total time=  23.8s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.922 total time=  23.9s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.897 total time=  23.7s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.909 total time=  23.8s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.909 total time=  25.4s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.917 total time=  25.8s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.914 total time=  24.6s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.958 total time=  15.4s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

[CV 2/10] END .......C=100, gamma=1, kernel=rbf;, score=0.905 total time=  17.4s
[CV 3/10] END .......C=100, gamma=1, kernel=rbf;, score=0.911 total time=  17.5s
[CV 4/10] END .......C=100, gamma=1, kernel=rbf;, score=0.920 total time=  17.4s
[CV 5/10] END .......C=100, gamma=1, kernel=rbf;, score=0.930 total time=  17.4s
[CV 6/10] END .......C=100, gamma=1, kernel=rbf;, score=0.901 total time=  17.2s
[CV 7/10] END .......C=100, gamma=1, kernel=rbf;, score=0.908 total time=  17.1s
[CV 8/10] END .......C=100, gamma=1, kernel=rbf;, score=0.906 total time=  17.5s
[CV 9/10] END .......C=100, gamma=1, kernel=rbf;, score=0.917 total time=  17.1s
[CV 10/10] END ......C=100, gamma=1, kernel=rbf;, score=0.910 total time=  17.4s
[CV 1/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.967 total time=   7.4s
[CV 2/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.959 total time=   7.8s
[CV 3/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.956 total time=   7.4s
[CV 4/10] END .....C=100, ga

[CV 4/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.917 total time=  17.0s
[CV 5/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.929 total time=  17.0s
[CV 6/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.904 total time=  17.0s
[CV 7/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.902 total time=  16.8s
[CV 8/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.905 total time=  16.8s
[CV 9/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.912 total time=  16.9s
[CV 10/10] END .....C=1000, gamma=1, kernel=rbf;, score=0.907 total time=  17.2s
[CV 1/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.955 total time=   6.8s
[CV 2/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.949 total time=   6.7s
[CV 3/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.950 total time=   6.9s
[CV 4/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.959 total time=   6.9s
[CV 5/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.965 total time=   7.0s
[CV 6/10] END ....C=1000, ga

In [14]:
model_SVC = svm.SVC(C=500,gamma=0.01, kernel='rbf')
model_SVC.fit(X_train,Y_train) 

predictions=model_SVC.predict(X_test)

print(accuracy_score(Y_test,predictions))
print(confusion_matrix(Y_test,predictions))
print(classification_report(Y_test,predictions))

0.9703522846180677
[[1598   64]
 [  21 1184]]
              precision    recall  f1-score   support

           0       0.99      0.96      0.97      1662
           1       0.95      0.98      0.97      1205

    accuracy                           0.97      2867
   macro avg       0.97      0.97      0.97      2867
weighted avg       0.97      0.97      0.97      2867



In [15]:
newpicst = "/Users/andre/Desktop/conda/grad/TP2"
newpicsa = "/Users/andre/Desktop/conda/grad/AU1"

In [17]:
newimt = feature_extraction(newpicst,1)
newima = feature_extraction(newpicsa,0)

In [None]:
ndf=pd.read_csv('/Users/andre/Desktop/conda/grad/CASIA1_features.csv')

In [18]:
ndf = pd.DataFrame(newimt)
ndf = ndf.append(pd.DataFrame(newima), ignore_index=True)

In [19]:
ndf.rename(columns = {0: "image_names", 1: "label"}, inplace = True)
ndf

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,...,748,749,750,751,752,753,754,755,756,757
0,IMG-20221227-WA0005.jpg,1,38.911957,16.765043,13.368419,11.749223,4.636841,3.150059,2.240882,603.136192,...,510.0,1582.0,6580.0,1518.0,528.0,1363.0,288.0,1532.0,550.0,1726.0
1,IMG-20221227-WA0006.jpg,1,97.736536,44.515916,29.83056,23.716125,15.108461,10.965616,8.42125,936.242326,...,1133.0,2147.0,6580.0,1948.0,1206.0,1793.0,747.0,1992.0,1189.0,2243.0
2,IMG-20221227-WA0007.jpg,1,96.319536,62.205731,46.072099,36.326072,21.511189,10.580042,5.773477,636.511367,...,1045.0,2215.0,6580.0,1899.0,1139.0,1672.0,688.0,2054.0,1096.0,2272.0
3,IMG-20221227-WA0008.jpg,1,54.224094,28.502323,22.263876,16.088467,9.036193,6.537947,4.322591,708.68512,...,767.0,2139.0,6580.0,1780.0,713.0,1720.0,466.0,1922.0,775.0,2217.0
4,IMG-20221227-WA0009.jpg,1,112.196142,64.840849,44.610874,30.77079,18.351991,9.99431,4.630016,606.053022,...,1746.0,3023.0,6580.0,2492.0,1755.0,2399.0,1126.0,2723.0,1857.0,3040.0
5,IMG-20221227-WA0010.jpg,1,94.222711,43.317604,22.843536,17.738706,10.957244,10.321636,9.141439,1266.505886,...,574.0,1742.0,6580.0,1376.0,606.0,1438.0,346.0,1527.0,636.0,1804.0
6,IMG-20221227-WA0011.jpg,1,81.67857,38.362623,21.829407,15.610875,10.192574,6.535511,5.421925,1023.192287,...,992.0,2273.0,6580.0,1885.0,947.0,1856.0,592.0,1981.0,964.0,2352.0
7,IMG-20221227-WA0012.jpg,1,60.577699,39.242977,29.50641,19.599184,12.39863,7.318635,4.685373,623.205664,...,866.0,2162.0,6580.0,1833.0,877.0,1740.0,550.0,2014.0,903.0,2270.0
8,IMG-20221227-WA0013.jpg,1,105.14613,58.874346,36.537279,23.891657,14.367548,7.384969,4.18914,752.000627,...,1461.0,2912.0,6580.0,2223.0,1488.0,2315.0,926.0,2448.0,1502.0,2904.0
9,IMG-20221227-WA0014.jpg,1,93.474098,37.30607,22.90793,14.620706,9.295993,7.180328,6.481602,1061.93974,...,1113.0,2236.0,6580.0,1901.0,1087.0,1791.0,675.0,1998.0,1130.0,2324.0


In [20]:
ndf.drop(columns = ndf.columns[0], axis = 1, inplace= True)
ndf

Unnamed: 0,label,2,3,4,5,6,7,8,9,10,...,748,749,750,751,752,753,754,755,756,757
0,1,38.911957,16.765043,13.368419,11.749223,4.636841,3.150059,2.240882,603.136192,39.515967,...,510.0,1582.0,6580.0,1518.0,528.0,1363.0,288.0,1532.0,550.0,1726.0
1,1,97.736536,44.515916,29.83056,23.716125,15.108461,10.965616,8.42125,936.242326,97.812302,...,1133.0,2147.0,6580.0,1948.0,1206.0,1793.0,747.0,1992.0,1189.0,2243.0
2,1,96.319536,62.205731,46.072099,36.326072,21.511189,10.580042,5.773477,636.511367,94.14657,...,1045.0,2215.0,6580.0,1899.0,1139.0,1672.0,688.0,2054.0,1096.0,2272.0
3,1,54.224094,28.502323,22.263876,16.088467,9.036193,6.537947,4.322591,708.68512,50.848855,...,767.0,2139.0,6580.0,1780.0,713.0,1720.0,466.0,1922.0,775.0,2217.0
4,1,112.196142,64.840849,44.610874,30.77079,18.351991,9.99431,4.630016,606.053022,111.732527,...,1746.0,3023.0,6580.0,2492.0,1755.0,2399.0,1126.0,2723.0,1857.0,3040.0
5,1,94.222711,43.317604,22.843536,17.738706,10.957244,10.321636,9.141439,1266.505886,95.532885,...,574.0,1742.0,6580.0,1376.0,606.0,1438.0,346.0,1527.0,636.0,1804.0
6,1,81.67857,38.362623,21.829407,15.610875,10.192574,6.535511,5.421925,1023.192287,80.272459,...,992.0,2273.0,6580.0,1885.0,947.0,1856.0,592.0,1981.0,964.0,2352.0
7,1,60.577699,39.242977,29.50641,19.599184,12.39863,7.318635,4.685373,623.205664,58.804506,...,866.0,2162.0,6580.0,1833.0,877.0,1740.0,550.0,2014.0,903.0,2270.0
8,1,105.14613,58.874346,36.537279,23.891657,14.367548,7.384969,4.18914,752.000627,104.421292,...,1461.0,2912.0,6580.0,2223.0,1488.0,2315.0,926.0,2448.0,1502.0,2904.0
9,1,93.474098,37.30607,22.90793,14.620706,9.295993,7.180328,6.481602,1061.93974,92.841984,...,1113.0,2236.0,6580.0,1901.0,1087.0,1791.0,675.0,1998.0,1130.0,2324.0


In [21]:
scaler_norm = MinMaxScaler(feature_range=(0, 1),copy=True, clip=True) 
ndf.iloc[:,1:] = scaler_norm.fit_transform(ndf.iloc[:,1:].to_numpy()) # Normalising the values in dataframe.
ndf

Unnamed: 0,label,2,3,4,5,6,7,8,9,10,...,748,749,750,751,752,753,754,755,756,757
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.045213,0.249948,0.312862,0.154197,0.047932,0.139184,0.045685,0.15987,0.051898,0.272579
1,1,0.321377,0.197926,0.156438,0.125702,0.13378,0.137909,0.122255,0.502143,0.321838,...,0.145455,0.368025,0.312862,0.210245,0.159225,0.197066,0.201015,0.221982,0.15553,0.377746
2,1,0.313635,0.324095,0.31078,0.258158,0.215578,0.131106,0.069879,0.050312,0.301601,...,0.131295,0.382236,0.312862,0.203858,0.148227,0.180778,0.181049,0.230354,0.140448,0.383645
3,1,0.083655,0.083713,0.084533,0.04558,0.056204,0.059781,0.041179,0.15911,0.062566,...,0.086565,0.366353,0.312862,0.188347,0.078299,0.187239,0.105922,0.21253,0.088388,0.372457
4,1,0.400374,0.342889,0.296894,0.199805,0.175218,0.12077,0.04726,0.004397,0.398688,...,0.244087,0.551097,0.312862,0.281152,0.249343,0.278638,0.329272,0.320686,0.263866,0.53987
5,1,0.30218,0.18938,0.090041,0.062914,0.080746,0.126546,0.136501,1.0,0.309254,...,0.055511,0.283386,0.312862,0.135688,0.060735,0.14928,0.065313,0.159195,0.065845,0.288446
6,1,0.233647,0.15404,0.080404,0.040563,0.070977,0.059738,0.062925,0.633216,0.225006,...,0.122767,0.394357,0.312862,0.202033,0.11671,0.205546,0.148562,0.220497,0.11904,0.399919
7,1,0.118367,0.160318,0.153358,0.082457,0.099161,0.073556,0.048355,0.030254,0.106487,...,0.102494,0.37116,0.312862,0.195255,0.10522,0.189931,0.134349,0.224953,0.109147,0.383238
8,1,0.361858,0.300334,0.220171,0.127546,0.124315,0.074727,0.038539,0.224406,0.358325,...,0.19823,0.5279,0.312862,0.24609,0.205515,0.267331,0.261591,0.283554,0.206293,0.512205
9,1,0.29809,0.146504,0.090653,0.030162,0.059523,0.071116,0.083886,0.691626,0.294399,...,0.142237,0.386625,0.312862,0.204119,0.139691,0.196796,0.17665,0.222792,0.145962,0.394223


In [22]:
array=ndf.values
tx_feature=array[:,1:]
ty_label=array[:,0].astype('int')
print(tx_feature.shape)
print(ty_label.shape)

(29, 756)
(29,)


In [23]:
tx_feature

array([[0.        , 0.        , 0.        , ..., 0.15987038, 0.0518975 ,
        0.27257933],
       [0.32137696, 0.1979264 , 0.15643811, ..., 0.22198218, 0.15553033,
        0.37774614],
       [0.31363545, 0.32409474, 0.31077986, ..., 0.23035377, 0.14044762,
        0.38364524],
       ...,
       [0.52856409, 0.51693817, 0.46698368, ..., 0.00324062, 0.        ,
        0.01688365],
       [0.62834118, 0.42380497, 0.4023209 , ..., 0.0305158 , 0.01459617,
        0.06143206],
       [0.73658201, 0.77211436, 0.78319276, ..., 1.        , 1.        ,
        1.        ]])

In [24]:
ty_label

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0])

In [25]:
test_predictions=model_SVC.predict(tx_feature)

In [26]:
test_predictions

array([0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 0])

In [27]:
print(accuracy_score(ty_label,test_predictions))
print(confusion_matrix(ty_label,test_predictions))
print(classification_report(ty_label,test_predictions))

0.3793103448275862
[[ 4 11]
 [ 7  7]]
              precision    recall  f1-score   support

           0       0.36      0.27      0.31        15
           1       0.39      0.50      0.44        14

    accuracy                           0.38        29
   macro avg       0.38      0.38      0.37        29
weighted avg       0.38      0.38      0.37        29

