In [23]:
import os
import skimage
from skimage import io, color ,feature

import numpy as np
import cv2
import re
from skimage.feature import graycomatrix, graycoprops

import pandas as pd

In [24]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
# -------------------- Utility function ------------------------
def normalize_label(str_):
    str_ = str_.replace(" ", "")
    str_ = str_.translate(str_.maketrans("","", "()"))
    str_ = str_.split("_")
    return ''.join(str_[:2])

def normalize_desc(folder, sub_folder):
    text = folder + " - " + sub_folder
    text = re.sub(r'\d+', '', text)
    text = text.replace(".", "")
    text = text.strip()
    return text

def print_progress(val, val_len, folder, sub_folder, filename, bar_size=10):
    progr = "#"*round((val)*bar_size/val_len) + " "*round((val_len - (val))*bar_size/val_len)
    if val == 0:
        print("", end = "\n")
    else:
        print("[%s] folder : %s/%s/ ----> file : %s" % (progr, folder, sub_folder, filename), end="\r")


In [26]:
dataset_dir = "/content/drive/My Drive/IIITNR_PROJECT/dataset"
os.listdir(dataset_dir)

['test', 'Train']

In [27]:
#initialization of list to store img ,label and desc

imgs = []  # List to store image matrices
labels = []  # List to store labels
descs = []  # List to store descriptions

#Iterate through the directory structure

for folder in os.listdir(dataset_dir):
    for sub_folder in os.listdir(os.path.join(dataset_dir, folder)):
        sub_folder_files = os.listdir(os.path.join(dataset_dir, folder, sub_folder))
        len_sub_folder = len(sub_folder_files) - 1

        #process each img file

        for i, filename in enumerate(sub_folder_files):
          img = cv2.imread(os.path.join(dataset_dir, folder, sub_folder, filename))
          #Convert the image to grayscale

          gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
          # Crop the central part of the image:
          h, w = gray.shape
          ymin, ymax, xmin, xmax = h//3, h*2//3, w//3, w*2//3
          crop = gray[ymin:ymax, xmin:xmax]

          #resize the croped image
          resize = cv2.resize(crop, (0,0), fx=0.5, fy=0.5)

          # applying threshold
          # Perform thresholding after resizing
          _, binary = cv2.threshold(resize, 127, 255, cv2.THRESH_BINARY_INV)

          imgs.append(binary)
          labels.append(sub_folder)
          descs.append(normalize_desc(folder, sub_folder))

          print_progress(i, len_sub_folder, folder, sub_folder, filename)



[##########] folder : test/Black Soil/ ----> file : images434.jpg
[##########] folder : test/Clay soil/ ----> file : images72.jpg
[##########] folder : test/Alluvial soil/ ----> file : image7 (2).jpeg
[##########] folder : test/Red soil/ ----> file : images202.jpg

[##########] folder : Train/Clay soil/ ----> file : images72.jpg
[##########] folder : Train/Red soil/ ----> file : red3.jpg


In [28]:
len(imgs)

1561

In [29]:
# ----------------- calculate greycomatrix() & greycoprops() for angle 0, 45, 90, 135 ----------------------------------
def calc_glcm_all_agls(img, label, props, dists=[5], agls=[0, np.pi/4, np.pi/2, 3*np.pi/4], lvl=256, sym=True, norm=True):

    glcm = graycomatrix(img,
                        distances=dists,
                        angles=agls,
                        levels=lvl,
                        symmetric=sym,
                        normed=norm)
    feature = []
    glcm_props = [propery for name in props for propery in graycoprops(glcm, name)[0]]
    for item in glcm_props:
            feature.append(item)
    feature.append(label)

    return feature



In [30]:
# ----------------- call calc_glcm_all_agls() for all properties ----------------------------------
properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']

glcm_all_agls = []
for img, label in zip(imgs, labels):
    glcm_all_agls.append(
            calc_glcm_all_agls(img,
                                label,
                                props=properties)
                            )

columns = []
angles = ['0', '45', '90','135']
for name in properties :
    for ang in angles:
        columns.append(name + "_" + ang)

columns.append("label")


In [31]:
# Create the pandas DataFrame for GLCM features data
glcm_df = pd.DataFrame(glcm_all_agls,
                      columns = columns)

In [32]:
#knowing dataset
type(glcm_df)
len(glcm_df)
glcm_df.shape

(1561, 25)

In [33]:
 glcm_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1561 entries, 0 to 1560
Data columns (total 25 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   dissimilarity_0    1561 non-null   float64
 1   dissimilarity_45   1561 non-null   float64
 2   dissimilarity_90   1561 non-null   float64
 3   dissimilarity_135  1561 non-null   float64
 4   correlation_0      1561 non-null   float64
 5   correlation_45     1561 non-null   float64
 6   correlation_90     1561 non-null   float64
 7   correlation_135    1561 non-null   float64
 8   homogeneity_0      1561 non-null   float64
 9   homogeneity_45     1561 non-null   float64
 10  homogeneity_90     1561 non-null   float64
 11  homogeneity_135    1561 non-null   float64
 12  contrast_0         1561 non-null   float64
 13  contrast_45        1561 non-null   float64
 14  contrast_90        1561 non-null   float64
 15  contrast_135       1561 non-null   float64
 16  ASM_0              1561 

In [34]:
glcm_df['label'].unique()

array(['Black Soil', 'Clay soil', 'Alluvial soil', 'Red soil'],
      dtype=object)

Building SVM on this dataset

In [35]:
glcm_df.head(20)

Unnamed: 0,dissimilarity_0,dissimilarity_45,dissimilarity_90,dissimilarity_135,correlation_0,correlation_45,correlation_90,correlation_135,homogeneity_0,homogeneity_45,...,contrast_135,ASM_0,ASM_45,ASM_90,ASM_135,energy_0,energy_45,energy_90,energy_135,label
0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Black Soil
1,13.47561,11.909341,13.526087,12.376374,-0.02714,0.013803,-0.027244,-0.024871,0.947155,0.953297,...,3155.975275,0.898498,0.908121,0.898133,0.906464,0.947891,0.952954,0.947699,0.952084,Black Soil
2,1.287879,1.366071,1.225962,1.366071,-0.002532,-0.002686,-0.00241,-0.002686,0.99495,0.994643,...,348.348214,0.989937,0.989329,0.990419,0.989329,0.994956,0.99465,0.995198,0.99465,Black Soil
3,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Black Soil
4,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Black Soil
5,6.947115,7.285714,7.083333,7.058036,0.041734,-0.014493,-0.014085,0.04656,0.972757,0.971429,...,1799.799107,0.945069,0.944082,0.945602,0.944057,0.972146,0.971639,0.972421,0.971626,Black Soil
6,18.658537,19.848901,19.734783,19.848901,0.004514,-0.040495,-0.040253,-0.040495,0.92683,0.922162,...,5061.46978,0.858681,0.853411,0.854202,0.853411,0.92665,0.923802,0.92423,0.923802,Black Soil
7,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Black Soil
8,1.635926,1.622727,1.549479,1.622727,-0.003218,-0.003192,-0.003047,-0.003192,0.993585,0.993636,...,413.795455,0.987231,0.987333,0.987903,0.987333,0.993595,0.993647,0.993933,0.993647,Black Soil
9,17.73913,14.478022,12.439024,13.31044,0.011518,0.031333,0.065846,0.096177,0.930436,0.943224,...,3394.162088,0.864898,0.887834,0.90138,0.892775,0.929999,0.942249,0.94941,0.944867,Black Soil


In [36]:
import matplotlib.pyplot as plt

In [37]:
#important libraries and import
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [38]:
# Extract features (X) and labels (y) from the DataFrame
X = glcm_df.drop(columns=['label'])  # Features
y = glcm_df['label']  # Labels

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [40]:
sc = StandardScaler()
x_train = sc.fit_transform(X_train)
x_test = sc.fit_transform(X_test)

In [41]:
svc = SVC()
svc.fit(x_train ,y_train)
pred_svc = svc.predict(x_test)
pred_svc

array(['Alluvial soil', 'Black Soil', 'Black Soil', 'Clay soil',
       'Clay soil', 'Alluvial soil', 'Black Soil', 'Alluvial soil',
       'Alluvial soil', 'Black Soil', 'Red soil', 'Alluvial soil',
       'Alluvial soil', 'Alluvial soil', 'Clay soil', 'Black Soil',
       'Black Soil', 'Clay soil', 'Alluvial soil', 'Black Soil',
       'Alluvial soil', 'Clay soil', 'Alluvial soil', 'Clay soil',
       'Alluvial soil', 'Black Soil', 'Alluvial soil', 'Clay soil',
       'Alluvial soil', 'Alluvial soil', 'Clay soil', 'Alluvial soil',
       'Alluvial soil', 'Clay soil', 'Alluvial soil', 'Alluvial soil',
       'Alluvial soil', 'Alluvial soil', 'Alluvial soil', 'Black Soil',
       'Black Soil', 'Clay soil', 'Alluvial soil', 'Black Soil',
       'Alluvial soil', 'Alluvial soil', 'Clay soil', 'Alluvial soil',
       'Red soil', 'Clay soil', 'Alluvial soil', 'Alluvial soil',
       'Alluvial soil', 'Alluvial soil', 'Alluvial soil', 'Black Soil',
       'Alluvial soil', 'Clay soil', 'Alluvi

In [42]:
print(classification_report(y_test ,pred_svc))

               precision    recall  f1-score   support

Alluvial soil       0.63      0.86      0.73       184
   Black Soil       0.65      0.78      0.71       111
    Clay soil       0.51      0.40      0.45        72
     Red soil       0.48      0.12      0.19       102

     accuracy                           0.61       469
    macro avg       0.57      0.54      0.52       469
 weighted avg       0.58      0.61      0.56       469



In [54]:
##finding best parameter for our SVC model
from sklearn.model_selection import GridSearchCV
params = {
    'C' : [0.1 ,0.8,0.9,1,1.1,1.2,1.3,1.4,3,5,7,8,9,10,12],
    'kernel' : ['linear' , 'rbf'],
    'gamma' : [0.1 ,0.8,0.9,1,1.1,1.2,1.3,1.4,2,4,6,8,10]

}

grid_svc= GridSearchCV(svc , param_grid = params , scoring = 'accuracy' , cv = 10)

In [55]:
grid_svc.fit(x_train , y_train)

In [56]:
grid_svc.best_params_

{'C': 10, 'gamma': 8, 'kernel': 'rbf'}

In [58]:
svc2 = SVC( C = 1.3 , gamma = 1.4 , kernel = 'rbf')
svc2.fit(x_train , y_train)
pred_svc2 = svc2.predict(x_test)
print(classification_report(y_test , pred_svc2))

               precision    recall  f1-score   support

Alluvial soil       0.88      0.80      0.84       184
   Black Soil       0.65      0.88      0.75       111
    Clay soil       0.72      0.64      0.68        72
     Red soil       0.57      0.49      0.53       102

     accuracy                           0.73       469
    macro avg       0.71      0.70      0.70       469
 weighted avg       0.73      0.73      0.72       469

