In [1]:
from PIL import Image
import os
import numpy as np
import pandas as pd
import csv
from IPython.display import clear_output

In [11]:
from sklearn.model_selection import train_test_split

In [4]:
def generated_images(image_folder,color,size=(256,256)):
  """Load every image in a folder as a flattened pixel vector.

  Args:
    image_folder: Directory to scan (not recursive).
    color: PIL mode passed to ``Image.convert`` (e.g. 'L' or 'RGB').
    size: ``(width, height)`` every image is resized to before flattening.

  Returns:
    A tuple ``(images, images_name)``: a list of 1-D numpy arrays and the
    list of matching file names, both in sorted file-name order.
  """
  extensions = ('.png', '.jpg', '.jpeg', '.bmp')
  images,images_name = [],[]
  # os.listdir order is arbitrary, so sort to keep the row order reproducible.
  # Lower-casing the name also picks up files such as 'IMG_01.JPG'.
  filenames = sorted(f for f in os.listdir(image_folder) if f.lower().endswith(extensions))
  for i, filename in enumerate(filenames):
    image_path = os.path.join(image_folder, filename)
    # The context manager releases the file handle once the pixels are converted.
    with Image.open(image_path) as source:
      image = source.convert(color).resize(size)
    images.append(np.array(image).flatten())
    images_name.append(filename)
    # Single-line progress indicator: the next print replaces the previous one.
    print(f"Image no. {i}")
    clear_output(wait=True)
  return images,images_name

In [None]:
image_folder = r'soil_classification-2025/train'
images,images_name = generated_images(image_folder,'L')
# Written to 'images_grey.csv' because that is the file the loading cell reads
# back; the previous name 'images.csv' was never read anywhere in the notebook.
with open('images_grey.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    # Header: the file name followed by one column per pixel of a 256x256 image.
    writer.writerow(['Image Name'] + [f'Pixel {i}' for i in range(256*256)])
    # One row per image: its file name, then the flattened pixel values.
    writer.writerows([name] + image.tolist() for name, image in zip(images_name, images))

Image no. 1213


In [3]:
# Load the per-image pixel table from CSV. Later cells merge on its
# 'Image Name' column and use the remaining columns as model features.
images_df = pd.read_csv('images_grey.csv')

In [4]:
# Forward slashes resolve on every OS and match the image-folder path used
# earlier; the backslash form only worked on Windows.
# 'image_id' is renamed so the labels share a key column with the pixel table.
labels = (
    pd.read_csv('soil_classification-2025/train_labels.csv')
    .rename(columns={'image_id': 'Image Name'})
)

In [5]:
# Attach the label columns to each image row by file name.
# - Label columns left over from an earlier run of this cell are dropped first,
#   otherwise a re-run yields suffixed duplicates (soil_type_x / soil_type_y).
# - validate='many_to_one' raises if a file name appears more than once in
#   `labels`, which would otherwise silently duplicate image rows.
images_df = (
    images_df
    .drop(columns=[c for c in labels.columns if c != 'Image Name' and c in images_df.columns])
    .merge(labels, on='Image Name', how='left', validate='many_to_one')
)


In [15]:
# The file name is an identifier, not a feature, so remove it before modelling.
# Reassigning with errors='ignore' (instead of inplace=True) keeps the cell
# safe to re-run once the column is already gone.
images_df = images_df.drop(columns=['Image Name'], errors='ignore')
images_df

Unnamed: 0,Pixel 0,Pixel 1,Pixel 2,Pixel 3,Pixel 4,Pixel 5,Pixel 6,Pixel 7,Pixel 8,Pixel 9,...,Pixel 65527,Pixel 65528,Pixel 65529,Pixel 65530,Pixel 65531,Pixel 65532,Pixel 65533,Pixel 65534,Pixel 65535,soil_type
0,0,2,2,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Clay soil
1,230,230,230,229,229,230,229,229,201,198,...,209,202,221,219,198,204,202,196,192,Alluvial soil
2,141,140,160,175,144,156,142,126,133,156,...,142,149,144,126,98,110,108,112,127,Red soil
3,207,198,182,226,230,172,166,189,160,178,...,158,141,167,109,106,138,98,139,192,Clay soil
4,69,102,27,36,69,62,56,68,36,50,...,28,23,8,16,31,28,15,18,23,Black Soil
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,226,223,227,227,227,227,226,218,214,223,...,147,147,176,145,163,192,192,183,187,Alluvial soil
1210,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Clay soil
1211,117,142,117,71,79,40,49,134,105,47,...,103,67,40,20,37,41,16,12,13,Black Soil
1212,161,163,165,168,169,164,164,164,166,164,...,128,81,105,135,140,161,167,167,142,Alluvial soil


In [16]:
# Target is the soil-type label; features are every remaining column.
y = images_df['soil_type']
x = images_df.drop(columns=['soil_type'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [17]:
# Baseline: linear-kernel SVM trained directly on the raw pixel values.
model = SVC(kernel='linear', C=1.0, random_state=42).fit(x_train, y_train)

# Score the held-out rows and print per-class precision / recall / F1.
y_pred = model.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

               precision    recall  f1-score   support

Alluvial soil       0.92      0.95      0.93        96
   Black Soil       0.93      0.93      0.93        46
    Clay soil       0.94      0.71      0.81        41
     Red soil       0.85      0.95      0.90        60

     accuracy                           0.91       243
    macro avg       0.91      0.89      0.89       243
 weighted avg       0.91      0.91      0.90       243



In [18]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [19]:
# Learn the per-column scaling statistics from the training rows only, then
# apply that same transform to both the training and the held-out rows.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Linear-kernel SVM with the same hyper-parameters, fitted on the standardised features.
model_scaled = SVC(kernel='linear', C=1.0, random_state=42).fit(x_train_scaled, y_train)
y_pred_scaled = model_scaled.predict(x_test_scaled)
print(classification_report(y_true=y_test, y_pred=y_pred_scaled))

               precision    recall  f1-score   support

Alluvial soil       0.91      0.96      0.93        96
   Black Soil       0.93      0.93      0.93        46
    Clay soil       1.00      0.71      0.83        41
     Red soil       0.85      0.95      0.90        60

     accuracy                           0.91       243
    macro avg       0.92      0.89      0.90       243
 weighted avg       0.92      0.91      0.91       243



In [None]:
from sklearn.ensemble import RandomForestClassifier

In [20]:
# Random forest of 100 trees on the unscaled pixel values, seeded for repeatability.
forest_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(x_train, y_train)

# Score the held-out rows and print per-class precision / recall / F1.
y_pred_forest = forest_model.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred_forest))

               precision    recall  f1-score   support

Alluvial soil       0.93      0.95      0.94        96
   Black Soil       0.88      0.93      0.91        46
    Clay soil       1.00      0.80      0.89        41
     Red soil       0.86      0.90      0.88        60

     accuracy                           0.91       243
    macro avg       0.92      0.90      0.90       243
 weighted avg       0.91      0.91      0.91       243



In [29]:
# Majority vote across the three classifiers for each test row. max() returns
# the first entry with the highest count, so when all three disagree the vote
# falls back to the first listed prediction (the unscaled SVM).
y_pred_bagged = np.array([
    max(votes, key=votes.count)
    for votes in zip(y_pred, y_pred_scaled, y_pred_forest)
])
print(classification_report(y_true=y_test, y_pred=y_pred_bagged))

               precision    recall  f1-score   support

Alluvial soil       0.92      0.95      0.93        96
   Black Soil       0.93      0.93      0.93        46
    Clay soil       0.94      0.71      0.81        41
     Red soil       0.85      0.95      0.90        60

     accuracy                           0.91       243
    macro avg       0.91      0.89      0.89       243
 weighted avg       0.91      0.91      0.90       243



In [23]:
# Scratch check of the count-based ordering: sort the values so the most
# frequent come first (ties keep their original order), then look at the
# second entry.
a = [1, 2, 3, 4, 5, 6, 6, 1, 2, 2]
sorted(a, key=lambda value: a.count(value), reverse=True)[1]

2