In [63]:
# Soil Fertility
# Use elemetal soil analysis to determine the fertility of the soil.

# INPUT
# N - ratio of Nitrogen (NH4+) content in soil
# P - ratio of Phosphorous (P) content in soil
# K - ratio of Potassium (K) content in soil
# pH - soil acidity (pH)
# EC - electrical conductivity
# OC - organic carbon
# S - sulfur (S)
# Zn - Zinc (Zn)
# Fe - Iron (Fe)
# Cu - Copper (Cu)
# Mn - Manganese (Mn)
# B - Boron (B)

# OUTPUT
# Class fertility (0 "Less Fertile", 1 "Fertile", 2 "Highly Fertile")


In [65]:
# Exploratory data analysis
import pandas as pd
import numpy as np

# Scikit-Learn
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split


In [66]:
soil = pd.read_csv('soil-fertility.csv')
soil.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 880 entries, 0 to 879
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   N       880 non-null    int64  
 1   P       880 non-null    float64
 2   K       880 non-null    int64  
 3   pH      880 non-null    float64
 4   EC      880 non-null    float64
 5   OC      880 non-null    float64
 6   S       880 non-null    float64
 7   Zn      880 non-null    float64
 8   Fe      880 non-null    float64
 9   Cu      880 non-null    float64
 10  Mn      880 non-null    float64
 11  B       880 non-null    float64
 12  Output  880 non-null    int64  
dtypes: float64(10), int64(3)
memory usage: 89.5 KB


In [67]:
soil.head()

Unnamed: 0,N,P,K,pH,EC,OC,S,Zn,Fe,Cu,Mn,B,Output
0,138,8.6,560,7.46,0.62,0.7,5.9,0.24,0.31,0.77,8.71,0.11,0
1,213,7.5,338,7.62,0.75,1.06,25.4,0.3,0.86,1.54,2.89,2.29,0
2,163,9.6,718,7.59,0.51,1.11,14.3,0.3,0.86,1.57,2.7,2.03,0
3,157,6.8,475,7.64,0.58,0.94,26.0,0.34,0.54,1.53,2.65,1.82,0
4,270,9.9,444,7.63,0.4,0.86,11.8,0.25,0.76,1.69,2.43,2.26,1


In [68]:
X_soil = soil.drop('Output', axis = 1)
y_soil = soil['Output']
y_soil

0      0
1      0
2      0
3      0
4      1
      ..
875    1
876    1
877    1
878    2
879    0
Name: Output, Length: 880, dtype: int64

In [69]:
# Splitting the dataset
# 80%: Training data
# 20%: Testing data
X_train, X_test, y_train, y_test = train_test_split(X_soil, y_soil, test_size=0.2, random_state=42)


In [70]:
#Support Vector Machine with 4 different kernel

# Define a list of kernel names
kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']

# Iterate through different kernels using a loop
for kernel in kernel_list:
    # Create and train the SVM model
    svc_model = SVC(kernel=kernel)
    svc_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred = svc_model.predict(X_test)
    
    # Calculate and print accuracy for each kernel
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy (Kernel={kernel}): {accuracy}")
    
    # Calculate and print confusion matrix for each kernel
    cm = confusion_matrix(y_test, y_pred)
    print(f'Confusion Matrix (Kernel = {kernel}):\n{cm}')
    
    # Calculate and print classification report for each kernel
    report = classification_report(y_test, y_pred)
    print(f'Classification Report (Kernel={kernel}):\n{report}\n')

Accuracy (Kernel=linear): 0.8352272727272727
Confusion Matrix (Kernel = linear):
[[71  7  0]
 [12 76  0]
 [ 0 10  0]]
Classification Report (Kernel=linear):
              precision    recall  f1-score   support

           0       0.86      0.91      0.88        78
           1       0.82      0.86      0.84        88
           2       0.00      0.00      0.00        10

    accuracy                           0.84       176
   macro avg       0.56      0.59      0.57       176
weighted avg       0.79      0.84      0.81       176


Accuracy (Kernel=poly): 0.8465909090909091
Confusion Matrix (Kernel = poly):
[[71  7  0]
 [10 78  0]
 [ 0 10  0]]
Classification Report (Kernel=poly):
              precision    recall  f1-score   support

           0       0.88      0.91      0.89        78
           1       0.82      0.89      0.85        88
           2       0.00      0.00      0.00        10

    accuracy                           0.85       176
   macro avg       0.57      0.60     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [71]:
# K-Nearest Neighbors with 4 different n_neighbors

# Define a list of k values for neighbors
k_values = [20, 40, 60, 80]

# Iterate through different k values using a loop
for k in k_values:
    # Create and train the KNN model
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred = knn_model.predict(X_test)
    
    # Calculate and print accuracy for each k value
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy (k={k}): {accuracy}')
    
    # Calculate and print confusion matrix for each k value
    cm = confusion_matrix(y_test, y_pred)
    print(f'Confusion Matrix (k={k}):\n{cm}')
    
    # Calculate and print classification report for each k value
    report = classification_report(y_test, y_pred)
    print(f'Classification Report (k={k}):\n{report}\n')

Accuracy (k=20): 0.8352272727272727
Confusion Matrix (k=20):
[[71  7  0]
 [12 76  0]
 [ 0 10  0]]
Classification Report (k=20):
              precision    recall  f1-score   support

           0       0.86      0.91      0.88        78
           1       0.82      0.86      0.84        88
           2       0.00      0.00      0.00        10

    accuracy                           0.84       176
   macro avg       0.56      0.59      0.57       176
weighted avg       0.79      0.84      0.81       176


Accuracy (k=40): 0.8465909090909091
Confusion Matrix (k=40):
[[73  5  0]
 [12 76  0]
 [ 1  9  0]]
Classification Report (k=40):
              precision    recall  f1-score   support

           0       0.85      0.94      0.89        78
           1       0.84      0.86      0.85        88
           2       0.00      0.00      0.00        10

    accuracy                           0.85       176
   macro avg       0.56      0.60      0.58       176
weighted avg       0.80      0.85   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [72]:
# Random Forest with 4 different n_estimators

# Define a list of different numbers of estimators
estimator_values = [50, 100, 150, 200]

# Iterate through different numbers of estimators using a loop
for n_estimators in estimator_values:
    # Create and train the Random Forest model
    rf_model = RandomForestClassifier(n_estimators=n_estimators, random_state=42)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred = rf_model.predict(X_test)
    
    # Calculate and print accuracy for each number of estimators
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy (Estimators={n_estimators}): {accuracy}')
    
    # Calculate and print confusion matrix for each number of estimators
    cm = confusion_matrix(y_test, y_pred)
    print(f'Confusion Matrix (Estimators={n_estimators}):\n{cm}')
    
    # Calculate and print classification report for each number of estimators
    report = classification_report(y_test, y_pred)
    print(f'Classification Report (Estimators={n_estimators}):\n{report}\n')

Accuracy (Estimators=50): 0.8806818181818182
Confusion Matrix (Estimators=50):
[[75  3  0]
 [ 8 80  0]
 [ 0 10  0]]
Classification Report (Estimators=50):
              precision    recall  f1-score   support

           0       0.90      0.96      0.93        78
           1       0.86      0.91      0.88        88
           2       0.00      0.00      0.00        10

    accuracy                           0.88       176
   macro avg       0.59      0.62      0.61       176
weighted avg       0.83      0.88      0.85       176




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (Estimators=100): 0.8863636363636364
Confusion Matrix (Estimators=100):
[[76  2  0]
 [ 8 80  0]
 [ 0 10  0]]
Classification Report (Estimators=100):
              precision    recall  f1-score   support

           0       0.90      0.97      0.94        78
           1       0.87      0.91      0.89        88
           2       0.00      0.00      0.00        10

    accuracy                           0.89       176
   macro avg       0.59      0.63      0.61       176
weighted avg       0.84      0.89      0.86       176




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (Estimators=150): 0.875
Confusion Matrix (Estimators=150):
[[75  3  0]
 [ 9 79  0]
 [ 0 10  0]]
Classification Report (Estimators=150):
              precision    recall  f1-score   support

           0       0.89      0.96      0.93        78
           1       0.86      0.90      0.88        88
           2       0.00      0.00      0.00        10

    accuracy                           0.88       176
   macro avg       0.58      0.62      0.60       176
weighted avg       0.83      0.88      0.85       176




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy (Estimators=200): 0.875
Confusion Matrix (Estimators=200):
[[75  3  0]
 [ 9 79  0]
 [ 0 10  0]]
Classification Report (Estimators=200):
              precision    recall  f1-score   support

           0       0.89      0.96      0.93        78
           1       0.86      0.90      0.88        88
           2       0.00      0.00      0.00        10

    accuracy                           0.88       176
   macro avg       0.58      0.62      0.60       176
weighted avg       0.83      0.88      0.85       176




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
