In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
import pickle  # Import pickle module

In [2]:
# Load dataset1.csv (path relative to the notebook's working directory)
# into the DataFrame `soil`.
soil = pd.read_csv(filepath_or_buffer="dataset1.csv")
# Bare trailing expression so the notebook renders the frame.
soil

Unnamed: 0,N,P,K,pH,EC,OC,S,Zn,Fe,Cu,Mn,B,Output
0,138,8.6,560,7.46,0.62,0.70,5.90,0.24,0.31,0.77,8.71,0.11,0
1,213,7.5,338,7.62,0.75,1.06,25.40,0.30,0.86,1.54,2.89,2.29,0
2,163,9.6,718,7.59,0.51,1.11,14.30,0.30,0.86,1.57,2.70,2.03,0
3,157,6.8,475,7.64,0.58,0.94,26.00,0.34,0.54,1.53,2.65,1.82,0
4,270,9.9,444,7.63,0.40,0.86,11.80,0.25,0.76,1.69,2.43,2.26,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,351,10.7,623,7.96,0.51,0.29,7.24,0.36,4.69,0.69,11.03,0.69,1
876,264,9.0,486,7.24,0.47,0.10,3.92,0.35,8.26,0.45,7.98,0.40,1
877,276,9.2,370,7.62,0.62,0.49,6.64,0.42,3.57,0.63,6.48,0.32,1
878,320,13.8,391,7.38,0.65,1.07,5.43,0.58,4.58,1.02,13.25,0.53,2


In [3]:
# Target: the 'Output' column. Features: every remaining column of `soil`.
y = soil['Output']
x = soil.drop(columns='Output')


In [4]:
# Count missing values per column (isna is the pandas alias of isnull).
soil.isna().sum()

N         0
P         0
K         0
pH        0
EC        0
OC        0
S         0
Zn        0
Fe        0
Cu        0
Mn        0
B         0
Output    0
dtype: int64

In [5]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

In [6]:
# Random forest of 10 trees that uses entropy as the split criterion.
# random_state is pinned so that a fresh Restart & Run All rebuilds the same
# forest; without it the bootstrap samples and feature subsets are drawn
# differently on every run and the reported scores cannot be reproduced.
# The value 0 matches the seed already used for the train/test split.
classifier = RandomForestClassifier(
    n_estimators=10,
    criterion="entropy",
    random_state=0,
)
# Fit on the training split; as the last expression, the fitted estimator is
# also what the notebook displays for this cell.
classifier.fit(x_train, y_train)

In [7]:
# Predicted labels for the held-out split ...
y_pred = classifier.predict(X=x_test)
# ... and for the training split. Despite its name, x_pred holds predicted
# labels for x_train, not feature values.
x_pred = classifier.predict(X=x_train)

In [8]:
# R^2 between the training labels and the training-set predictions,
# scaled to a percentage.
# NOTE(review): r2_score is a regression metric applied here to predicted
# class labels -- confirm this is intended; accuracy is reported in a later cell.
r2 = 100 * r2_score(y_true=y_train, y_pred=x_pred)
print("Train R^2 Score:", r2)

Train R^2 Score: 96.88676622949991


In [9]:
# Mean squared error between the test labels and the test predictions.
# NOTE(review): the printed figure is the MSE multiplied by 100, not the raw
# MSE, and MSE is a regression metric applied to class labels -- confirm intent.
mean = 100 * mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Test Mean Squared Error:", mean)

Test Mean Squared Error: 16.666666666666664


In [10]:
# R^2 between the test labels and the test predictions, as a percentage.
# This rebinds `r2`, replacing the training-set value computed earlier.
# NOTE(review): r2_score is a regression metric applied to class labels --
# confirm this is intended.
r2 = 100 * r2_score(y_true=y_test, y_pred=y_pred)
print("Test R^2 Score:", r2)

Test R^2 Score: 55.252513579105496


In [11]:
# Predict the test split again and print the raw label array.
# The same call on the same fitted classifier was already stored in y_pred.
pred = classifier.predict(X=x_test)
print("Test Predictions:", pred)


Test Predictions: [0 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 1 0 0 1 1 0 1 0 0 0 1 1 1 0 1 1 1 0 0 0 1
 1 1 1 1 1 0 0 0 1 1 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 1 1 1 0
 0 1 0 1 0 0 1 1 0 1 1 1 1 1 0 0 1 1 0 0 1 1 0 1 0 1 0 1 1 0 1 0 0 0 0 0 0
 1 0 0 0 0 0 0 1 0 0 0 1 1 1 1 0 1 0 0 1 0 0 0 1 1 0 1 1 0 1 1 1 0 1 0 1 1
 1 1 1 1 1 0 1 0 0 0 1 1 1 0 0 0 1 1 1 1 1 1 0 1 0 1 0 1 0 1 1 0 0 1 1 1 1
 0 1 0 0 0 1 0 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 1 0 0 1
 0 0 0 1 0 0 0 1 1 1 0 1 1 1 1 1 0 0 0 0 0 1 0 1 0 1 0 0 0 0 1 1 0 0 1 1 1
 0 1 0 1 1]


In [12]:
# One hand-written sample to sanity-check the model; the values equal row 0
# of the dataset displayed above.
# The classifier was fitted on a DataFrame, so the sample is wrapped in a
# one-row DataFrame carrying the training feature columns. Passing a bare
# nested list drops the feature names (scikit-learn >= 1.0 warns about this)
# and silently relies on the values being typed in training column order.
test_input = pd.DataFrame(
    [[138, 8.6, 560, 7.46, 0.62, 0.70, 5.90, 0.24, 0.31, 0.77, 8.71, 0.11]],
    columns=x_train.columns,
)
test_pred = classifier.predict(test_input)
print("Prediction for Test Input:", test_pred)

Prediction for Test Input: [0]




In [13]:
# Fraction of test rows whose predicted label equals the true label;
# `ac` keeps the raw fraction and the print shows it as a percentage.
ac = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test Accuracy:", 100 * ac)

Test Accuracy: 84.46969696969697


In [14]:
# Serialize the fitted classifier to classifi.pkl (binary write mode);
# the with-block closes the file handle when the dump finishes.
with open("classifi.pkl", mode="wb") as model_file:
    pickle.dump(classifier, model_file)

In [15]:
# Read the pickled classifier back from classifi.pkl, the file this notebook
# wrote in the preceding cell.
# pickle.load can execute arbitrary code, so only load files you trust.
with open("classifi.pkl", mode="rb") as model_file:
    loaded_classifier = pickle.load(model_file)

In [16]:
# Run the reloaded model on the same hand-written sample, so the printed
# result can be compared with the in-memory classifier's prediction.
test_pred_loaded = loaded_classifier.predict(X=test_input)
print("Prediction for Test Input using Loaded Model:", test_pred_loaded)

Prediction for Test Input using Loaded Model: [0]


