In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [3]:
dataset = pd.read_csv('plant_health_data.csv')
dataset.head()

Unnamed: 0,Timestamp,Plant_ID,Soil_Moisture,Ambient_Temperature,Soil_Temperature,Humidity,Light_Intensity,Soil_pH,Nitrogen_Level,Phosphorus_Level,Potassium_Level,Chlorophyll_Content,Electrochemical_Signal,Plant_Health_Status
0,2024-10-03 10:54:53.407995,1,27.521109,22.240245,21.900435,55.291904,556.172805,5.581955,10.00365,45.806852,39.076199,35.703006,0.941402,High Stress
1,2024-10-03 16:54:53.407995,1,14.835566,21.706763,18.680892,63.949181,596.136721,7.135705,30.712562,25.394393,17.944826,27.993296,0.164899,High Stress
2,2024-10-03 22:54:53.407995,1,17.086362,21.180946,15.392939,67.837956,591.124627,5.656852,29.337002,27.573892,35.70653,43.646308,1.081728,High Stress
3,2024-10-04 04:54:53.407995,1,15.336156,22.593302,22.778394,58.190811,241.412476,5.584523,16.966621,26.180705,26.257746,37.838095,1.186088,High Stress
4,2024-10-04 10:54:53.407995,1,39.822216,28.929001,18.100937,63.772036,444.49383,5.919707,10.944961,37.898907,37.654483,48.265812,1.609805,High Stress


In [4]:
dataset.describe()

Unnamed: 0,Plant_ID,Soil_Moisture,Ambient_Temperature,Soil_Temperature,Humidity,Light_Intensity,Soil_pH,Nitrogen_Level,Phosphorus_Level,Potassium_Level,Chlorophyll_Content,Electrochemical_Signal
count,1200.0,1200.0,1200.0,1200.0,1200.0,1200.0,1200.0,1200.0,1200.0,1200.0,1200.0,1200.0
mean,5.5,25.106918,23.99913,19.957794,54.853165,612.637265,6.524102,30.106751,30.264484,30.112088,34.749591,0.987764
std,2.873479,8.677725,3.441561,2.932073,8.784916,228.318853,0.581755,11.514396,11.466846,11.668085,8.766995,0.575116
min,1.0,10.000724,18.001993,15.00371,40.028758,200.615482,5.507392,10.00365,10.01769,10.000606,20.025511,0.002376
25%,3.0,17.131893,21.101766,17.353027,47.019694,416.878983,6.026042,20.249774,20.894445,19.585561,27.46335,0.487982
50%,5.5,25.168333,23.889044,19.911473,54.692069,617.240221,6.540524,30.13859,30.019385,30.495054,34.433427,0.981647
75%,8.0,32.370231,27.042634,22.596851,62.451053,811.47469,7.030039,40.184737,40.131459,40.108296,42.232637,1.473142
max,10.0,39.993164,29.990886,24.995929,69.968871,999.856262,7.497823,49.951136,49.9807,49.981945,49.990811,1.996116


In [213]:
features = dataset.drop(columns= ['Timestamp', 'Plant_ID', 'Plant_Health_Status'], axis= 1)
target = dataset['Plant_Health_Status']

In [214]:
dataset['Plant_Health_Status'].value_counts()

Plant_Health_Status
High Stress        500
Moderate Stress    401
Healthy            299
Name: count, dtype: int64

In [215]:
dataset.isnull().sum()

Timestamp                 0
Plant_ID                  0
Soil_Moisture             0
Ambient_Temperature       0
Soil_Temperature          0
Humidity                  0
Light_Intensity           0
Soil_pH                   0
Nitrogen_Level            0
Phosphorus_Level          0
Potassium_Level           0
Chlorophyll_Content       0
Electrochemical_Signal    0
Plant_Health_Status       0
dtype: int64

In [216]:
# from sklearn.preprocessing import StandardScaler
# scaling = StandardScaler()
# features = scaling.fit_transform(features)

In [217]:
target

0           High Stress
1           High Stress
2           High Stress
3           High Stress
4           High Stress
             ...       
1195    Moderate Stress
1196        High Stress
1197    Moderate Stress
1198            Healthy
1199    Moderate Stress
Name: Plant_Health_Status, Length: 1200, dtype: object

In [218]:
from sklearn.model_selection import train_test_split

Xtrain, Xtest, ytrain, ytest = train_test_split(features, target, test_size= 0.4, random_state= 42)

In [219]:
from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(Xtrain, ytrain)

In [220]:
y_train_resampled.value_counts()

Plant_Health_Status
Moderate Stress    297
Healthy            297
High Stress        297
Name: count, dtype: int64

In [221]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators= 100, criterion= 'entropy', max_depth= 3, random_state=42)
model.fit(X_train_resampled, y_train_resampled)

In [222]:
train_predict = model.predict(X_train_resampled)
test_predict = model.predict(Xtest)


In [223]:
from sklearn.metrics import accuracy_score
print(f'Training accuracu : {accuracy_score(train_predict, y_train_resampled)}')
print(f'Testing accuracy : {accuracy_score(ytest, test_predict)}')

Training accuracu : 0.9977553310886644
Testing accuracy : 0.9979166666666667


In [224]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_train_resampled, train_predict))

[[297   0   0]
 [  0 297   0]
 [  2   0 295]]


In [225]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(ytest, test_predict))

[[112   0   0]
 [  0 203   0]
 [  0   1 164]]


In [226]:
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
print('Accuracy Score',accuracy_score(ytest, test_predict)*100,'%')
print('Precision Macro Score',precision_score(ytest, test_predict,average = 'macro')*100,'%')
print('Recall_Score',recall_score(ytest, test_predict, average = 'macro')*100,'%')
print('F_Score',f1_score(ytest, test_predict, average = 'macro')*100,'%')

Accuracy Score 99.79166666666667 %
Precision Macro Score 99.83660130718954 %
Recall_Score 99.7979797979798 %
F_Score 99.8167827955062 %


In [228]:
from joblib import dump
dump(model, 'plant_health_model.joblib')

['plant_health_model.joblib']