In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the ginger fungal-susceptibility records from the CSV file that sits
# next to this notebook into a DataFrame.
dataset_path = 'ginger_fungal_susceptibility_dataset.csv'
data = pd.read_csv(dataset_path)

# Print a five-row preview so the column names and value formats are visible.
preview = data.head(5)
print(preview)

   temperature   humidity    rainfall  soil_moisture        ph crop_rotation  \
0    25.618102  83.100757  148.408880      41.715125  6.247282           yes   
1    34.260715  80.581112  168.358915      53.315744  6.165824            no   
2    30.979909  72.941158  263.636848      25.003556  5.852308           yes   
3    28.979877  88.241886  135.001097      25.191319  6.714533            no   
4    22.340280  58.018981  267.412421      42.272734  6.453248            no   

  irrigation previous_fungal_infection fungal_species fungal_risk  
0     medium                       yes       Fusarium      Medium  
1     medium                        no            NaN      Medium  
2     medium                       yes    Rhizoctonia        High  
3       high                       yes       Fusarium        High  
4     medium                        no       Fusarium         Low  


In [3]:
# Preprocessing
# Encode the categorical feature columns as integer codes. One fitted
# LabelEncoder is kept per column (in `label_encoders`) so the same
# string -> integer mapping can be reused later.
CATEGORICAL_COLUMNS = ['crop_rotation', 'irrigation', 'previous_fungal_infection', 'fungal_species']

# Explicit label for missing values. The data preview shows NaN in
# 'fungal_species'; without this, NaN handling is left to LabelEncoder and the
# fitted encoder ends up with a float NaN among its string classes (behaviour
# may differ between scikit-learn versions - verify for the installed one).
# The sentinel is lowercase so that it sorts after capitalized names such as
# 'Fusarium' and therefore receives the highest code
# (assumes every real category name is capitalized - TODO confirm).
MISSING_LABEL = 'unknown'

label_encoders = {}

for column in CATEGORICAL_COLUMNS:
    # This cell overwrites data[column] with integer codes. Running it a second
    # time would fit the encoders on those codes instead of the original
    # strings, silently corrupting `label_encoders`. Fail loudly instead.
    if pd.api.types.is_numeric_dtype(data[column]):
        raise RuntimeError(
            f"Column '{column}' is already numeric; reload the dataset before "
            "re-running the encoding cell."
        )

    le = LabelEncoder()
    data[column] = le.fit_transform(data[column].fillna(MISSING_LABEL))
    label_encoders[column] = le

In [4]:
# Turn the 'fungal_risk' labels into integer codes with a dedicated encoder;
# the codes are stored in a new column so the original labels stay available.
le_fungal_risk = LabelEncoder()
risk_codes = le_fungal_risk.fit_transform(data['fungal_risk'])
data['fungal_risk_encoded'] = risk_codes

# Model inputs: the five numeric measurements followed by the four
# categorical columns, in that order.
numeric_columns = ['temperature', 'humidity', 'rainfall', 'soil_moisture', 'ph']
categorical_columns = ['crop_rotation', 'irrigation', 'previous_fungal_infection', 'fungal_species']
features = data[numeric_columns + categorical_columns]

# Model output: the integer-coded risk level.
target = data['fungal_risk_encoded']

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
# NOTE(review): the split is not stratified - consider stratify=target if the
# risk classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion (seeded as well).
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the held-out rows: overall accuracy plus a per-class report whose rows
# are labelled with the original risk names from the target encoder.
# NOTE(review): the recorded run reports 1.00 accuracy; a near-perfect score is
# worth double-checking for leakage between features and target.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(
    y_test,
    y_pred,
    target_names=le_fungal_risk.classes_,
)

print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", report)

Model Accuracy: 1.00

Classification Report:
               precision    recall  f1-score   support

        High       1.00      0.98      0.99        65
         Low       1.00      1.00      1.00       215
      Medium       1.00      1.00      1.00       220

    accuracy                           1.00       500
   macro avg       1.00      0.99      1.00       500
weighted avg       1.00      1.00      1.00       500



In [9]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score

# Estimate generalisation with 5-fold cross-validation over the full dataset.
cv_scores = cross_val_score(model, features, target, cv=5)

# Summarise the per-fold accuracies.
cv_mean = cv_scores.mean()
cv_std = cv_scores.std()

print(f"Cross-Validation Scores: {cv_scores}")
print(f"Mean Cross-Validation Accuracy: {cv_mean:.2f}")
print(f"Standard Deviation of Cross-Validation Accuracy: {cv_std:.2f}")

# Final model: refit on every row now that validation is done.
model.fit(features, target)

Cross-Validation Scores: [1.    0.996 1.    0.998 0.998]
Mean Cross-Validation Accuracy: 1.00
Standard Deviation of Cross-Validation Accuracy: 0.00


In [10]:
# Persist the fitted model, the per-column feature encoders and the target
# encoder as separate pickle files in the working directory.
import pickle

# File name -> object to serialise; written in this order.
artifacts = {
    'fungal_risk_model.pkl': model,
    'label_encoders.pkl': label_encoders,
    'fungal_risk_encoder.pkl': le_fungal_risk,
}

for filename, artifact in artifacts.items():
    with open(filename, 'wb') as f:
        pickle.dump(artifact, f)

print("Model and encoders saved successfully!")

Model and encoders saved successfully!
