In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import pickle

# Load the dataset
data = pd.read_csv('../data/soil_data.csv')

# Print the columns to check the features
print(data.columns)

# Define features and target variable
features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
X_raw = data[features]
# NOTE(review): the original comment called these "soil type names", but the
# captured classification report lists crop names (apple, banana, ...) —
# confirm what 'label' actually holds.
y = data['label']

# Split BEFORE fitting any preprocessing step. Fitting the imputer/scaler on
# the full dataset lets test-set statistics (means, variances) leak into the
# training features and makes the reported accuracy optimistic.
# Same test_size/random_state as before, so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42
)

# Handle missing values: learn the column means on the training split only,
# then apply those same means to the test split.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# Standardize: fit on the training split only, reuse the fitted parameters
# for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# Keep `X` as the fully preprocessed feature matrix (as the original cell
# did) so that any later cell using `X` keeps working; it is now produced by
# the train-fitted imputer and scaler.
X = scaler.transform(imputer.transform(X_raw))

# Save the fitted scaler to a .pkl file.
# Only the scaler is persisted (unchanged from the original cell); the fitted
# imputer is not saved, so inference code must impute missing values itself.
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

# Train a RandomForestClassifier model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict using the model
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Display how often each label was predicted on the test split
print("Predicted Soil Types:")
print(pd.Series(y_pred).value_counts())

# Loading the scaler from the .pkl file (example usage).
# pickle.load executes arbitrary code from the file: only load pickles that
# this notebook (or another trusted source) wrote.
with open('scaler.pkl', 'rb') as file:
    loaded_scaler = pickle.load(file)

# Use the loaded scaler on data that has NOT been scaled yet (e.g. new
# incoming data). The original cell passed the already-scaled X_test here,
# which standardized it a second time; the imputed-but-unscaled test features
# are the correct input and reproduce X_test.
X_test_transformed = loaded_scaler.transform(X_test_imputed)

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')
Accuracy: 0.9931818181818182
Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.92      1.00      0.96        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00 