## Only one dataset Random Forest

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import json
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (BaggingClassifier, 
                              GradientBoostingClassifier, AdaBoostClassifier, 
                              StackingClassifier)

# Load dataset
crop = pd.read_csv("Crop_recommendation.csv.csv")
crop_dict = {
    'rice': 1, 'maize': 2, 'jute': 3, 'cotton': 4, 'coconut': 5,
    'papaya': 6, 'orange': 7, 'apple': 8, 'muskmelon': 9, 'watermelon': 10,
    'grapes': 11, 'mango': 12, 'banana': 13, 'pomegranate': 14,
    'lentil': 15, 'blackgram': 16, 'mungbean': 17, 'mothbeans': 18,
    'pigeonpeas': 19, 'kidneybeans': 20, 'chickpea': 21, 'coffee': 22
}

# Data preprocessing
crop.drop(['rainfall'], axis=1, inplace=True)
crop['name'] = crop['label'].map(crop_dict)
crop.drop(['label'], axis=1, inplace=True)
x = crop.drop('name', axis=1)
y = crop['name']

# Train-test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=10)

# Scaling
ms = MinMaxScaler()
ms.fit(x_train)
x_train = ms.transform(x_train)
x_test = ms.transform(x_test)

# Initialize models including stacking
models = {
        'Logistic Regression': LogisticRegression(),
        'Naive Bayes': GaussianNB(),
        'Support Vector Machine': SVC(),
        'K-Neighbors Classifier': KNeighborsClassifier(),
        'Decision Tree': DecisionTreeClassifier(),
        'Random Forest': RandomForestClassifier(),
        'Bagging': BaggingClassifier(),
        'Gradient Boosting': GradientBoostingClassifier(),
        'AdaBoost': AdaBoostClassifier(),
        
        # Stacking Model
        'Stacking': StackingClassifier(
            estimators=[
                ('rf', RandomForestClassifier(n_estimators=200)),
                ('svc', SVC(kernel='linear', probability=True))
            ], 
            final_estimator=LogisticRegression()
        )
}

# Store the accuracy of each model in a dictionary
model_scores = {}

# Train and evaluate each model
for name, model in models.items():
    model.fit(x_train, y_train)
    y_pred1 = model.predict(x_test)
    score = accuracy_score(y_test, y_pred1)
    model_scores[name] = score

    # Cross-Validation to evaluate the model
    cross_val_scores = cross_val_score(model, x_train, y_train, cv=5)
       
# Sort the models by accuracy score in descending order
sorted_model_scores = dict(sorted(model_scores.items(), key=lambda val: val[1], reverse=True))

# Print the sorted models with their accuracy scores
print("\nSorted Model Accuracy Scores:")
for model, score in sorted_model_scores.items():
    print(f"{model} with accuracy: {score:.4f}")


# Final model
rd = RandomForestClassifier()
rd.fit(x_train, y_train)
# Calculate predictions on the test set
y_pred = rd.predict(x_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print(f"Accuracy of the Random Forest model on the test set: {accuracy*100:.2f}%")


# Function to load sensor data from a JSON file and calculate the average
def get_sensor_data_from_json(json_file):
    with open(json_file, 'r') as file:
        sensor_data = json.load(file)

    # Extract values from the JSON file and group them based on the sequence provided
    nitrogen = []
    phosphorus = []
    potassium = []
    temperature = []
    humidity = []
    ph = []
    # Ignore the soil moisture column (5th column)
    
    for i in range(0, len(sensor_data), 7):  # Assuming each entry has 7 values
        nitrogen.append(sensor_data[i])
        phosphorus.append(sensor_data[i+1])
        potassium.append(sensor_data[i+2])
        ph.append(sensor_data[i+3])
        temperature.append(sensor_data[i+5])
        humidity.append(sensor_data[i+6])

    # Calculate the average for each attribute
    avg_nitrogen = np.mean(nitrogen)
    avg_phosphorus = np.mean(phosphorus)
    avg_potassium = np.mean(potassium)
    avg_temperature = np.mean(temperature)
    avg_humidity = np.mean(humidity)
    avg_ph = np.mean(ph)
    
    return avg_nitrogen, avg_phosphorus, avg_potassium, avg_temperature, avg_humidity, avg_ph


# Recommendation function that returns probabilities for multiple crops
def recommendation(N, P, K, temperature, humidity, ph):
    features = np.array([[N, P, K, temperature, humidity, ph]])
    transformed_features = ms.transform(features)
    probabilities = rd.predict_proba(transformed_features)  # Get probability for each class
    
    return probabilities[0]  # Return probabilities for the first (and only) sample

# Get sensor input from JSON file and average the data
json_file = 'fake.json'  # Replace with your actual JSON file path
sensor_data = get_sensor_data_from_json(json_file)

# Use the averaged sensor data to get probabilities
N, P, K, temperature, humidity, ph = sensor_data  # Unpacking averaged sensor data
probabilities = recommendation(N, P, K, temperature, humidity, ph)

# Find the index of the crop with the highest probability
max_index = np.argmax(probabilities)  # Index of the highest probability
highest_probability = probabilities[max_index]

# Get the corresponding crop name
best_crop = [name for name, val in crop_dict.items() if val == max_index+1][0]

# Output the crop with the highest probability
print(f"The most suitable crop is '{best_crop}' with a probability of {highest_probability*100:.2f}%")





Sorted Model Accuracy Scores:
Random Forest with accuracy: 0.9705
Stacking with accuracy: 0.9659
Naive Bayes with accuracy: 0.9636
Bagging with accuracy: 0.9591
Gradient Boosting with accuracy: 0.9591
Decision Tree with accuracy: 0.9523
K-Neighbors Classifier with accuracy: 0.9455
Support Vector Machine with accuracy: 0.9409
Logistic Regression with accuracy: 0.8659
AdaBoost with accuracy: 0.1523
Accuracy of the Random Forest model on the test set: 96.59%
The most suitable crop is 'rice' with a probability of 89.00%




In [3]:
import joblib

# Save the trained Random Forest model
joblib.dump(rd, 'rec_crop.pkl')

print("Model saved successfully as rec_crop.pkl")


Model saved successfully as rec_crop.pkl


In [5]:
import json
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Function to read sensor data from JSON
def get_sensor_data_from_json(json_file):
    try:
        with open(json_file, 'r') as file:
            sensor_data = json.load(file)  # Load JSON data

        # Ensure JSON structure is valid
        if "data" not in sensor_data or not isinstance(sensor_data["data"], list):
            raise ValueError("Invalid JSON structure: Expected 'data' key with a list of readings.")

        # Extract feature values (ignoring soil moisture)
        nitrogen, phosphorus, potassium, temperature, humidity, ph = [], [], [], [], [], []

        for entry in sensor_data["data"]:
            nitrogen.append(entry["N"])
            phosphorus.append(entry["P"])
            potassium.append(entry["K"])
            temperature.append(entry["temperature"])
            humidity.append(entry["humidity"])
            ph.append(entry["ph"])

        # Compute average values
        avg_n = np.mean(nitrogen)
        avg_p = np.mean(phosphorus)
        avg_k = np.mean(potassium)
        avg_temp = np.mean(temperature)
        avg_humid = np.mean(humidity)
        avg_ph = np.mean(ph)

        return avg_n, avg_p, avg_k, avg_temp, avg_humid, avg_ph

    except (FileNotFoundError, json.JSONDecodeError, KeyError, ValueError) as e:
        print(f"Error reading JSON file: {e}")
        return None

# Load dataset
df = pd.read_csv("Crop_recommendation.csv")

# Features and labels
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['label']

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Get sensor data from JSON
json_file = 'final.json'
sensor_data = get_sensor_data_from_json(json_file)

if sensor_data:
    N, P, K, temperature, humidity, ph = sensor_data

    # Default rainfall value (as it's missing in sensor data)
    avg_rainfall = df['rainfall'].mean()

    # Make prediction
    input_features = [[N, P, K, temperature, humidity, ph, avg_rainfall]]
    prediction_probs = rf_model.predict_proba(input_features)
    predicted_label_index = np.argmax(prediction_probs)
    most_probable_crop = label_encoder.inverse_transform([predicted_label_index])[0]
    probability = prediction_probs[0][predicted_label_index]

    # Output result
    print(f"The most suitable crop is '{most_probable_crop}' with {probability*100:.2f}% probability.")


The most suitable crop is 'rice' with 72.00% probability.




In [5]:
import json
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Function to read sensor data from JSON
def get_sensor_data_from_json(json_file):
    try:
        with open(json_file, 'r') as file:
            sensor_data = json.load(file)  # Load JSON data

        # Ensure JSON structure is valid
        if "data" not in sensor_data or not isinstance(sensor_data["data"], list):
            raise ValueError("Invalid JSON structure: Expected 'data' key with a list of readings.")

        # Extract feature values (ignoring soil moisture)
        nitrogen, phosphorus, potassium, temperature, humidity, ph = [], [], [], [], [], []

        for entry in sensor_data["data"]:
            nitrogen.append(entry["N"])
            phosphorus.append(entry["P"])
            potassium.append(entry["K"])
            temperature.append(entry["temperature"])
            humidity.append(entry["humidity"])
            ph.append(entry["ph"])

        # Compute average values
        avg_n = np.mean(nitrogen)
        avg_p = np.mean(phosphorus)
        avg_k = np.mean(potassium)
        avg_temp = np.mean(temperature)
        avg_humid = np.mean(humidity)
        avg_ph = np.mean(ph)

        return avg_n, avg_p, avg_k, avg_temp, avg_humid, avg_ph

    except (FileNotFoundError, json.JSONDecodeError, KeyError, ValueError) as e:
        print(f"Error reading JSON file: {e}")
        return None

# Load dataset
df = pd.read_csv("Crop_recommendation.csv")

# Features and labels
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['label']

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Get sensor data from JSON
json_file = 'final.json'
sensor_data = get_sensor_data_from_json(json_file)

if sensor_data:
    N, P, K, temperature, humidity, ph = sensor_data

    # Default rainfall value (as it's missing in sensor data)
    avg_rainfall = df['rainfall'].mean()

    # Make prediction
    input_features = [[N, P, K, temperature, humidity, ph, avg_rainfall]]
    prediction_probs = rf_model.predict_proba(input_features)
    predicted_label_index = np.argmax(prediction_probs)
    most_probable_crop = label_encoder.inverse_transform([predicted_label_index])[0]
    probability = prediction_probs[0][predicted_label_index]

    # Output result
    print(f"The most suitable crop is '{most_probable_crop}' with {probability*100:.2f}% probability.")


The most suitable crop is 'rice' with 72.00% probability.




In [9]:
import joblib

# Save the trained Random Forest model
joblib.dump(rf_model, 'final_crop.pkl')

print("Model saved successfully as final_crop.pkl")

Model saved successfully as final_crop.pkl
