<a href="https://colab.research.google.com/github/sushmithashenoy07/AgriMitra/blob/main/crop_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Relative locations of the two raw CSV inputs read by the next cell.
crop_data_file, market_data_file = (
    '../datasets/Crop_recommendation.csv',
    '../datasets/cropmarketprice.csv',
)

In [3]:
# Load each CSV into its own DataFrame (crop table first, then market table).
crop_data, market_data = (
    pd.read_csv(crop_data_file),
    pd.read_csv(market_data_file),
)

In [None]:
# Print the column index of each loaded table.
for heading, frame in (
    ("Crop Data Columns:", crop_data),
    ("Market Data Columns:", market_data),
):
    print(heading, frame.columns)


In [5]:
# Join crop rows to market rows wherever the crop 'label' equals the market
# 'Commodity' (pandas' default inner join, exact string match).
merged_data = crop_data.merge(
    market_data,
    left_on='label',
    right_on='Commodity',
)


In [6]:
# Target variable: the modal price column that comes from the market data.
y = merged_data['Modal_x0020_Price']

# Features taken from the crop data.
X = merged_data[
    [
        'N', 'P', 'K',
        'temperature', 'humidity', 'ph', 'rainfall',
    ]
]

In [None]:
# Announce, then display, the first five crop rows (5 is head()'s default).
print("Crop Data Preview:")
crop_data.head(5)

In [None]:
# Announce, then display, the first five market rows (5 is head()'s default).
print("Market Data Preview:")
market_data.head(5)

In [9]:
# Trim leading/trailing whitespace from both join-key columns before merging again.
for frame, key_column in ((crop_data, 'label'), (market_data, 'Commodity')):
    frame[key_column] = frame[key_column].str.strip()

In [10]:
# Repeat the label/Commodity join now that both key columns are stripped.
merged_data = crop_data.merge(
    market_data,
    left_on='label',
    right_on='Commodity',
)

In [None]:
# Show the merged table as the cell output.
merged_data

In [None]:
# Report the size of the merged table, then its first rows as plain text,
# to see whether the join produced any matches.
print(f"Merged Data Shape: {merged_data.shape}")
print("Merged Data Preview:", merged_data.head(), sep="\n")

In [None]:
# List the distinct key values on each side of the join so that naming
# mismatches between 'label' and 'Commodity' can be compared by eye.
print(
    "Unique crop names in crop data (label column):",
    crop_data['label'].unique(),
    "\nUnique commodities in market data (Commodity column):",
    market_data['Commodity'].unique(),
    sep="\n",
)


In [14]:
# Translation table from the crop dataset's 'label' values (keys) to the
# market dataset's 'Commodity' names (values). Each key appears exactly once;
# a repeated key in a dict literal silently overwrites the earlier entry.
crop_to_commodity_mapping = {
    'rice': 'Paddy(Dhan)(Common)',
    'maize': 'Maize',
    'chickpea': 'Kabuli Chana(Chickpeas-White)',
    'kidneybeans': 'Rajma',
    'pigeonpeas': 'Arhar (Tur/Red Gram)(Whole)',
    'mothbeans': 'Moth Beans',
    'mungbean': 'Green Gram (Moong)(Whole)',
    'blackgram': 'Black Gram (Urd Beans)(Whole)',
    'lentil': 'Lentil (Masur)(Whole)',
    'pomegranate': 'Pomegranate',
    'banana': 'Banana',
    'mango': 'Mango',
    'grapes': 'Grapes',
    'watermelon': 'Water Melon',
    'muskmelon': 'Karbuja(Musk Melon)',
    'apple': 'Apple',
    'orange': 'Orange',
    'papaya': 'Papaya',
    'coconut': 'Coconut',
    'cotton': 'Cotton',
    'jute': 'Jute',
    'coffee': 'Coffee',
    'mustard': 'Mustard',
    'soybean': 'Soyabean',
    'groundnut': 'Groundnut',
    'wheat': 'Wheat',
    'millets': 'Millets',
    'sugarcane': 'Sugarcane',
    'potato': 'Potato',
    'onion': 'Onion',
    'tomato': 'Tomato',
    'brinjal': 'Brinjal',
    'cabbage': 'Cabbage',
    'cauliflower': 'Cauliflower',
    'bhindi': 'Bhindi(Ladies Finger)',
    'pumpkin': 'Pumpkin',
    'bottle_gourd': 'Bottle gourd',
    'ridge_gourd': 'Ridgeguard(Tori)',
    'bitter_gourd': 'Bitter gourd',
    'chillies': 'Green Chilli',
    'garlic': 'Garlic',
    'ginger': 'Ginger(Green)',
    'turmeric': 'Turmeric',
    'spinach': 'Spinach',
    'coriander': 'Coriander(Leaves)',
    'mint': 'Mint(Pudina)',
    'beetroot': 'Beetroot',
    'carrot': 'Carrot',
    'peas': 'Green Peas',
    'guava': 'Guava',
    'pineapple': 'Pineapple',
    'lime': 'Lime',
    'mousambi': 'Mousambi(Sweet Lime)',
    'jackfruit': 'Jack Fruit',
    'cashew': 'Cashewnuts',
    'sapota': 'Chikoos(Sapota)',
    'fig': 'Fig(Anjura/Anjeer)',
    'amla': 'Amla(Nelli Kai)',
    'jamun': 'Jamun(Narale Hannu)',
    'custard_apple': 'Custard Apple (Sharifa)',
}

In [15]:
# Translate each crop label to its market commodity name; labels missing from
# the mapping become NaN in 'label_mapped'.
crop_data['label_mapped'] = crop_data['label'].map(crop_to_commodity_mapping)

In [None]:
# Collect the rows whose label had no mapping entry (label_mapped is NaN)
# and print the distinct labels involved.
missing_mask = crop_data['label_mapped'].isna()
unmapped = crop_data[missing_mask]
print(f"Unmapped crops: {unmapped['label'].unique()}")

In [17]:
# Keep only the rows whose label was translated to a commodity name.
# NOTE(review): this rebinds crop_data, so the unfiltered frame is no longer
# reachable under that name after this cell runs.
crop_data = crop_data.dropna(subset=['label_mapped'])

In [18]:
# Join on the translated commodity name instead of the raw crop label
# (pandas' default inner join).
merged_data = crop_data.merge(
    market_data,
    left_on='label_mapped',
    right_on='Commodity',
)

In [None]:
# Report the size of the mapped merge and display its first five rows
# (5 is head()'s default).
print(f"Merged Data Shape: {merged_data.shape}")
print("Merged Data Preview:")
merged_data.head(5)

In [20]:
# Save the merged data to a new CSV file (the row index is not written).
# The model-training sections below reload this file as `df`.
merged_data.to_csv('../datasets/data.csv', index=False)

In [21]:
# Feature selection for machine learning.
# Target: market price.
y = merged_data['Modal_x0020_Price']

# Inputs: the weather/soil readings plus the market descriptor columns.
# Earlier alternative that used only crop-data columns:
# X = merged_data[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
X = merged_data[
    [
        'temperature', 'humidity', 'ph', 'rainfall',
        'State', 'District', 'Market', 'Grade', 'Variety',
    ]
]

In [22]:
# Hold out 30% of the rows for testing; random_state fixes the shuffle so the
# split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Model Training

## Neural network model for joint crop and price prediction

In [23]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

In [24]:
# Reload the merged crop/market table that was written to disk earlier in the notebook.
df = pd.read_csv("../datasets/data.csv")

In [None]:
# (rows, columns) of the reloaded table.
df.shape

In [None]:
# Two randomly chosen rows; no seed is set, so the output changes between runs.
df.sample(2)

In [27]:
# Columns fed to the network, in the order the model expects them.
input_cols = [
    'temperature', 'humidity', 'ph', 'rainfall',
    'State', 'District', 'Market', 'Grade', 'Variety',
    'N', 'P', 'K',
]

# Columns to predict: the crop label and the modal market price.
target_cols = [
    'label',
    'Modal_x0020_Price',
]

In [28]:
class CropPricePredictor(tf.keras.Model):
    """Two-headed dense network built on one shared trunk.

    The trunk (dense1..dense6) feeds two independent stacks of layers:
    a softmax classifier over crops and a single-unit linear price regressor.

    Args:
        input_shape: Shape tuple forwarded to the first Dense layer.
        output_shape: Number of units in the softmax crop output.
    """

    def __init__(self, input_shape, output_shape):
        super().__init__()

        # Shared trunk.
        self.dense1 = tf.keras.layers.Dense(256, input_shape=input_shape, activation='leaky_relu')
        self.dense2 = tf.keras.layers.Dense(256, activation="leaky_relu")

        self.dense3 = tf.keras.layers.Dense(512, activation='relu')
        self.dense4 = tf.keras.layers.Dense(512, activation='relu')

        self.dense5 = tf.keras.layers.Dense(1024, activation='relu')
        self.dense6 = tf.keras.layers.Dense(1024, activation='relu')

        # Crop head.
        self.output_crop_1 = tf.keras.layers.Dense(512, activation="relu")
        self.output_crop_2 = tf.keras.layers.Dense(256, activation="relu")
        self.output_crop_3 = tf.keras.layers.Dense(64, activation="relu")

        self.output_crop = tf.keras.layers.Dense(output_shape, activation='softmax', name='crop_output')

        # Price head.
        self.output_price_1 = tf.keras.layers.Dense(512, activation='relu')
        self.output_price_2 = tf.keras.layers.Dense(512, activation='relu')
        self.output_price_3 = tf.keras.layers.Dense(128, activation='relu')

        self.output_price = tf.keras.layers.Dense(1, name='price_output', activation='linear')

        # One Dropout layer is reused after every hidden layer except dense1.
        self.dropout = tf.keras.layers.Dropout(0.2)

    def call(self, inputs):
        """Run the trunk and both heads.

        Returns:
            dict with 'price_output' (output of the 1-unit linear layer) and
            'crop_output' (output of the softmax layer).
        """
        x = self.dense1(inputs)
        x = self.dropout(self.dense2(x))

        x = self.dropout(self.dense3(x))
        x = self.dropout(self.dense4(x))

        x = self.dropout(self.dense5(x))
        x = self.dropout(self.dense6(x))

        # Predict crop
        crop = self.dropout(self.output_crop_1(x))
        crop = self.dropout(self.output_crop_2(crop))
        crop = self.dropout(self.output_crop_3(crop))

        crop_output = self.output_crop(crop)

        # Predict price. This must go through the price-specific layers:
        # routing it through the crop layers would make 'price_output' a
        # softmax over crop classes instead of a scalar regression.
        price = self.dropout(self.output_price_1(x))
        price = self.dropout(self.output_price_2(price))
        price = self.dropout(self.output_price_3(price))

        price_output = self.output_price(price)

        return {'price_output': price_output, 'crop_output': crop_output}

In [29]:
# One input per entry in input_cols; one softmax unit per distinct crop label in df.
model = CropPricePredictor(input_shape=(len(input_cols),),output_shape=df['label'].unique().shape[0])

In [30]:
# Build the model for batches of any size (None) with len(input_cols) features,
# so that summary() can be called before training.
model.build(input_shape=(None, len(input_cols)))

In [None]:
# Print the layer/parameter summary of the built model.
model.summary()

In [32]:
# Keep only the target and input columns. The explicit copy makes the result an
# independent frame, so the column assignments in the encoding step below do
# not raise pandas' SettingWithCopyWarning.
df = df[target_cols + input_cols].copy()

In [None]:
# Three randomly chosen rows of the reduced frame (unseeded, varies per run).
df.sample(3)

In [34]:
from sklearn.preprocessing import LabelEncoder

# Fitted encoders, keyed by the column each one was fitted on.
label_encoders = {}

# Replace every string-valued column (including the crop label) with integer codes.
categorical_cols = ['State', 'District', 'Market', 'Grade', 'Variety','label']
for col in categorical_cols:
    encoder = LabelEncoder()
    label_encoders[col] = encoder
    # Values are cast to str before fitting.
    df[col] = encoder.fit_transform(df[col].astype(str))
    

In [None]:
# Three randomly chosen rows after encoding (unseeded, varies per run).
df.sample(3)

In [36]:
# Regression target: the modal price column.
y_price = df['Modal_x0020_Price']

# Classification target: the integer crop codes expanded to one-hot rows.
y_crop = tf.keras.utils.to_categorical(df['label'])

# Model inputs, in input_cols order.
x = df[input_cols]

In [37]:
# Split inputs and both targets in one call so their rows stay aligned
# (30% held out, fixed seed).
(
    X_train, X_test,
    y_crop_train, y_crop_test,
    y_price_train, y_price_test,
) = train_test_split(
    x,
    y_crop,
    y_price,
    test_size=0.3,
    random_state=42,
)

In [38]:
# Stop once val_loss has gone 5 epochs without improving, then restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [39]:
# One loss and one metric per named output of the model's call().
model.compile(
    optimizer='adam',
    loss={
        'price_output': 'mse',
        'crop_output': 'categorical_crossentropy',
    },
    metrics={
        'price_output': 'mse',
        'crop_output': 'accuracy',
    },
)

In [None]:
# Train both heads together; 20% of the training rows are held back for
# validation. The early-stopping callback defined above is passed in here so
# that training actually halts when val_loss stops improving.
model.fit(
    X_train,
    {'price_output': y_price_train, 'crop_output': y_crop_train},
    epochs=50,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Score the trained model on the held-out 30% for both outputs.
model.evaluate(X_test, {'price_output': y_price_test, 'crop_output': y_crop_test})

In [None]:
# Write the trained model to disk in the 'tf' save format.
# NOTE(review): the '-100' suffix does not match epochs=50 used above — confirm the intended name.
model.save('../models/crop_price_predictor-100', save_format='tf')

In [None]:
# Reference row copied from the encoded frame (categorical columns are already integer codes):
# label	Modal_x0020_Price	temperature	    humidity	ph	        rainfall	State	District	Market	Grade	Variety	    N	    P	    K
# 0	    7560.0	            21.377847	    92.720437	5.573241	106.141702	20	    12	        35	    3	    23	        30	    122	    197

# Feature values of that row, listed in input_cols order (targets omitted).
input_values = [21.377847,92.720437,5.573241,106.141702,20,12,35,3,23,30,122,197]
# Wrap in an outer list so the tensor has a leading batch dimension of 1.
input_tensor = tf.constant([input_values], dtype=tf.float32)

# Call the model directly; the cell output is the dict returned by call().
model(input_tensor)

## Using separate models to predict crop and price

In [3]:

import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [4]:
class CropPredictor(tf.keras.Model):
    """Dense softmax classifier.

    Args:
        input_shape: Shape tuple handed to the first Dense layer.
        output_shape: Unit count of the final softmax layer.
    """

    def __init__(self, input_shape, output_shape):
        super().__init__()
        Dense = tf.keras.layers.Dense

        # Hidden stack: 256, 256, 512, 512, 1024, 1024 units.
        self.dense1 = Dense(256, input_shape=input_shape, activation='leaky_relu')
        self.dense2 = Dense(256, activation="leaky_relu")
        self.dense3 = Dense(512, activation='relu')
        self.dense4 = Dense(512, activation='relu')
        self.dense5 = Dense(1024, activation='relu')
        self.dense6 = Dense(1024, activation='relu')

        # Narrowing layers ahead of the softmax output.
        self.output_1 = Dense(128, activation="relu")
        self.output_2 = Dense(64, activation="relu")
        self.output_layer = Dense(output_shape, activation='softmax')

        # A single Dropout(0.2) layer, reused between layers in call().
        self.dropout = tf.keras.layers.Dropout(0.2)

    def call(self, x):
        """Forward pass; returns the softmax layer's output."""
        hidden = self.dense1(x)

        # Every remaining hidden layer is followed by the shared dropout.
        dropout_wrapped = (
            self.dense2,
            self.dense3,
            self.dense4,
            self.dense5,
            self.dense6,
            self.output_1,
            self.output_2,
        )
        for layer in dropout_wrapped:
            hidden = self.dropout(layer(hidden))

        return self.output_layer(hidden)

In [5]:
class PricePredictor(tf.keras.Model):
    """Dense regressor with a single linear output unit.

    Args:
        input_shape: Shape tuple handed to the first Dense layer.
    """

    def __init__(self, input_shape):
        super().__init__()
        Dense = tf.keras.layers.Dense

        # Hidden stack: 128, 256, 512, 512 units (no 1024-unit layers here).
        self.dense1 = Dense(128, input_shape=input_shape, activation='leaky_relu')
        self.dense2 = Dense(256, activation="relu")
        self.dense3 = Dense(512, activation='relu')
        self.dense4 = Dense(512, activation='relu')

        # Narrowing layers ahead of the scalar output.
        self.output_1 = Dense(128, activation="relu")
        self.output_2 = Dense(32, activation="relu")
        self.output_layer = Dense(1, activation='linear')

        # A single Dropout(0.2) layer, reused between layers in call().
        self.dropout = tf.keras.layers.Dropout(0.2)

    def call(self, x):
        """Forward pass; returns the linear layer's output."""
        hidden = self.dense1(x)

        # Every remaining hidden layer is followed by the shared dropout.
        dropout_wrapped = (
            self.dense2,
            self.dense3,
            self.dense4,
            self.output_1,
            self.output_2,
        )
        for layer in dropout_wrapped:
            hidden = self.dropout(layer(hidden))

        return self.output_layer(hidden)

In [6]:
# Columns fed to the models, in the order the models expect them.
input_cols = [
    'temperature', 'humidity', 'ph', 'rainfall',
    'State', 'District', 'Market', 'Grade', 'Variety',
    'N', 'P', 'K',
]

# Columns to predict: the crop label and the modal market price.
target_cols = [
    'label',
    'Modal_x0020_Price',
]

In [None]:
# Reload the merged crop/market table from disk.
df = pd.read_csv("../datasets/data.csv")

# Keep only the target and input columns. The explicit copy makes the result an
# independent frame, so the column assignments in the encoding step below do
# not raise pandas' SettingWithCopyWarning.
df = df[target_cols + input_cols].copy()
df.sample(2)

In [8]:
from sklearn.preprocessing import LabelEncoder

# Fitted encoders, keyed by the column each one was fitted on.
label_encoders = {}

# Replace every string-valued column (including the crop label) with integer codes.
categorical_cols = ['State', 'District', 'Market', 'Grade', 'Variety','label']
for col in categorical_cols:
    encoder = LabelEncoder()
    label_encoders[col] = encoder
    # Values are cast to str before fitting.
    df[col] = encoder.fit_transform(df[col].astype(str))

In [9]:
import pickle
# Persist the dict of fitted encoders so the same string-to-code mapping can be reloaded later.
with open('../models/label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)

In [11]:
# Regression target: the modal price column.
y_price = df['Modal_x0020_Price']

# Classification target: the integer crop codes expanded to one-hot rows.
y_crop = tf.keras.utils.to_categorical(df['label'])

# Inputs for the crop model, in input_cols order.
x = df[input_cols]

In [12]:
n_features = len(input_cols)
n_crops = df['label'].unique().shape[0]

# The crop model sees the input columns only; its output has one unit per distinct label.
crop_model = CropPredictor(input_shape=(n_features,), output_shape=n_crops)

# The price model takes one extra input: the encoded crop label.
price_model = PricePredictor(input_shape=(n_features + 1,))

In [13]:
# Build both models for batches of any size (None); the price model is one
# feature wider than the crop model.
crop_width = len(input_cols)
price_width = len(input_cols) + 1

crop_model.build(input_shape=(None, crop_width))
price_model.build(input_shape=(None, price_width))

In [None]:
# Print the layer/parameter summary of the crop classifier.
crop_model.summary()

In [None]:
# Print the layer/parameter summary of the price regressor.
price_model.summary()

In [16]:
# Crop task: input columns -> one-hot label (30% held out, fixed seed).
x_crop_train, x_crop_test, y_crop_train, y_crop_test = train_test_split(
    x,
    y_crop,
    test_size=0.3,
    random_state=42,
)

# Price task: input columns plus the encoded label -> modal price, split with
# the same test_size and seed.
price_inputs = df[input_cols + ["label"]]
x_price_train, x_price_test, y_price_train, y_price_test = train_test_split(
    price_inputs,
    y_price,
    test_size=0.3,
    random_state=42,
)

In [68]:
# Classifier: cross-entropy on one-hot targets, tracked with accuracy.
crop_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Regressor: mean squared error as both loss and reported metric.
price_model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse'],
)

In [69]:
# Both callbacks watch the validation loss produced by validation_split in
# fit(): training stops after 5 epochs without improvement and the best
# weights are restored. The price callback watches val_loss too, because
# monitoring the training 'mse' would stop on, and restore, the weights that
# best fit the training rows rather than the held-out ones.
early_stopping_crop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
early_stopping_price = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Train the crop classifier for up to 100 epochs; 20% of the training rows are
# held back for validation and early stopping may end the run sooner.
crop_model.fit(x_crop_train, y_crop_train, epochs=100, batch_size=64, validation_split=0.2, callbacks=[early_stopping_crop])

In [None]:
# Train the price regressor for up to 100 epochs; 20% of the training rows are
# held back for validation and early stopping may end the run sooner.
price_model.fit(x_price_train, y_price_train, epochs=100, batch_size=64, validation_split=0.2, callbacks=[early_stopping_price])

In [None]:
# Score both models on their held-out 30%. Only the last expression's return
# value (the price model's) is shown as the cell output.
crop_model.evaluate(x_crop_test, y_crop_test)
price_model.evaluate(x_price_test, y_price_test)

In [None]:
# Write both trained models to disk in the 'tf' save format.
crop_model.save('../models/crop_predictor-100', save_format='tf')
price_model.save('../models/price_predictor-100', save_format='tf')

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Random-forest price model, trained and scored on the same split as the
# neural price model (input columns plus the encoded crop label).
# No separate split is made here: the forest only ever uses x_price_* / y_price_*.
booster = RandomForestRegressor(n_estimators=100, random_state=42)
booster.fit(x_price_train, y_price_train)

# Mean squared error on the held-out rows.
y_pred = booster.predict(x_price_test)
mse = mean_squared_error(y_price_test, y_pred)
print(f"MSE: {mse}")

In [80]:
import pickle

# Persist the fitted random-forest price model with pickle.
with open('../models/price_predictor.pkl', 'wb') as f:
    pickle.dump(booster, f)