In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder


In [2]:
# Load the raw market price records from a CSV located relative to the
# notebook's working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks
# like a previously saved index - confirm whether it should be dropped.
df = pd.read_csv('cropPrice.csv')

In [3]:
# Preview the first rows to check the raw columns and the date/price formats.
df.head()

Unnamed: 0,State,District,Market,Commodity,Variety,Grade,Arrival_Date,Min Price,Max Price,Modal Price
0,Gujarat,Amreli,Damnagar,Bhindi(Ladies Finger),Bhindi,FAQ,27-07-2023,4100.0,4500.0,4350.0
1,Gujarat,Amreli,Damnagar,Brinjal,Other,FAQ,27-07-2023,2200.0,3000.0,2450.0
2,Gujarat,Amreli,Damnagar,Cabbage,Cabbage,FAQ,27-07-2023,2350.0,3000.0,2700.0
3,Gujarat,Amreli,Damnagar,Cauliflower,Cauliflower,FAQ,27-07-2023,7000.0,7500.0,7250.0
4,Gujarat,Amreli,Damnagar,Coriander(Leaves),Coriander,FAQ,27-07-2023,8400.0,9000.0,8850.0


In [4]:
# Parse the day-month-year strings (e.g. 27-07-2023) into datetimes so the
# .dt accessor can be used to derive calendar features.
df['Arrival_Date'] = pd.to_datetime(df['Arrival_Date'], format='%d-%m-%Y')

In [5]:
# Derive the calendar features used by the model from the parsed arrival date.
df['Year'], df['Month'] = df['Arrival_Date'].dt.year, df['Arrival_Date'].dt.month

In [6]:
# Preview again to confirm Arrival_Date is now a datetime and that the
# Year/Month columns are present.
df.head()

Unnamed: 0,State,District,Market,Commodity,Variety,Grade,Arrival_Date,Min Price,Max Price,Modal Price,Year,Month
0,Gujarat,Amreli,Damnagar,Bhindi(Ladies Finger),Bhindi,FAQ,2023-07-27,4100.0,4500.0,4350.0,2023,7
1,Gujarat,Amreli,Damnagar,Brinjal,Other,FAQ,2023-07-27,2200.0,3000.0,2450.0,2023,7
2,Gujarat,Amreli,Damnagar,Cabbage,Cabbage,FAQ,2023-07-27,2350.0,3000.0,2700.0,2023,7
3,Gujarat,Amreli,Damnagar,Cauliflower,Cauliflower,FAQ,2023-07-27,7000.0,7500.0,7250.0,2023,7
4,Gujarat,Amreli,Damnagar,Coriander(Leaves),Coriander,FAQ,2023-07-27,8400.0,9000.0,8850.0,2023,7


In [7]:
# Integer-encode every categorical feature, keeping one fitted encoder per
# column so the same string -> code mapping can be reused at prediction time.
label_cols = ['State', 'District', 'Market', 'Commodity', 'Variety', 'Grade']

# The columns are overwritten in place, so running this cell a second time
# would fit the encoders on integer codes instead of the original strings and
# silently break the mapping that is saved later. Fail loudly instead, and do
# so before `label_encoders` is reset so any existing encoders stay intact.
already_encoded = [col for col in label_cols if pd.api.types.is_numeric_dtype(df[col])]
if already_encoded:
    raise RuntimeError(
        f"Columns {already_encoded} are already numeric; reload the CSV "
        "before re-running the label-encoding cell."
    )

label_encoders = {}
for col in label_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [8]:
# Regression target.
y = df['Modal Price']

# Model inputs: the six encoded categorical columns plus the two calendar
# features. The column order here is the order the model is trained on.
X = df.loc[
    :,
    ['State', 'District', 'Market', 'Commodity', 'Variety', 'Grade', 'Year', 'Month'],
]

In [9]:
# Hold out 20% of the rows for evaluation; random_state makes the split repeatable.
# NOTE(review): this is a random row split. If the goal is forecasting prices
# for future dates, a time-based split may give a more honest error estimate -
# confirm the intended use.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Fit a 100-tree random forest on the training split; random_state fixes the
# seed so repeated runs build the same forest.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [11]:
# Evaluate on the held-out split.
y_pred = model.predict(X_test)
# Take the square root of the MSE explicitly: the `squared=False` argument of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'Root Mean Squared Error: {rmse}')

Root Mean Squared Error: 2354.091340526439




In [12]:
import joblib

In [13]:
# Persist the fitted model and the per-column encoders so predictions can be
# made later without retraining.
# NOTE: joblib files are pickle-based; only load them from trusted sources.
joblib.dump(model, 'crop_price_predictor_model.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')

['label_encoders.pkl']

In [14]:
def predict_crop_price(state, district, crop_name, input_date,
                       market='Damnagar', variety='Other', grade='FAQ',
                       model=None, label_encoders=None):
    """Predict the price of a crop for a given location and date.

    Parameters
    ----------
    state, district, crop_name : str
        Values for the 'State', 'District' and 'Commodity' features. They must
        be labels the encoders were fitted on.
    input_date : str
        Date in ``DD-MM-YYYY`` format; only its year and month are used.
    market, variety, grade : str, optional
        Values for the 'Market', 'Variety' and 'Grade' features. The defaults
        reproduce the values that were previously hard-coded.
    model : object with ``predict``, optional
        Already-loaded model. When omitted, it is loaded from
        'crop_price_predictor_model.pkl'.
    label_encoders : dict, optional
        Mapping of column name -> fitted encoder. When omitted, it is loaded
        from 'label_encoders.pkl'.

    Returns
    -------
    The first (and only) element of ``model.predict`` for the encoded row.

    Raises
    ------
    ValueError
        If ``input_date`` does not match the expected format, or if a
        categorical value is rejected by its encoder (e.g. an unseen label).

    Notes
    -----
    The model trained in this notebook is a random forest, which cannot
    extrapolate beyond the Year values present in its training data; treat
    predictions for other years with caution.
    """
    # Loading from disk on every call is slow; callers making many predictions
    # can pass the already-loaded objects instead.
    # NOTE: joblib files are pickle-based - only load trusted files.
    if model is None:
        model = joblib.load('crop_price_predictor_model.pkl')
    if label_encoders is None:
        label_encoders = joblib.load('label_encoders.pkl')

    # Only the calendar year and month are used as features.
    arrival_date = pd.to_datetime(input_date, format='%d-%m-%Y')

    input_data = {
        'State': state,
        'District': district,
        'Market': market,
        'Commodity': crop_name,
        'Variety': variety,
        'Grade': grade,
        'Year': arrival_date.year,
        'Month': arrival_date.month,
    }

    # Apply the stored string -> code mapping to each categorical feature,
    # naming the offending column when a value is rejected.
    for col, encoder in label_encoders.items():
        raw_value = input_data[col]
        try:
            input_data[col] = encoder.transform([raw_value])[0]
        except ValueError as exc:
            raise ValueError(
                f"Unknown {col} {raw_value!r}: this value was not seen when "
                "the label encoders were fitted."
            ) from exc

    # Pin the column order to the one used for training rather than relying
    # on dict insertion order.
    feature_order = ['State', 'District', 'Market', 'Commodity',
                     'Variety', 'Grade', 'Year', 'Month']
    input_df = pd.DataFrame([input_data], columns=feature_order)

    predicted_price = model.predict(input_df)

    return predicted_price[0]


In [15]:
# Example: query the saved model for a single crop, location and date.
state, district = 'Gujarat', 'Amreli'
crop_name = 'Bhindi(Ladies Finger)'
input_date = '27-07-2025'  # Future date

predicted_price = predict_crop_price(
    state=state,
    district=district,
    crop_name=crop_name,
    input_date=input_date,
)
print(f"Predicted Price for {crop_name} in {district}, {state} on {input_date}: {predicted_price}")

Predicted Price for Bhindi(Ladies Finger) in Amreli, Gujarat on 27-07-2025: 3348.2869047619047
