In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.impute import SimpleImputer
import numpy as np


In [2]:
# Read the CSV dataset into a pandas DataFrame.
csv_path = './price_predication.csv'
df = pd.read_csv(csv_path)


In [3]:
# Replace the 'District' and 'Crop' columns with integer codes.
# One encoder is kept per column so the same label -> code mapping can be
# applied again later through le_district.transform / le_crop.transform.
le_district = LabelEncoder()
le_crop = LabelEncoder()
for column, encoder in (('District', le_district), ('Crop', le_crop)):
    df[column] = encoder.fit_transform(df[column])


In [4]:
# Parse 'Price Date' using the '%b-%Y' format (abbreviated month name,
# a dash, then a four-digit year).
price_dates = pd.to_datetime(df['Price Date'], format='%b-%Y')
df['Price Date'] = price_dates

# Derive numeric calendar features from the parsed dates.
df['Month'] = price_dates.dt.month
df['Year'] = price_dates.dt.year

# The original date column is no longer needed once Month/Year exist.
df = df.drop(columns='Price Date')

# Hold out 20% of the rows for testing; random_state fixes the shuffle so
# the split is reproducible.
feature_columns = ['District', 'Crop', 'Month', 'Year']
target_column = 'Crop Price (Rs per quintal)'
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_columns],
    df[target_column],
    test_size=0.2,
    random_state=42,
)


In [5]:
# Fill missing feature values. The imputer is fitted on the training split
# only and then applied to both splits, so no test-set statistics leak into
# training. SimpleImputer() is used with its default settings.
imputer = SimpleImputer()
imputer.fit(X_train)
X_train, X_test = (imputer.transform(split) for split in (X_train, X_test))

In [6]:
# Standardise the features using statistics learned from the training split.
# NOTE(review): X_train / X_test are overwritten here, so re-running this
# cell would re-fit the scaler on data that has already been scaled.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Train a random forest on the prepared training features, then predict the
# held-out test rows. random_state makes the fitted forest reproducible.
forest_params = dict(n_estimators=400, max_depth=12, random_state=42)
rf = RandomForestRegressor(**forest_params).fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [8]:
# Evaluate the test-set predictions.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Format every metric to two decimals and emit the report in one print call.
report_lines = [
    "Mean Squared Error : {:.2f}".format(mse),
    "Root Mean Squared Error : {:.2f}".format(rmse),
    "Mean Absolute Error : {:.2f}".format(mae),
    "R-squared : {:.2f}".format(r2),
]
print("\n".join(report_lines))

Mean Squared Error : 3719.39
Root Mean Squared Error : 60.99
Mean Absolute Error : 51.69
R-squared : 1.00


In [None]:
import os

import joblib

# Save the fitted random forest to disk.
# NOTE(review): only `rf` is persisted. The prediction cells in this notebook
# also pass inputs through le_district, le_crop, imputer and scaler, so those
# fitted objects are needed as well to reproduce predictions from this file.
model_filename = './Save_Model_Price/random_forest_model.pkl'

# joblib.dump opens the target path for writing and fails with
# FileNotFoundError if the parent directory is missing, so create it first.
os.makedirs(os.path.dirname(model_filename), exist_ok=True)

joblib.dump(rf, model_filename)
print(f"Model saved as {model_filename}")

In [9]:
# Inputs for the forecast: which district/crop to predict for, and the
# current local timestamp used as the start of the forecast window.
district_name, crop_name = 'Coimbatore', 'Banana'
today = pd.Timestamp.now()

In [10]:
# Number of monthly periods to forecast.
# NOTE: despite the "next_six_months" naming, this notebook forecasts 12
# periods; the variable names are kept because other cells reference them.
forecast_periods = 12

# Month-start timestamps, beginning at the first month start on or after
# `today`.
forecast_dates = pd.date_range(today, periods=forecast_periods, freq='MS')

# "MM-YYYY" labels used for display in the results table.
next_six_months = forecast_dates.strftime("%m-%Y").tolist()

# Encode the requested district and crop once, using the encoders fitted on
# the training data (transform raises ValueError for a label it never saw).
district_code = le_district.transform([district_name])[0]
crop_code = le_crop.transform([crop_name])[0]

# One feature row per forecast month, with the same columns, in the same
# order, as the training features. Month/Year are read straight from the
# timestamps rather than re-parsed from the formatted label strings.
next_six_months_df = pd.DataFrame({'District': [district_code] * forecast_periods,
                                   'Crop': [crop_code] * forecast_periods,
                                   'Month': [int(stamp.month) for stamp in forecast_dates],
                                   'Year': [int(stamp.year) for stamp in forecast_dates]})


In [11]:
# Run the forecast rows through the same imputer -> scaler -> model steps
# that were applied to the training data.
# The intermediate arrays get their own names instead of overwriting
# next_six_months_df: reassigning it made this cell non-idempotent, because
# re-running it would scale the already-scaled array a second time and
# silently change the predictions.
forecast_imputed = imputer.transform(next_six_months_df)
forecast_scaled = scaler.transform(forecast_imputed)
next_six_months_predictions = rf.predict(forecast_scaled)
print("Look Here --\n\n", next_six_months_predictions)

Look Here --

 [1848.61795103 1844.87699271 1820.93632212 1814.17673009 1841.11625962
 1839.99108071 1845.99912283 1843.84305199 1833.18883485 1837.67613213
 1843.89228432 1849.33510743]


In [12]:
# Build a display table pairing each forecast month label with its price.
# astype(int) drops the fractional part (truncation, not rounding).
predicted_prices = next_six_months_predictions.astype(int)
table_columns = {
    'Month-Year': next_six_months,
    'Predicted Crop Price (Rs per quintal)': predicted_prices,
}
next_six_months_table = pd.DataFrame(table_columns)

print(next_six_months_table)

   Month-Year  Predicted Crop Price (Rs per quintal)
0     11-2023                                   1848
1     12-2023                                   1844
2     01-2024                                   1820
3     02-2024                                   1814
4     03-2024                                   1841
5     04-2024                                   1839
6     05-2024                                   1845
7     06-2024                                   1843
8     07-2024                                   1833
9     08-2024                                   1837
10    09-2024                                   1843
11    10-2024                                   1849
