In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.impute import SimpleImputer


In [2]:
# Read the raw price records from the CSV next to this notebook into `df`
df = pd.read_csv(filepath_or_buffer='./price_predication.csv')



In [3]:
# Replace the text categories with integer codes.
# One encoder is kept per column so each mapping stays available afterwards.
le_district = LabelEncoder()
le_crop = LabelEncoder()
for col_name, col_encoder in (('District', le_district), ('Crop', le_crop)):
    df[col_name] = col_encoder.fit_transform(df[col_name])


In [4]:
# Parse the date strings (format '%b-%Y') into timestamps
df['Price Date'] = pd.to_datetime(df['Price Date'], format='%b-%Y')

# Derive the numeric calendar features from the parsed timestamps
date_parts = df['Price Date'].dt
df['Month'] = date_parts.month
df['Year'] = date_parts.year

# The timestamp column itself is no longer needed once Month/Year exist
df = df.drop(columns='Price Date')

# Hold out 40% of the rows for testing; the fixed seed keeps the split repeatable
feature_columns = ['District', 'Crop', 'Month', 'Year']
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_columns],
    df['Crop Price (Rs per quintal)'],
    test_size=0.4,
    random_state=42,
)


In [5]:
# Preprocessing: missing-value imputation followed by standardisation.
# Both transformers are fitted on the training split only and then reused
# as-is on the test split.
imputer = SimpleImputer()
scaler = StandardScaler()

# Training split: fit each transformer and apply it
X_train = scaler.fit_transform(imputer.fit_transform(X_train))

# Test split: apply the already-fitted transformers
X_test = scaler.transform(imputer.transform(X_test))



In [6]:
# Hyper-parameters for the gradient-boosted regressor (seed fixed for repeatability)
xgb_params = {'n_estimators': 400, 'max_depth': 12, 'random_state': 42}

# Build the model and train it on the preprocessed training split
xgb = XGBRegressor(**xgb_params)
xgb.fit(X_train, y_train)

# Predict prices for the held-out test split
y_pred = xgb.predict(X_test)

In [7]:
# Score the test-set predictions against the true prices
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed above

# Report each metric rounded to two decimals, in a fixed order
for template, value in (
    ("Mean Squared Error : {:.2f}", mse),
    ("Root Mean Squared Error : {:.2f}", rmse),
    ("Mean Absolute Error : {:.2f}", mae),
    ("R-squared : {:.2f}", r2),
):
    print(template.format(value))


Mean Squared Error : 5568.18
Root Mean Squared Error : 74.62
Mean Absolute Error : 61.06
R-squared : 1.00
