In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.impute import SimpleImputer

In [2]:
# Load the raw price dataset from the CSV that sits next to this notebook.
df = pd.read_csv(filepath_or_buffer='./Price_Predication.csv')


In [3]:
# Replace the 'District' and 'Crop' columns with integer codes.
# Each column gets its own fitted encoder, kept under a module-level name so the
# codes can later be mapped back to the original labels via inverse_transform.
le_district = LabelEncoder()
le_crop = LabelEncoder()
df = df.assign(
    District=le_district.fit_transform(df['District']),
    Crop=le_crop.fit_transform(df['Crop']),
)

In [4]:
# Derive calendar features from 'Price Date' and drop the raw column.
#
# 'Price Date' is parsed with the '%b-%Y' format (abbreviated month name, a dash,
# four-digit year); its month and year become the numeric 'Month' and 'Year'
# columns used as model features.
#
# The guard makes this cell safe to re-run: the original version dropped
# 'Price Date' and then failed with a KeyError on a second execution because
# the column it parses no longer existed.
if 'Price Date' in df.columns:
    price_date = pd.to_datetime(df['Price Date'], format='%b-%Y')
    df = df.assign(
        Month=price_date.dt.month,
        Year=price_date.dt.year,
    ).drop(columns='Price Date')
elif not {'Month', 'Year'}.issubset(df.columns):
    # Neither the raw date nor the derived features are present: fail loudly
    # here instead of letting a later cell trip over the missing columns.
    raise KeyError("'Price Date' column not found and 'Month'/'Year' have not been derived")


# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
feature_columns = ['District', 'Crop', 'Month', 'Year']
target_column = 'Crop Price (Rs per quintal)'
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_columns],
    df[target_column],
    test_size=0.2,
    random_state=42,
)


In [5]:
# Fill missing feature values with the per-column mean (SimpleImputer's default
# strategy, spelled out here). The statistics are learned from the training
# split only and then applied unchanged to the test split.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)
X_train = imputer.transform(X_train)
X_test = imputer.transform(X_test)

In [6]:
# Standardise the features. The scaler's statistics are fitted on the training
# split only, and the same fitted transform is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Build an ordinary least-squares model and fit it on the training split
# (fit() returns the estimator itself, so construction and training chain).
linear_reg = LinearRegression().fit(X_train, y_train)

# Predict prices for the held-out rows.
y_pred = linear_reg.predict(X_test)

In [8]:
# Score the held-out predictions.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same units as the target, unlike the squared error
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report every metric rounded to two decimals, one per line.
metric_report = (
    ("Mean Squared Error : {:.2f}", mse),
    ("Root Mean Squared Error : {:.2f}", rmse),
    ("Mean Absolute Error : {:.2f}", mae),
    ("R-squared : {:.2f}", r2),
)
for template, metric_value in metric_report:
    print(template.format(metric_value))

Mean Squared Error : 16844968.71
Root Mean Squared Error : 4104.26
Mean Absolute Error : 2913.80
R-squared : 0.22
