In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [3]:

data = pd.read_csv("Vegetable_market.csv")
X = data[['Vegetable','Season','Month', 'Temp', 'Deasaster Happen in last 3month', 'Vegetable condition']]
y = data['Price per kg']

In [4]:
categorical_features = ['Vegetable', 'Season', 'Month', 'Deasaster Happen in last 3month', 'Vegetable condition']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features)
    ])


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [7]:
model.fit(X_train, y_train)

In [13]:
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
# Random Forest Regressor model
rf_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

rf_model.fit(X_train, y_train)
rf_y_pred = rf_model.predict(X_test)

rf_mae = mean_absolute_error(y_test, rf_y_pred)
rf_mse = mean_squared_error(y_test, rf_y_pred)
rf_r2 = r2_score(y_test, rf_y_pred)

print("Random Forest Regressor Metrics:")
print(f"Mean Absolute Error: {rf_mae:.2f}")
print(f"Mean Squared Error: {rf_mse:.2f}")
print(f"R-squared Score: {rf_r2:.2f}")
rf_accuracy = rf_model.score(X_test, y_test)
print(f"Accuracy: {rf_accuracy:.2f}")

Random Forest Regressor Metrics:
Mean Absolute Error: 11.12
Mean Squared Error: 277.86
R-squared Score: 0.91
Accuracy: 0.91




In [25]:
# XGBoost model
xgb_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor(random_state=42))
])

xgb_model.fit(X_train, y_train)
xgb_y_pred = xgb_model.predict(X_test)

xgb_mae = mean_absolute_error(y_test, xgb_y_pred)
xgb_mse = mean_squared_error(y_test, xgb_y_pred)
xgb_r2 = r2_score(y_test, xgb_y_pred)

print("\nXGBoost Regressor Metrics:")
print(f"Mean Absolute Error: {xgb_mae:.2f}")
print(f"Mean Squared Error: {xgb_mse:.2f}")
print(f"R-squared Score: {xgb_r2:.2f}")
xg_accuracy = xgb_model.score(X_test, y_test)
print(f"Accuracy: {xg_accuracy:.2f}")


XGBoost Regressor Metrics:
Mean Absolute Error: 8.21
Mean Squared Error: 168.56
R-squared Score: 0.95
Accuracy: 0.95




In [23]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
numeric_features = ['Temp']
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features)
    ])

# Splitting data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Constructing MLPRegressor model
mlpmodel = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', MLPRegressor(hidden_layer_sizes=(100, ), activation='relu', solver='adam', max_iter=1000)) 
    # You can adjust parameters as needed
])

# Fitting the model
mlpmodel.fit(X_train, y_train)

# Making predictions on the test set
y_predmlp = mlpmodel.predict(X_test)

mlpmae = mean_absolute_error(y_test, y_predmlp)
mlpmse = mean_squared_error(y_test, y_predmlp)
mlpr2 = r2_score(y_test, y_predmlp)
print("\nXGBoost Regressor Metrics:")
print(f"Mean Absolute Error: {mlpmae:.2f}")
print(f"Mean Squared Error: {mlpmse:.2f}")
print(f"R-squared Score: {mlpr2:.2f}")
mlpaccuracy = mlpmodel.score(X_test, y_test)
print(f"Accuracy: {mlpaccuracy:.2f}")



XGBoost Regressor Metrics:
Mean Absolute Error: 14.50
Mean Squared Error: 428.65
R-squared Score: 0.86
Accuracy: 0.86




In [20]:
new_input = pd.DataFrame({
    'Vegetable':[input("enter vegetable:").lower()],
    'Season': [input('Enter season:').lower()],
    'Month':[input('Enter month:').lower()],
    'Temp': [int(input('Enter temperature:'))],
    'Deasaster Happen in last 3month': [input('Diseaster happened?:').lower()],
    'Vegetable condition': [input('Enter Vegetable condition:').lower()]
})

predicted_pricerf = rf_model.predict(new_input)
predicted_pricemlp = mlpmodel.predict(new_input)
predicted_pricexgb = xgb_model.predict(new_input)
print(f"Predicted Price per kg in rf: {predicted_pricerf[0]:.2f}")
print(f"Predicted Price per kg in mlp: {predicted_pricemlp[0]:.2f}")
print(f"Predicted Price per kg in xgb: {predicted_pricexgb[0]:.2f}")

enter vegetable: potato
Enter season: winter
Enter month: jan
Enter temperature: 15
Diseaster happened?: no
Enter Vegetable condition: fresh


Predicted Price per kg in rf: 25.79
Predicted Price per kg in mlp: 21.68
Predicted Price per kg in xgb: 22.53


In [24]:
print(f"Accuracy in rf: {rf_accuracy:.2f}")
print(f"Accuracy in xgb: {xg_accuracy:.2f}")
print(f"Accuracy in mlp: {mlpaccuracy:.2f}")

Accuracy in rf: 0.91
Accuracy in xgb: 0.95
Accuracy in mlp: 0.86


In [26]:
print(f"Mean Absolute Error in rf: {rf_mae:.2f}")
print(f"Mean Absolute Error in xgb: {xgb_mae:.2f}")
print(f"Mean Absolute Error in mlp: {mlpmae:.2f}")

Mean Absolute Error in rf: 11.12
Mean Absolute Error in xgb: 8.21
Mean Absolute Error in mlp: 14.50
