In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [2]:
# Step 1: Load the dataset
# Reads farmers_retailers.csv (resolved relative to the working directory) into `data`.
data = pd.read_csv(filepath_or_buffer="farmers_retailers.csv")

In [3]:
# Step 2: Handle Missing Values (Imputation)
# Missing entries in the three numeric columns are replaced by that column's mean.
# NOTE(review): the imputer is fit on the whole frame, before the train/test split in
# Step 6, so held-out rows contribute to the fill values — confirm that is acceptable.
numeric_columns = ['price', 'rating', 'quantity_available']
imputer = SimpleImputer(strategy='mean')
data[numeric_columns] = imputer.fit_transform(data[numeric_columns])

In [4]:
# Step 3: Encode Categorical Variables
# Product names are swapped for integer codes. The 'product' column is overwritten in
# place, so from here on `data['product']` holds codes rather than names; the fitted
# `encoder` is what maps between the two.
encoder = LabelEncoder()
product_codes = encoder.fit_transform(data['product'])
data['product'] = product_codes

In [5]:
# Step 4: Feature Scaling (Standardization)
# Standardize price, rating and quantity_available; `scaled_features` is the resulting array.
# NOTE(review): this scaler is fit on every row, i.e. before the split in Step 6 — confirm
# that is intended for the initial baseline model.
numeric_frame = data[['price', 'rating', 'quantity_available']]
scaler = StandardScaler().fit(numeric_frame)
scaled_features = scaler.transform(numeric_frame)

In [6]:
# Step 5: Select Target Variable & Features
# Target: the label-encoded 'product' column produced in Step 3.
y = data['product']
# Inputs: the three standardized numeric columns from Step 4.
X = scaled_features

In [7]:
# Step 6: Split Data into Train & Test Sets
# 80/20 split; the fixed random_state makes the partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Step 7: Initialize & Train a Random Forest Model
# NOTE(review): `y_train` holds the integer codes that LabelEncoder assigned to product
# names, so this regressor treats those codes as a numeric quantity — confirm that a
# regressor (rather than a classifier) is really what is wanted here.
forest_params = {'n_estimators': 100, 'random_state': 42}
rf_model = RandomForestRegressor(**forest_params)
rf_model.fit(X_train, y_train)

In [9]:
# Step 8: Evaluate Initial Model Performance
# Score the baseline forest on the held-out 20% with MAE, MSE and R2 (in that order).
y_pred = rf_model.predict(X_test)
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f"Initial Model Performance:\nMAE: {mae}\nMSE: {mse}\nR2 Score: {r2}")

Initial Model Performance:
MAE: 1.0444999999999998
MSE: 1.487015
R2 Score: -0.09339338235294092


In [10]:
# Step 9: Feature Importance Analysis
# Pair each input column with the importance reported by the fitted forest and rank
# them from most to least important. The name order matches the column order used to
# build `scaled_features` in Step 4.
feature_names = ['price', 'rating', 'quantity_available']
feature_importances = rf_model.feature_importances_
important_features = (
    pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
    .sort_values(by='Importance', ascending=False)
)

In [11]:
# Select Top 4 Features
# Only three features were ranked in Step 9, so taking the first four keeps all of them.
# The effect of this cell is therefore to reorder the columns by descending importance.
# NOTE(review): anything that later feeds the saved scaler/model must supply the columns
# in this same order — verify against the inference code.
top_features = important_features['Feature'].head(4).values
X_selected = data[top_features]

In [12]:
# Step 10: Retrain Model with Selected Features
# Re-split with the same test_size/random_state as Step 6 (this rebinds y_train/y_test),
# then fit a fresh scaler on the training rows only and apply it to both partitions.
selected_split = train_test_split(X_selected, y, test_size=0.2, random_state=42)
X_train_sel, X_test_sel, y_train, y_test = selected_split
scaler_selected = StandardScaler().fit(X_train_sel)
X_train_sel = scaler_selected.transform(X_train_sel)
X_test_sel = scaler_selected.transform(X_test_sel)

In [13]:
# Step 11: Hyperparameter Tuning with Grid Search CV
# 3 x 3 x 3 x 3 = 81 parameter combinations, each scored by R2 under 5-fold CV
# (405 fits in total), run on all available cores via n_jobs=-1.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='r2',
    n_jobs=-1,
)
grid_search.fit(X_train_sel, y_train)

# The estimator with the best CV score from the search.
best_rf_model = grid_search.best_estimator_

In [14]:
# Step 12: Evaluate Final Model
# Same three metrics as Step 8, computed for the tuned forest on the scaled test rows.
y_pred_final = best_rf_model.predict(X_test_sel)
mae_final, mse_final, r2_final = (
    metric(y_test, y_pred_final)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f"Final Model Performance:\nMAE: {mae_final}\nMSE: {mse_final}\nR2 Score: {r2_final}")

Final Model Performance:
MAE: 1.0378081131963581
MSE: 1.3804718299646122
R2 Score: -0.015052816150450177


In [15]:
# Step 13: Save the Final Model & Scaler
# Persist the tuned forest and the scaler fitted in Step 10 to the working directory.
for artifact_obj, artifact_file in (
    (best_rf_model, "best_rf_model.pkl"),
    (scaler_selected, "scaler.pkl"),
):
    joblib.dump(artifact_obj, artifact_file)

print("Model and Scaler saved successfully.")

Model and Scaler saved successfully.


In [16]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.20.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.2 (from gradio)
  Downloading gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3

In [17]:
import gradio as gr
import pandas as pd
import joblib
import numpy as np

In [18]:
# Load trained model and scaler
# Both files are the artifacts written in Step 13. Note that `scaler` is rebound here:
# it now refers to the scaler saved as scaler.pkl, not the one created in Step 4.
model, scaler = (
    joblib.load(pkl_path) for pkl_path in ("best_rf_model.pkl", "scaler.pkl")
)

In [19]:
# Load dataset for reference
# Re-reading the CSV rebinds `data` to the raw file contents, so 'product' holds the
# original names again instead of the integer codes written in Step 3.
data = pd.read_csv(filepath_or_buffer="farmers_retailers.csv")

In [20]:
# Load dataset for reference
# (This cell re-reads the same file as the cell directly above it.)
data = pd.read_csv(filepath_or_buffer="farmers_retailers.csv")

In [25]:
# Function to recommend farmers
def recommend_farmers(product, max_price, min_rating, quantity_needed):
    """Return up to five farmers that offer `product` within the buyer's constraints.

    Reads the module-level `data` frame (the raw CSV, with product names) and the
    loaded `scaler` and `model`.

    Args:
        product: Product name exactly as it appears in ``data['product']``.
        max_price: Inclusive upper bound on ``price``.
        min_rating: Inclusive lower bound on ``rating``.
        quantity_needed: Inclusive lower bound on ``quantity_available``. ``None``
            (e.g. a number box left empty) is treated as 0, i.e. no minimum.

    Returns:
        pandas.DataFrame: at most five rows with the columns farmer_id, product,
        price, rating and quantity_available, ordered by ascending prediction
        score. When the product is unknown or no row passes the filters, a one-row
        frame with a single ``message`` column is returned instead, because the
        function is wired to a ``gr.DataFrame`` output, which expects tabular data
        rather than a bare string.
    """
    output_columns = ['farmer_id', 'product', 'price', 'rating', 'quantity_available']

    def _message(text):
        # Wrap a status message as a table so the DataFrame output can render it.
        return pd.DataFrame({'message': [text]})

    known_products = set(data['product'].unique())
    if product not in known_products:
        return _message("Product not found in dataset.")

    # Comparing a column against None raises TypeError; treat "not given" as no minimum.
    if quantity_needed is None:
        quantity_needed = 0

    # Filter dataset based on user inputs. `.copy()` gives an independent frame so the
    # score column added below is not written onto a slice of `data`.
    matches = (
        (data['product'] == product)
        & (data['price'] <= max_price)
        & (data['rating'] >= min_rating)
        & (data['quantity_available'] >= quantity_needed)
    )
    filtered_data = data.loc[matches].copy()

    if filtered_data.empty:
        return _message("No farmers match your criteria.")

    # Prepare feature input for model prediction. The scaler may have been fitted on
    # columns in a different order than (price, rating, quantity_available); when it
    # recorded its training column names, build the query row in exactly that order.
    query = {
        'price': max_price,
        'rating': min_rating,
        'quantity_available': quantity_needed,
    }
    fitted_order = getattr(scaler, 'feature_names_in_', None)
    if fitted_order is None:
        features = np.array([[max_price, min_rating, quantity_needed]])
    else:
        ordered_names = list(fitted_order)
        features = pd.DataFrame(
            [[query[name] for name in ordered_names]], columns=ordered_names
        )
    features_scaled = scaler.transform(features)

    # Get recommendations using trained model
    predicted_values = model.predict(features_scaled)

    # NOTE(review): the score is the absolute gap between each row's price and the
    # model's single prediction. The model was trained with the encoded product column
    # as its target, so confirm that comparing its output with a price is intended.
    filtered_data['prediction_score'] = np.abs(filtered_data['price'] - predicted_values[0])
    recommendations = filtered_data.sort_values(by='prediction_score', ascending=True)

    # Return top recommendations
    return recommendations[output_columns].head(5)

In [26]:
# Define Gradio UI
# One row of four inputs (product, price ceiling, rating floor, quantity), a button,
# and a table that displays whatever recommend_farmers returns.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🌿 DirectFarm Recommender System")

    with gr.Row():
        # Choices are the distinct product values present in the loaded dataset.
        product = gr.Dropdown(choices=list(data['product'].unique()), label="Select Product")
        max_price = gr.Slider(minimum=0.8, maximum=3.0, step=0.1, label="Maximum Price (USD)")
        min_rating = gr.Slider(minimum=3.5, maximum=5.0, step=0.1, label="Minimum Rating")
        quantity_needed = gr.Number(label="Quantity Needed (kg)")

    recommend_btn = gr.Button(value="🔍 Recommend Farmers")
    output = gr.DataFrame()

    # Inputs are passed positionally, in the order of recommend_farmers' parameters.
    query_inputs = [product, max_price, min_rating, quantity_needed]
    recommend_btn.click(fn=recommend_farmers, inputs=query_inputs, outputs=output)


In [27]:
# Run UI locally
# Starts the Gradio server for `demo`. With share=True a temporary public URL is also
# created (the recorded output below shows a gradio.live link that expires in 72 hours).
# The same output notes that debug=True must be passed to see errors inside Colab.
demo.launch(share=True)  # Use `share=True` for public link

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://088bba4c73d8a80cdb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


