# Best Buyer Prediction Model
This notebook trains a machine learning model to predict the best buyer for a farmer based on crop details, buyer offers, and historical data.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder


In [2]:
# Load dataset: read the farmer/buyer CSV (path is relative to the working
# directory) and preview the first rows as the cell's output
df = pd.read_csv("farmer_buyer_dataset.csv")
df.head()


Unnamed: 0,Farmer_ID,Crop_Type,Quantity,Location,Moisture_Content,Expected_Price,Preferred_Buyer_Type,Time_Urgency,Season,Historical_Buyer_Deals,...,Offered_Price_Sarkari_Procurement_Center,Distance_Sarkari_Procurement_Center,Rating_Sarkari_Procurement_Center,Offered_Price_FieldFresh_Traders,Distance_FieldFresh_Traders,Rating_FieldFresh_Traders,Offered_Price_GreenYield_Buyer_Group,Distance_GreenYield_Buyer_Group,Rating_GreenYield_Buyer_Group,Best_Buyer
0,F1000,Maize,38,Karnal,19.96,1685,Government,2,Kharif,FieldFresh Traders,...,1703,16,4.2,2682,47,4.0,2595,49,5.0,FieldFresh Traders
1,F1001,Wheat,15,Sirsa,13.23,2390,Government,7,Rabi,Krishi Mandal,...,1670,43,3.4,1826,13,3.3,1408,6,4.8,Krishi Mandal
2,F1002,Wheat,8,Rohtak,17.77,1646,Private,6,Kharif,FieldFresh Traders,...,2088,46,4.4,1966,80,4.4,2477,40,3.4,Krishi Mandal
3,F1003,Maize,37,Rohtak,19.63,2079,Private,2,Rabi,Krishi Mandal,...,1569,52,3.6,1508,53,4.0,2568,72,4.0,GreenYield Buyer Group
4,F1004,Mustard,10,Sonipat,13.64,2173,Government,1,Rabi,Krishi Mandal,...,2374,53,4.1,1796,16,3.2,1769,83,4.6,AgroBazaar Pvt Ltd


In [3]:
# Integer-encode each categorical feature column in place, keeping the fitted
# encoder for every column in `label_encoders` (keyed by column name).
categorical_cols = ['Crop_Type', 'Location', 'Preferred_Buyer_Type', 'Season', 'Historical_Buyer_Deals']
label_encoders = {name: LabelEncoder() for name in categorical_cols}

for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

# The target column gets its own encoder; its `classes_` hold the buyer names
target_encoder = LabelEncoder()
df['Best_Buyer'] = target_encoder.fit_transform(df['Best_Buyer'])


In [4]:
# Target is the encoded Best_Buyer column; the features are every remaining
# column except Farmer_ID
y = df['Best_Buyer']
X = df.drop(columns=['Farmer_ID', 'Best_Buyer'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:
# Fit a seeded 100-tree random forest on the training split
# (`fit` returns the estimator itself, so it can be chained)
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the encoded buyer label for every held-out row
y_pred = clf.predict(X_test)


In [6]:
# Evaluate the model on the held-out split: overall accuracy plus a per-class
# report labelled with the original buyer names
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=target_encoder.classes_)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)


Accuracy: 0.875

Classification Report:
                             precision    recall  f1-score   support

        AgroBazaar Pvt Ltd       0.93      0.76      0.84        17
        FieldFresh Traders       0.91      0.83      0.87        12
    GreenYield Buyer Group       0.90      0.95      0.92        19
             Krishi Mandal       0.78      0.95      0.86        19
Sarkari Procurement Center       0.92      0.85      0.88        13

                  accuracy                           0.88        80
                 macro avg       0.89      0.87      0.87        80
              weighted avg       0.88      0.88      0.87        80



In [7]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import random

# Load dataset
df = pd.read_csv("farmer_buyer_dataset.csv")

# Encode categorical variables: fit one LabelEncoder per column and keep it in
# `label_encoders` so user-supplied values can be encoded the same way later
categorical_cols = ['Crop_Type', 'Location', 'Preferred_Buyer_Type', 'Season', 'Historical_Buyer_Deals']
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder().fit(df[col])
    df[col] = label_encoders[col].transform(df[col])

# Encode target
target_encoder = LabelEncoder()
df['Best_Buyer'] = target_encoder.fit_transform(df['Best_Buyer'])

# Prepare training data (all rows; this cell keeps no hold-out split)
y = df['Best_Buyer']
X = df.drop(columns=['Farmer_ID', 'Best_Buyer'])

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# Buyers list: names used to build the per-buyer Offered_Price_/Distance_/Rating_ keys
buyers = [
    'AgroBazaar Pvt Ltd',
    'Krishi Mandal',
    'Sarkari Procurement Center',
    'FieldFresh Traders',
    'GreenYield Buyer Group',
]

# Take user input
def _ask_text(prompt):
    """Prompt once and return the answer with surrounding whitespace removed."""
    return input(prompt).strip()


def _ask_number(prompt, cast):
    """Prompt repeatedly until the answer converts cleanly with ``cast``.

    ``cast`` is ``int`` or ``float``. A ``ValueError`` from the conversion
    triggers a retry instead of aborting the whole questionnaire.
    """
    while True:
        answer = input(prompt).strip()
        try:
            return cast(answer)
        except ValueError:
            print(f"'{answer}' is not a valid number, please try again.")


def get_user_input():
    """Interactively collect one farmer's details and build a model input row.

    Text answers are stripped of surrounding whitespace; numeric answers are
    re-prompted until they parse.

    Returns:
        dict: the nine farmer-supplied fields followed, for every name in the
        module-level ``buyers`` list, by ``Offered_Price_<buyer>``,
        ``Distance_<buyer>`` and ``Rating_<buyer>`` entries.

    Note:
        The per-buyer offers are simulated, not real quotes: they are drawn
        from ``random`` on every call, so identical answers can produce
        different rows (and recommendations) unless ``random.seed`` is set.
    """
    print("Enter farmer details:")
    crop = _ask_text("Crop Type (Wheat/Rice/Mustard/etc): ")
    qty = _ask_number("Quantity (quintals): ", int)
    loc = _ask_text("Location (Hisar/Karnal/etc): ")
    moisture = _ask_number("Moisture Content (%): ", float)
    exp_price = _ask_number("Expected Price (₹): ", int)
    buyer_type = _ask_text("Preferred Buyer Type (Private/Government): ")
    urgency = _ask_number("Time Urgency (1-7): ", int)
    season = _ask_text("Season (Rabi/Kharif): ")
    past_buyer = _ask_text("Historical Buyer Deal (e.g. Krishi Mandal): ")

    sample = {
        'Crop_Type': crop,
        'Quantity': qty,
        'Location': loc,
        'Moisture_Content': moisture,
        'Expected_Price': exp_price,
        'Preferred_Buyer_Type': buyer_type,
        'Time_Urgency': urgency,
        'Season': season,
        'Historical_Buyer_Deals': past_buyer
    }

    for buyer in buyers:
        # Turn the display name into the suffix used by the per-buyer keys,
        # e.g. "AgroBazaar Pvt Ltd" -> "AgroBazaar_Pvt_Ltd"
        key = buyer.replace(" ", "_").replace(".", "").replace(",", "")
        sample[f'Offered_Price_{key}'] = random.randint(1500, 2600)
        sample[f'Distance_{key}'] = random.randint(5, 100)
        sample[f'Rating_{key}'] = round(random.uniform(3.0, 5.0), 1)

    return sample

# Predict function
def _match_known_category(value, known_classes):
    """Return the member of ``known_classes`` that matches ``value``.

    An exact match wins; otherwise the comparison ignores case and surrounding
    whitespace. Returns ``None`` when nothing matches.
    """
    for known in known_classes:
        if known == value:
            return known
    wanted = str(value).strip().casefold()
    for known in known_classes:
        if str(known).strip().casefold() == wanted:
            return known
    return None


def predict_best_buyer(user_input):
    """Predict the recommended buyer name for one farmer input row.

    Args:
        user_input: dict mapping feature names to raw values, as produced by
            ``get_user_input``. Categorical fields hold the user's text.

    Returns:
        The buyer name decoded from the model's predicted class via
        ``target_encoder.inverse_transform``.

    Relies on the module-level ``categorical_cols``, ``label_encoders``,
    ``model`` and ``target_encoder``.
    """
    input_df = pd.DataFrame([user_input])

    # Encode categorical fields
    for col in categorical_cols:
        le = label_encoders[col]
        raw_value = input_df[col].iloc[0]
        # The encoders were fitted on the dataset's exact spellings (e.g.
        # "Wheat"), so match case-insensitively instead of rejecting "wheat".
        matched = _match_known_category(raw_value, le.classes_)
        if matched is None:
            # Keep the original fallback to the first known class, but tell the
            # user: the prediction is no longer based on what they typed.
            matched = le.classes_[0]
            known_values = ", ".join(str(c) for c in le.classes_)
            print(f"Warning: unknown {col} '{raw_value}'; using '{matched}' instead. "
                  f"Known values: {known_values}")
        input_df[col] = le.transform([matched])

    # Present the columns in the order the model was trained on, so the result
    # does not depend on the key order of `user_input`. If the column sets
    # differ, leave the frame untouched and let the model report the mismatch.
    trained_columns = getattr(model, "feature_names_in_", None)
    if trained_columns is not None and set(trained_columns) == set(input_df.columns):
        input_df = input_df[list(trained_columns)]

    # Predict
    pred_index = model.predict(input_df)[0]
    return target_encoder.inverse_transform([pred_index])[0]

# Main: collect the farmer's answers interactively, then print the buyer the
# model recommends for that input
if __name__ == "__main__":
    user_input = get_user_input()
    best_buyer = predict_best_buyer(user_input)
    print("\n✅ Recommended Best Buyer:", best_buyer)


Enter farmer details:


Crop Type (Wheat/Rice/Mustard/etc):  wheat
Quantity (quintals):  45
Location (Hisar/Karnal/etc):  hisar
Moisture Content (%):  20
Expected Price (₹):  15000
Preferred Buyer Type (Private/Government):  government
Time Urgency (1-7):  3
Season (Rabi/Kharif):  rabi
Historical Buyer Deal (e.g. Krishi Mandal):  krishi mandal



✅ Recommended Best Buyer: GreenYield Buyer Group


In [8]:
import pandas as pd
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("farmer_buyer_dataset.csv")

# Encode categorical features, remembering each column's fitted encoder
cat_cols = ['Crop_Type', 'Location', 'Preferred_Buyer_Type', 'Season', 'Historical_Buyer_Deals']
label_encoders = {name: LabelEncoder() for name in cat_cols}

for col in cat_cols:
    df[col] = label_encoders[col].fit_transform(df[col])

# Encode target
target_encoder = LabelEncoder()
df['Best_Buyer'] = target_encoder.fit_transform(df['Best_Buyer'])

# Train model on every row of the dataset
y = df['Best_Buyer']
X = df.drop(columns=['Farmer_ID', 'Best_Buyer'])

model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# Save model and encoders as one (model, label_encoders, target_encoder) tuple
artifacts = (model, label_encoders, target_encoder)
with open("model.pkl", "wb") as out_file:
    pickle.dump(artifacts, out_file)

print("✅ Model and encoders saved to model.pkl")


✅ Model and encoders saved to model.pkl
