The first part of the model is to generate a synthetic data with at least 100 rows.

In [2]:
#import libraries
import pandas as pd
import random
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

In [3]:
# Step 1: Generate Synthetic Dataset
def generate_dataset():
    # Define products
    products = ["Tomatoes", "Potatoes", "Carrots", "Onions"]

    # Generate synthetic data
    data = []
    for i in range(1, 101):  # 100 rows
        farmer_id = i
        retailer_id = 100 + i
        product = random.choice(products)
        price = round(random.uniform(0.8, 3.0), 2)  # Random price between 0.8 and 3.0
        latitude = round(random.uniform(32.0, 42.0), 4)  # Random latitude in the USA
        longitude = round(random.uniform(-122.0, -74.0), 4)  # Random longitude in the USA
        location = f"{latitude},{longitude}"
        rating = round(random.uniform(3.5, 5.0), 1)  # Random rating between 3.5 and 5.0
        quantity_available = random.randint(100, 500)  # Random quantity between 100 and 500

        data.append([farmer_id, retailer_id, product, price, location, rating, quantity_available])

    # Create a DataFrame
    columns = ["farmer_id", "retailer_id", "product", "price", "location", "rating", "quantity_available"]
    df = pd.DataFrame(data, columns=columns)

    # Save to CSV
    df.to_csv("farmers_retailers.csv", index=False)
    print("Dataset generated and saved as 'farmers_retailers.csv'")
    return df


In [4]:
print(df.head())

   farmer_id  retailer_id   product  price           location  rating  \
0          1          101  Potatoes   2.05  38.4946,-121.2056     3.8   
1          2          102    Onions   2.74  32.1957,-114.3409     4.3   
2          3          103  Potatoes   2.55  35.9723,-109.4378     3.9   
3          4          104  Tomatoes   0.87  41.5947,-119.1465     4.3   
4          5          105  Potatoes   2.49   38.9277,-93.0347     4.3   

   quantity_available  
0                 442  
1                 461  
2                 114  
3                 488  
4                 332  


In [5]:
#check for missing values
print(df.isnull().sum())

farmer_id             0
retailer_id           0
product               0
price                 0
location              0
rating                0
quantity_available    0
dtype: int64


Train the DirectFarm Recommender System Model. This step preprocesses the data and trains a K-Nearest Neighbours (KNN) model.

In [6]:
# Step 2: Train the Recommender System Model
def train_model(data):
    # Feature engineering
    features = data[['price', 'rating', 'quantity_available']]

    # Normalize features
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)

    # Train a KNN model
    model = NearestNeighbors(n_neighbors=5, metric='euclidean')
    model.fit(features_scaled)
    print("Model trained successfully.")
    return model, scaler

This step filters the dataset based on user input ie product, price, rating and quantity and uses the trained model to recommend farmers.

In [7]:
# Step 3: Recommend Farmers
def recommend_farmers(model, scaler, data, product, max_price, min_rating, quantity_needed):
    # Filter data based on user input
    filtered_data = data[(data['product'] == product) &
                         (data['price'] <= max_price) &
                         (data['rating'] >= min_rating) &
                         (data['quantity_available'] >= quantity_needed)]

    if len(filtered_data) == 0:
        return None

    # Get features for filtered data
    filtered_features = filtered_data[['price', 'rating', 'quantity_available']]
    filtered_features_scaled = scaler.transform(filtered_features)

    # Find nearest neighbors
    distances, indices = model.kneighbors(filtered_features_scaled)

    # Return recommended farmers
    recommendations = filtered_data.iloc[indices[0]]
    return recommendations


This step generates the dataset, then trains the model and demonstrates how to use the recommender system.

In [9]:
# Main Script
if __name__ == "__main__":
    # Step 1: Generate the dataset
    data = generate_dataset()

    # Step 2: Train the model
    model, scaler = train_model(data)

    # Step 3: Example usage of the recommender system
    product = "Tomatoes"
    max_price = 3.0
    min_rating = 4.0
    quantity_needed = 50

def recommend_farmers(model, scaler, data, product, max_price, min_rating, quantity_needed):
    # Filter data based on user input
    filtered_data = data[(data['product'] == product) &
                         (data['price'] <= max_price) &
                         (data['rating'] >= min_rating) &
                         (data['quantity_available'] >= quantity_needed)]

    if len(filtered_data) == 0:
        return None

    # Get features for filtered data
    filtered_features = filtered_data[['price', 'rating', 'quantity_available']]
    filtered_features_scaled = scaler.transform(filtered_features)

    # Find nearest neighbors
    distances, indices = model.kneighbors(filtered_features_scaled)

    # Return recommended farmers
    # Get the indices of the recommendations within the filtered_data
    recommendations_indices = filtered_data.index[indices[0]]
    recommendations = data.loc[recommendations_indices]  # Use data.loc to get recommendations from the original data using recommendations_indices
    return recommendations


Dataset generated and saved as 'farmers_retailers.csv'
Model trained successfully.


Save the trained model as a pickle file

In [11]:
import joblib

# Assuming best_rf_model is your trained NearestNeighbours model
joblib.dump(model, 'bestmodel1.pkl')

['bestmodel1.pkl']