<a href="https://colab.research.google.com/github/Dorthi12/Hawkin-s-Farm/blob/dorthi-ml/CropRecommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Crop Recommendation

In [2]:
# ===============================
# 1) LOAD DATA
# ===============================
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, save_npz
import joblib

# Read the product catalogue and the purchase log, in that order.
pricing, tx = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/pricing_synthetic_generated.csv",
        "/content/transactions_synthetic_generated.csv",
    )
)

# Quick sanity check: (rows, columns) of each table.
print("Products:", pricing.shape)
print("Transactions:", tx.shape)

# ===============================
# 2) BUILD USER → ITEM MATRIX
# ===============================
# Map SKU → index
sku_list = pricing['sku'].tolist()
sku_to_idx = {sku: i for i, sku in enumerate(sku_list)}
# Build the reverse map straight from the catalogue order so that EVERY column
# index resolves to a SKU. Inverting sku_to_idx would drop the earlier index of
# any duplicated SKU and make later idx_to_sku[...] lookups fail.
idx_to_sku = dict(enumerate(sku_list))

# Matrix dimensions: one row per user id in [0, max id], one column per
# catalogue row. int(...) turns the pandas scalar into a plain Python int.
num_users = int(tx["user_id"].max()) + 1
num_items = len(sku_list)

# Kept so the name stays available to any later cell; not needed below any more.
from collections import defaultdict

# Translate every transaction's SKU to its column index in one vectorised pass
# (replaces the row-by-row `iterrows` loop).
tx_item_idx = tx["sku"].map(sku_to_idx)
unknown_sku = tx_item_idx.isna()
if unknown_sku.any():
    # Same exception type the plain dict lookup raised, but naming the offenders.
    missing = sorted(tx.loc[unknown_sku, "sku"].astype(str).unique())
    raise KeyError(
        f"{len(missing)} SKU(s) in transactions are missing from pricing, "
        f"e.g. {missing[:10]}"
    )

rows = tx["user_id"].to_numpy(dtype=np.int64)
cols = tx_item_idx.to_numpy(dtype=np.int64)
vals = tx["quantity"].to_numpy(dtype=float)

# Aggregate quantity per (user, sku): when built from (data, (row, col)),
# csr_matrix sums the values of repeated coordinates, so no explicit
# accumulation loop is required.
user_item = csr_matrix((vals, (rows, cols)), shape=(num_users, num_items))

print("User–Item matrix shape:", user_item.shape)

# ===============================
# 3) HOLDOUT TEST SET (one item per user)
# ===============================
# Fixed seed so the train/test split, and with it the reported metric and the
# saved similarity matrix, is the same on every Restart & Run All.
HOLDOUT_SEED = 42
holdout_rng = np.random.default_rng(HOLDOUT_SEED)

train = user_item.copy().tolil()   # work on a copy; user_item stays intact
test_pairs = []                    # (user row index, held-out item column index)

for u in range(num_users):
    row = user_item.getrow(u).tocoo()
    if row.nnz == 0:
        # No stored interactions for this user, so nothing can be held out.
        continue
    # Pick one of the user's stored item columns and remove it from training.
    heldout = int(holdout_rng.choice(row.col))
    train[u, heldout] = 0
    test_pairs.append((u, heldout))

train = train.tocsr()

# ===============================
# 4) ITEM–ITEM SIMILARITY (COSINE)
# ===============================
# Transpose the training matrix so each row describes one item.
item_user = train.T.tocsr()              # shape: items × users
dense = item_user.toarray()

# Euclidean length of every item row. Rows of length 0 get a divisor of 1 so
# the division below leaves them as all-zero rows instead of producing NaN.
norms = np.linalg.norm(dense, axis=1, keepdims=True)
norms = np.where(norms == 0, 1, norms)
dense_norm = np.divide(dense, norms)

# Dot products between unit-length rows are the cosine similarities.
similarity = np.matmul(dense_norm, dense_norm.T)
# Zero the diagonal so an item is never scored as similar to itself.
similarity[np.diag_indices_from(similarity)] = 0

print("Similarity matrix:", similarity.shape)

# ===============================
# 5) RECOMMENDER FUNCTION
# ===============================
def recommend_for_user(user_id, K=10):
    """Return up to ``K`` recommended SKUs for ``user_id``, best first.

    Every item is scored by summing its ``similarity`` to each item the user
    has a positive quantity for in ``train``. Items the user already bought
    are never returned.

    Parameters
    ----------
    user_id : int
        Row index into ``train``. An index outside ``train`` is treated like a
        user without purchases (cold start).
    K : int, default 10
        Maximum number of SKUs to return. A non-positive value yields ``[]``.

    Returns
    -------
    list
        SKUs in descending score order. The list is shorter than ``K`` when
        fewer than ``K`` unbought items exist. For cold-start users it holds
        the SKUs with the largest summed ``units_sold`` in ``pricing``.
    """
    if K <= 0:
        return []

    # Unknown users fall through to the popularity fallback instead of raising.
    if 0 <= user_id < train.shape[0]:
        user_vector = train.getrow(user_id).toarray().ravel()
        bought = np.where(user_vector > 0)[0]
    else:
        bought = np.array([], dtype=int)

    if len(bought) == 0:
        # Cold start → Popular items based on units_sold
        popular = pricing.groupby("sku")["units_sold"].sum().sort_values(ascending=False)
        return popular.head(K).index.tolist()

    # score items by similarity to bought items
    scores = similarity[bought].sum(axis=0)

    is_bought = np.zeros(scores.shape[0], dtype=bool)
    is_bought[bought] = True
    scores[is_bought] = -np.inf

    # Stable sort gives a deterministic order among equally scored items.
    ranked = np.argsort(-scores, kind="stable")
    # Drop bought items BEFORE truncating; otherwise a K larger than the number
    # of unbought items would pad the result with already-bought items.
    ranked = ranked[~is_bought[ranked]][:K]
    return [idx_to_sku[i] for i in ranked]

# ===============================
# 6) EVALUATION — PRECISION@10
# ===============================
hits = 0    # held-out items that appear in the user's top-10 list
total = 0   # number of evaluated (user, held-out item) pairs

for u, item_idx in test_pairs:
    recs = recommend_for_user(u, K=10)
    if idx_to_sku[item_idx] in recs:
        hits += 1
    total += 1

# NOTE(review): with one held-out item per user, hits / total is the fraction
# of users whose held-out item was recovered (hit rate / recall@10). A strict
# precision@10 would divide by 10 * total. Name, value and label are left
# unchanged so existing results stay comparable.
# An empty test set yields NaN instead of a ZeroDivisionError.
precision_at10 = hits / total if total else float("nan")
print("Precision@10:", round(precision_at10, 4))

# ===============================
# 7) SAVE MODEL
# ===============================
# The pickle only stores the PATH of the training matrix, so the matrix itself
# has to be written to that path; otherwise the saved model points at a file
# that does not exist.
TRAIN_MATRIX_PATH = "/content/train_user_item.npz"
save_npz(TRAIN_MATRIX_PATH, train)

# Bundle the dense similarity matrix, both SKU/index maps and the location of
# the sparse training matrix into one artefact.
joblib.dump({
    "similarity_matrix": similarity,
    "sku_to_idx": sku_to_idx,
    "idx_to_sku": idx_to_sku,
    "train_matrix_path": TRAIN_MATRIX_PATH
}, "/content/recommender_model.pkl")

print("Model saved at /content/recommender_model.pkl")



Products: (5000, 7)
Transactions: (25000, 4)
User–Item matrix shape: (800, 5000)
Similarity matrix: (5000, 5000)
Precision@10: 0.0037
Model saved at /content/recommender_model.pkl
