<a href="https://colab.research.google.com/github/Atnatiwos/Agricultural-Price-Prediction-System/blob/main/Agricultural_Price_Prediction_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Agricultural Price Prediction System  
## Predicting Commodity Prices for Ethiopian Markets

**Course:** SWEG4112 ‚Äì Introduction to Machine Learning  
**Group:** 3  
**Project Type:** Regression System  
**Dataset:** WFP / FEWS NET Ethiopia Food Prices  

### Problem Statement
Agricultural commodity prices in Ethiopia fluctuate due to seasonality, weather conditions, supply-demand imbalance, and holidays. This project builds a supervised machine learning regression system to predict commodity prices using real Ethiopian market data.


In [1]:
!pip install xgboost

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor




In [13]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [16]:
DATA_PATH = "/content/drive/MyDrive/Colab/agriculture_project/Data/wfp_food_prices_eth.csv"
df = pd.read_csv(DATA_PATH)


df = pd.read_csv(list(uploaded.keys())[0])
df.head()


  df = pd.read_csv(DATA_PATH)
  df = pd.read_csv(list(uploaded.keys())[0])


Unnamed: 0,date,admin1,admin2,market,market_id,latitude,longitude,category,commodity,commodity_id,unit,priceflag,pricetype,currency,price,usdprice
0,#date,#adm1+name,#adm2+name,#loc+market+name,#loc+market+code,#geo+lat,#geo+lon,#item+type,#item+name,#item+code,#item+unit,#item+price+flag,#item+price+type,#currency+code,#value,#value+usd
1,2000-01-15,Addis Ababa,AA ZONE1,Addis Ababa,480,9.02,38.75,cereals and tubers,Maize (white),67,100 KG,actual,Wholesale,ETB,120.75,15.08
2,2000-01-15,Amhara,Administrative unit not available,Baher Dar,482,11.6,37.38,cereals and tubers,Maize (white),67,100 KG,actual,Wholesale,ETB,115.5,14.43
3,2000-01-15,Dire Dawa,DIRE DAWA,Diredawa,472,9.59,41.87,cereals and tubers,Maize (white),67,100 KG,actual,Wholesale,ETB,161.75,20.2
4,2000-01-15,Tigray,MEKELE,Mekele,487,13.5,39.48,cereals and tubers,Maize (white),67,100 KG,actual,Wholesale,ETB,147.75,18.45


In [17]:
# Remove metadata row
df = df[df["date"] != "#date"]

# Convert date
df["date"] = pd.to_datetime(df["date"])
df["Year"] = df["date"].dt.year
df["Month"] = df["date"].dt.month

# Filter key commodities
commodities = [
    "Maize (white)",
    "Wheat (white)",
    "Sorghum (white)",
    "Teff (white)",
    "Barley"
]
df = df[df["commodity"].isin(commodities)]

# Convert price to ETB per KG
def normalize_price(row):
    if "100" in str(row["unit"]):
        return float(row["price"]) / 100
    return float(row["price"])

df["Price"] = df.apply(normalize_price, axis=1)

df = df[["Year","Month","admin1","market","commodity","pricetype","Price"]]
df.head()


Unnamed: 0,Year,Month,admin1,market,commodity,pricetype,Price
1,2000,1,Addis Ababa,Addis Ababa,Maize (white),Wholesale,1.2075
2,2000,1,Amhara,Baher Dar,Maize (white),Wholesale,1.155
3,2000,1,Dire Dawa,Diredawa,Maize (white),Wholesale,1.6175
4,2000,1,Tigray,Mekele,Maize (white),Wholesale,1.4775
5,2000,2,Addis Ababa,Addis Ababa,Maize (white),Wholesale,1.25


In [18]:
# Ethiopian seasons
def ethiopian_season(month):
    if month in [2,3,4,5]:
        return "Belg"
    elif month in [6,7,8,9]:
        return "Meher"
    return "Bega"

df["Season"] = df["Month"].apply(ethiopian_season)

# Rainfall index (proxy)
def rainfall_index(month):
    if month in [6,7,8,9]:
        return 8
    elif month in [2,3,4,5]:
        return 5
    return 2

df["Rainfall_Index"] = df["Month"].apply(rainfall_index)

# Holiday indicator
df["Holiday"] = df["Month"].isin([1,4,5,9]).astype(int)


In [6]:
df = df.sort_values(by=["market","commodity","Year","Month"])

df["Prev_Price"] = df.groupby(
    ["market","commodity"]
)["Price"].shift(1)

df["MA_3"] = df.groupby(
    ["market","commodity"]
)["Price"].rolling(3).mean().reset_index(level=[0,1], drop=True)

df = df.dropna()
df.head()


Unnamed: 0,Year,Month,admin1,market,commodity,pricetype,Price,Season,Rainfall_Index,Holiday,Prev_Price,MA_3
28239,2023,8,Afar,Abaala,Barley,Retail,90.0,Meher,8,0,10.0,36.666667
30975,2024,5,Afar,Abaala,Maize (white),Retail,50.0,Belg,5,0,64.0,42.0
31384,2024,6,Afar,Abaala,Maize (white),Retail,50.0,Meher,8,0,50.0,54.666667
31827,2024,7,Afar,Abaala,Maize (white),Retail,46.0,Meher,8,0,50.0,48.666667
32604,2024,9,Afar,Abaala,Maize (white),Retail,46.0,Meher,8,1,46.0,47.333333


In [19]:
X = df.drop("Price", axis=1)
y = df["Price"]

X = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [28]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

print("Random Forest MAE:", mean_absolute_error(y_test, rf_preds))
print("Random Forest RMSE:", np.sqrt(mean_squared_error(y_test, rf_preds)))
print("Random Forest R2:", r2_score(y_test, rf_preds))


ValueError: Found input variables with inconsistent numbers of samples: [1998, 1866]

In [10]:
rf = RandomForestRegressor(
    n_estimators=300,
    random_state=42
)

rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

print("Random Forest MAE:", mean_absolute_error(y_test, rf_preds))
print("Random Forest RMSE:", mean_squared_error(y_test, rf_preds, squared=False))
print("Random Forest R2:", r2_score(y_test, rf_preds))


Random Forest MAE: 2.0010611525544837


TypeError: got an unexpected keyword argument 'squared'

In [None]:
xgb = XGBRegressor(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42
)

xgb.fit(X_train, y_train)
xgb_preds = xgb.predict(X_test)

print("XGBoost MAE:", mean_absolute_error(y_test, xgb_preds))
print("XGBoost RMSE:", np.sqrt(mean_squared_error(y_test, xgb_preds)))
print("XGBoost R2:", r2_score(y_test, xgb_preds))



In [29]:
plt.figure(figsize=(6,6))
plt.scatter(y_test, xgb_preds, alpha=0.5)
plt.xlabel("Actual Price (ETB/kg)")
plt.ylabel("Predicted Price (ETB/kg)")
plt.title("Actual vs Predicted Agricultural Prices")
plt.show()


NameError: name 'xgb_preds' is not defined

<Figure size 600x600 with 0 Axes>


## Conclusion
This project successfully implemented a regression-based agricultural price prediction system using real Ethiopian market data. Incorporating seasonal, weather, and lag features significantly improved model accuracy. The XGBoost model achieved strong predictive performance and demonstrates practical value for farmers, traders, and policymakers.
#data link
https://data.humdata.org/dataset/2e4f1922-e446-4b57-a98a-d0e2d5e34afa/resource/87bac18e-f3aa-4b29-8cf8-76763e823dc5/download/wfp_food_prices_eth.csv




In [34]:
!pip install streamlit pyngrok


Collecting streamlit
  Downloading streamlit-1.52.2-py3-none-any.whl.metadata (9.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.2-py3-none-any.whl (9.0 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.0/9.0 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m96.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.5.0 streamlit-1.52.2


In [36]:
#import joblib

#joblib.dump(xgb, "price_prediction_model.pkl")
#joblib.dump(X.columns.tolist(), "model_features.pkl")


import joblib

MODEL_PATH = "/content/drive/MyDrive/Colab/agriculture_project/Model/price_model.pkl"
FEATURE_PATH = "/content/drive/MyDrive/Colab/agriculture_project/Model/model_features.pkl"

joblib.dump(model, MODEL_PATH)
joblib.dump(X.columns.tolist(), FEATURE_PATH)

print("Model & features saved to Google Drive")


NameError: name 'model' is not defined

In [33]:
%%writefile /content/drive/MyDrive//Colab/agriculture_project/App/app.py
import streamlit as st
import pandas as pd
import joblib
import numpy as np
import matplotlib.pyplot as plt
import calendar

# Load model and features
#model = joblib.load("price_prediction_model.pkl")
#features = joblib.load("model_features.pkl")

model = joblib.load("/content/drive/MyDrive/Colab/agriculture_project/Model/price_model.pkl")
features = joblib.load("/content/drive/MyDrive/Colab/agriculture_project/Model/model_features.pkl")

st.set_page_config(
    page_title="Ethiopian Agricultural Price Predictor",
    layout="wide"
)

# ---------------- HEADER ----------------
st.title("üåæ Ethiopian Agricultural Price Prediction System")
st.markdown(
    "Predict **next month agricultural commodity prices** in Ethiopian markets using machine learning."
)
st.divider()

# ---------------- INPUTS ----------------
st.subheader("üì• Prediction Inputs")

col1, col2, col3 = st.columns(3)

with col1:
    commodity = st.selectbox(
        "Commodity",
        ["Teff (white)", "Wheat (white)", "Maize (white)", "Sorghum (white)", "Barley"],
        help="Select the commodity to forecast"
    )

with col2:
    market = st.selectbox(
        "Market",
        ["Addis Ababa", "Adama", "Bahir Dar", "Hawassa", "Jimma", "Dire Dawa"],
        help="Select the market location"
    )

with col3:
    month = st.selectbox(
        "Prediction Month",
        list(range(1,13)),
        help="Select the current month; the system predicts the NEXT month price"
    )

col4, col5, col6 = st.columns(3)

with col4:
    prev_price = st.number_input(
        "Previous Month Price (ETB/kg)",
        5.0, 150.0, 40.0,
        help="Observed price in the most recent month"
    )

with col5:
    ma3 = st.number_input(
        "3-Month Moving Average (ETB/kg)",
        5.0, 150.0, 38.0,
        help="Average price of the last three months"
    )

with col6:
    rainfall = st.slider(
        "Rainfall Index",
        1, 10, 5,
        help="Seasonal rainfall intensity affecting crop supply"
    )

# ---------------- AUTO HOLIDAY & FASTING ----------------
def holiday_fasting_flags(month):
    holiday_months = [4, 9, 12]   # Easter, Meskel, Christmas/Eid
    fasting_months = [3, 4, 6]    # Lent, Ramadan (approx)
    return {
        "Holiday": 1 if month in holiday_months else 0,
        "Fasting": 1 if month in fasting_months else 0
    }

flags = holiday_fasting_flags(month)

# ---------------- SEASON FLAGS ----------------
def season_flags(month):
    if month in [2,3,4,5]:
        return {"Season_Belg":1, "Season_Meher":0}
    elif month in [6,7,8,9]:
        return {"Season_Belg":0, "Season_Meher":1}
    return {"Season_Belg":0, "Season_Meher":0}

season = season_flags(month)

# ---------------- BUILD INPUT ----------------
data = {
    "Month": month,
    "Rainfall_Index": rainfall,
    "Holiday": flags["Holiday"],
    "Fasting": flags["Fasting"],
    "Prev_Price": prev_price,
    "MA_3": ma3,
    "Season_Belg": season["Season_Belg"],
    "Season_Meher": season["Season_Meher"],
}

for col in features:
    if col.startswith("commodity_"):
        data[col] = 1 if col == f"commodity_{commodity}" else 0
    if col.startswith("market_"):
        data[col] = 1 if col == f"market_{market}" else 0
    if col.startswith("pricetype_"):
        data[col] = 1

input_df = pd.DataFrame([data])

for col in features:
    if col not in input_df:
        input_df[col] = 0

input_df = input_df[features]

st.divider()

# ---------------- PREDICTION ----------------
if st.button("üîÆ Predict Next Month Price", use_container_width=True):

    prediction = model.predict(input_df)[0]

    next_month = 1 if month == 12 else month + 1
    next_month_name = calendar.month_name[next_month]

    st.success(
        f"üí∞ Predicted Price for **{next_month_name}**: {prediction:.2f} ETB per KG"
    )

    # ---------------- 3-MONTH FORECAST ----------------
    st.subheader("üìà 3-Month Price Forecast")

    preds = []
    prev = prev_price
    avg = ma3
    m = month

    for _ in range(3):
        input_df["Prev_Price"] = prev
        input_df["MA_3"] = (prev + avg) / 2
        p = model.predict(input_df)[0]
        preds.append(p)
        prev = p
        m = 1 if m == 12 else m + 1

    months = [calendar.month_name[(month+i-1)%12+1] for i in range(1,4)]

    for mo, pr in zip(months, preds):
        st.write(f"üìÖ {mo}: **{pr:.2f} ETB/kg**")

    # ---------------- TREND CHART ----------------
    plt.figure()
    plt.plot(months, preds, marker="o")
    plt.xlabel("Month")
    plt.ylabel("Price (ETB/kg)")
    plt.title("3-Month Price Forecast Trend")
    plt.grid(True)
    st.pyplot(plt)

st.divider()
st.caption("Machine Learning Project ‚Äì Ethiopian Agricultural Markets")


Overwriting /content/drive/MyDrive//Colab/agriculture_project/App/app.py


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [37]:
from pyngrok import ngrok

ngrok.set_auth_token("36wQfewIxvInBKrAGD7cRPvyxQM_7Wh1iN5p5Ch4QVkFP1HiW")

!streamlit run app.py &>/content/logs.txt &
public_url = ngrok.connect(8501)
public_url




<NgrokTunnel: "https://unpopular-pattie-hesitatingly.ngrok-free.dev" -> "http://localhost:8501">