# Importing Libraries

In [18]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation or numerical
# warnings raised by any imported library are hidden as well — consider
# narrowing it to specific categories once the noisy ones are known.
warnings.filterwarnings('ignore')

# Data Loading and Overview

In [3]:
# Load the price observations. The path is relative, so the notebook must be
# run from the directory that contains the `Data/` folder.
# No date parsing is requested here; the 'date' column is converted to
# datetime in the preprocessing section.
df = pd.read_csv('Data/bazarya_price_data.csv')

In [4]:
# Peek at the first rows to check the column layout and value formats.
df.head()

Unnamed: 0,date,product,category,price,unit,market_name,region,woreda,price_type
0,2023-02-26,Millet,Cereal,1407.95,quintal,Jimma,SNNPR,North Shewa,Retail
1,2023-05-05,Hot pepper,Vegetable,1334.51,quintal,Bahir Dar,Oromia,Wolaita,Wholesale
2,2023-12-03,Sugar (local),Processed Food,245.54,kg,Jimma,Tigray,Basketo,Wholesale
3,2024-06-08,Fava beans,Other,132.15,kg,Wolkite,SNNPR,Kaffa,Farmgate
4,2024-01-05,Nifro,Other,1619.45,quintal,Gonder,Tigray,Bole,Retail


In [23]:
# Count how often each distinct full row occurs; a count above 1 would
# indicate an exact duplicate record.
# NOTE(review): `df.duplicated().sum()` would answer the same question with a
# single number instead of one line per row.
df.value_counts()

date        product               category        price    unit     market_name  region              woreda       price_type
2023-01-01  Barley                Cereal          1474.81  kg       Adama        Tigray              South Wollo  Farmgate      1
2023-12-28  Millet                Cereal          2474.89  quintal  Hossana      Oromia              South Wollo  Wholesale     1
            Pepper powder         Other           1763.27  quintal  Dessie       Sidama              North Shewa  Farmgate      1
            Rice (local)          Cereal          1780.54  quintal  Wolkite      Southwest Ethiopia  North Shewa  Farmgate      1
            Salt                  Processed Food  126.91   kg       Adama        Addis Ababa         South Wollo  Farmgate      1
            Spaghetti             Processed Food  1754.51  quintal  Hossana      Oromia              Lideta       Wholesale     1
            Tea (local)           Processed Food  140.65   kg       Hawassa      Amhara        

In [7]:
# Number of missing values in each column.
df.isnull().sum()

date           0
product        0
category       0
price          0
unit           0
market_name    0
region         0
woreda         0
price_type     0
dtype: int64

# Data Preprocessing

In [10]:
# Convert 'date' to datetime so rows can be sorted chronologically and the
# column can later serve as a time index.
df['date'] = pd.to_datetime(df['date'])

In [12]:
# Order all observations chronologically (original row labels are kept).
# NOTE(review): the default sort kind is quicksort, which is not stable, so
# rows sharing the same date have no guaranteed relative order.
df = df.sort_values('date')

In [13]:
# Confirm the frame now starts at the earliest date.
df.head()

Unnamed: 0,date,product,category,price,unit,market_name,region,woreda,price_type
7777,2023-01-01,Sugar (local),Processed Food,2161.23,quintal,Jimma,Amhara,Jimma Town,Wholesale
9766,2023-01-01,Sugar (local),Processed Food,1226.75,quintal,Shola,Addis Ababa,Bole,Retail
868,2023-01-01,Yogurt,Animal Product,1490.15,quintal,Hossana,Southwest Ethiopia,Basketo,Farmgate
6281,2023-01-01,Papaya,Fruit,168.28,kg,Gonder,Addis Ababa,Bole,Farmgate
2013,2023-01-01,Goat meat,Animal Product,67.78,kg,Jimma,Oromia,Kolfe,Retail


In [19]:
# Distinct (product, market_name, price_type) triples; each triple is used
# below as the key of one separate price series.
unique_combinations = df[['product', 'market_name', 'price_type']].drop_duplicates()

In [21]:
# (rows, columns) of the full observation table.
df.shape

(10527, 9)

In [22]:
# Number of distinct series; compare with the row count of `df` to see how
# few observations each series has on average.
unique_combinations.shape

(1795, 3)

In [24]:
# Split the long-format table into one price series per
# (product, market_name, price_type) combination.
#
# A single groupby pass replaces re-scanning the whole frame with a
# three-column boolean mask once per combination.
# sort=False keeps the keys in order of first appearance in `df`, which is
# the same order that iterating over `unique_combinations` produced.
# Note: groupby skips rows whose key columns contain NaN (pandas default
# dropna=True); the missing-value check earlier reports none for this data.
filtered_dfs = {
    # key: (product, market_name, price_type) tuple
    # value: one-column ('price') frame indexed by date, ascending
    key: group[['date', 'price']].set_index('date').sort_index()
    for key, group in df.groupby(['product', 'market_name', 'price_type'], sort=False)
}

In [26]:
# Sanity check: there should be one entry per row of `unique_combinations`.
len(filtered_dfs)

1795