In [23]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import joblib
from tqdm import tqdm

In [24]:
# Load and parse
df = pd.read_csv('crop_prices.csv', parse_dates=['Arrival_Date'])
df.dropna(subset=['Modal_Price'], inplace=True)

# Aggregate to monthly



In [25]:
df.head(10)

Unnamed: 0,State,District,Market,Commodity,Variety,Grade,Arrival_Date,Min_Price,Max_Price,Modal_Price,Commodity_Code
0,West Bengal,Kolkata,Mechua,Apple,Other,Medium,03/07/2016,9200,9300,9300,17
1,West Bengal,Kolkata,Mechua,Apple,Other,Small,03/07/2016,7800,8000,8000,17
2,West Bengal,Kolkata,Mechua,Apple,Other,Medium,06/07/2016,8400,8500,8400,17
3,West Bengal,Kolkata,Mechua,Apple,Other,Medium,09/07/2016,8400,8600,8500,17
4,West Bengal,Kolkata,Mechua,Apple,Other,Medium,10/07/2016,8400,8600,8500,17
5,West Bengal,Kolkata,Mechua,Apple,Other,Large,12/07/2016,10500,11000,10600,17
6,West Bengal,Kolkata,Mechua,Apple,Other,Medium,13/07/2016,8500,8600,8600,17
7,West Bengal,Kolkata,Mechua,Apple,Other,Large,14/07/2016,10200,10600,10300,17
8,West Bengal,Kolkata,Mechua,Apple,Other,Medium,15/07/2016,8600,9000,8800,17
9,West Bengal,Kolkata,Mechua,Apple,Other,Small,20/07/2016,7700,7800,7700,17


In [27]:
df['Arrival_Date'] = pd.to_datetime(df['Arrival_Date'], dayfirst=True, errors='coerce')


In [28]:
pairs = df[['Commodity', 'Market']].drop_duplicates()

In [29]:
os.makedirs('lstm_models', exist_ok=True)
os.makedirs('scalers', exist_ok=True)

In [30]:
def create_sequences(data, input_steps=12, forecast_steps=6):
    X, y = [], []
    for i in range(len(data) - input_steps - forecast_steps + 1):
        X.append(data[i:i+input_steps])
        y.append(data[i+input_steps:i+input_steps+forecast_steps])
    return np.array(X), np.array(y)

In [31]:
for _, row in tqdm(pairs.iterrows(), total=pairs.shape[0]):
    commodity = row['Commodity']
    market = row['Market']
    
    subset = df[(df['Commodity'] == commodity) & (df['Market'] == market)][['Arrival_Date', 'Modal_Price']].copy()
    if len(subset) < 400:  # Skip small datasets
        continue

    # Set DateTimeIndex and resample monthly
    subset.set_index('Arrival_Date', inplace=True)
    subset.index = pd.DatetimeIndex(subset.index)
    monthly = subset.resample("ME").mean().dropna()

    if len(monthly) < 30:  # Not enough months for learning
        continue

    # Normalize
    scaler = MinMaxScaler()
    modal_scaled = scaler.fit_transform(monthly)

    # Create sequences
    X, y = create_sequences(modal_scaled, 12, 6)
    if len(X) < 10:
        continue

    X_train, y_train = X, y

    # Define LSTM model
    model = Sequential()
    model.add(LSTM(64, activation='relu', input_shape=(X.shape[1], X.shape[2])))
    model.add(Dense(6))  # Forecast 6 months
    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train, y_train, epochs=50, batch_size=8, verbose=0)

    # Save model and scaler
    model.save(f"lstm_models/lstm_{commodity}_{market}.h5")
    joblib.dump(scaler, f"scalers/scaler_{commodity}_{market}.pkl")


  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
100%|██████████████████████████████████████████████████████████████████████████████████| 27/27 [08:55<00:00, 19.83s/it]
