<a href="https://colab.research.google.com/github/SaloniKhare/Projects/blob/main/AgriPredict/Agri.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Flatten, Concatenate, RepeatVector
from tensorflow.keras.models import Model

# Load the merged price table and order its rows by the Arrival_Date column.
# NOTE(review): no parse_dates/dtype is given here, so if Arrival_Date is read as
# text this is a lexicographic sort rather than a calendar sort — confirm the
# date format stored in the CSV.
df = pd.read_csv("/content/merged_all.csv").sort_values("Arrival_Date")

# One LabelEncoder per categorical column. Each encoder is kept under its own
# name because the fitted objects are persisted with joblib further down.
le_market, le_commodity, le_variety, le_grade = (LabelEncoder() for _ in range(4))

# Fit every encoder on its source column and store the integer codes in a
# sibling "<column>_enc" column.
for source_col, encoder in (
    ("Market", le_market),
    ("Commodity", le_commodity),
    ("Variety", le_variety),
    ("Grade", le_grade),
):
    df[f"{source_col}_enc"] = encoder.fit_transform(df[source_col])

In [None]:
# Cap Modal_Price at its own 1st and 99th percentiles so extreme values do not
# dominate the min-max scaling applied afterwards.
price_floor = df["Modal_Price"].quantile(0.01)
price_cap = df["Modal_Price"].quantile(0.99)
df["Modal_Price"] = df["Modal_Price"].clip(lower=price_floor, upper=price_cap)


In [None]:
# Fit a MinMaxScaler on the (clipped) Modal_Price column and keep the scaled
# values in Price_scaled. The fitted `scaler` is reused later to map model
# outputs back to price units.
scaler = MinMaxScaler()
price_matrix = df["Modal_Price"].to_numpy().reshape(-1, 1)  # scaler expects 2-D input
df["Price_scaled"] = scaler.fit_transform(price_matrix)


In [None]:
# Build supervised samples: `seq_len` past scaled prices -> the next scaled price.
#
# Windows are cut *within* each (market, commodity, variety, grade) series.
# Sliding a single window over the whole table would fill a sample's "history"
# with prices of unrelated markets/commodities that merely sit on neighbouring
# rows. Rows keep the order `df` already has (it is sorted by Arrival_Date when
# loaded), and the finished samples are put back into that same row order so
# that slices such as `[-10:]` and Keras' `validation_split` still address the
# rows that come last in `df`.
seq_len = 30

series_keys = ["Market_enc", "Commodity_enc", "Variety_enc", "Grade_enc"]
scaled_prices = df["Price_scaled"].to_numpy()

window_chunks = []   # per-series arrays of shape (n_windows, seq_len)
target_chunks = []   # per-series positional row numbers of each window's target

# GroupBy.indices maps each key tuple to the positional row numbers of its rows.
for row_positions in df.groupby(series_keys, sort=False).indices.values():
    row_positions = np.sort(row_positions)
    if len(row_positions) <= seq_len:
        # Too short to yield a full history plus one target.
        continue
    series = scaled_prices[row_positions]
    # Window j is series[j:j+seq_len]; its target is series[j+seq_len], so the
    # final window (which has no following value) is dropped.
    history = np.lib.stride_tricks.sliding_window_view(series, seq_len)[:-1]
    window_chunks.append(history)
    target_chunks.append(row_positions[seq_len:])

if target_chunks:
    target_rows = np.concatenate(target_chunks)
    # Restore the table's row order across series.
    row_order = np.argsort(target_rows, kind="stable")
    target_rows = target_rows[row_order]
    X_prices = np.concatenate(window_chunks)[row_order]
else:
    # No series is longer than seq_len: produce correctly shaped empty arrays.
    target_rows = np.empty(0, dtype=np.intp)
    X_prices = np.empty((0, seq_len), dtype=scaled_prices.dtype)

# Categorical codes and the regression target are taken from the target row.
X_market = df["Market_enc"].to_numpy()[target_rows]
X_commodity = df["Commodity_enc"].to_numpy()[target_rows]
X_variety = df["Variety_enc"].to_numpy()[target_rows]
X_grade = df["Grade_enc"].to_numpy()[target_rows]
y = scaled_prices[target_rows]


In [None]:
# --- Price-history branch: two stacked LSTMs over a (seq_len, 1) sequence ---
seq_input = Input(shape=(seq_len, 1))
lstm_hidden = LSTM(64, return_sequences=True)(seq_input)
x_seq = LSTM(32)(lstm_hidden)

# --- Categorical branch: one scalar id input per encoded column ---
categorical_columns = ["Market_enc", "Commodity_enc", "Variety_enc", "Grade_enc"]
categorical_inputs = [Input(shape=(1,)) for _ in categorical_columns]
market_input, commodity_input, variety_input, grade_input = categorical_inputs

# Each id is embedded into 5 dimensions; the vocabulary size is the number of
# distinct codes present in the corresponding column of `df`.
embedded = [
    Embedding(df[column].nunique(), 5)(id_input)
    for column, id_input in zip(categorical_columns, categorical_inputs)
]

# Flatten every embedding output so it can be concatenated with the LSTM state.
flattened = [Flatten()(tensor) for tensor in embedded]
market_emb, commodity_emb, variety_emb, grade_emb = flattened

# --- Head: merge both branches and regress a single value ---
merged = Concatenate()([x_seq, market_emb, commodity_emb, variety_emb, grade_emb])

hidden = Dense(32, activation="relu")(merged)
output = Dense(1)(hidden)

model = Model(
    inputs=[seq_input, market_input, commodity_input, variety_input, grade_input],
    outputs=output,
)
model.compile(optimizer="adam", loss="mse")

model.summary()


In [None]:
# Inputs are listed in the same order as in Model(inputs=[...]): the price
# windows (given a trailing feature axis of size 1) followed by the four
# categorical code arrays.
train_inputs = [
    X_prices.reshape(-1, seq_len, 1),
    X_market,
    X_commodity,
    X_variety,
    X_grade,
]

# Train for 10 epochs with mini-batches of 32; a 0.1 fraction of the samples is
# passed to Keras as validation data via `validation_split`.
history = model.fit(
    train_inputs,
    y,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)


Epoch 1/10
[1m129750/129750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1203s[0m 9ms/step - loss: 0.0039 - val_loss: 0.0032
Epoch 2/10
[1m129750/129750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1213s[0m 9ms/step - loss: 0.0032 - val_loss: 0.0031
Epoch 3/10
[1m129750/129750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1205s[0m 9ms/step - loss: 0.0031 - val_loss: 0.0029
Epoch 4/10
[1m129750/129750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1222s[0m 9ms/step - loss: 0.0030 - val_loss: 0.0030
Epoch 5/10
[1m129750/129750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1228s[0m 9ms/step - loss: 0.0030 - val_loss: 0.0029
Epoch 6/10
[1m129750/129750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1208s[0m 9ms/step - loss: 0.0029 - val_loss: 0.0029
Epoch 7/10
[1m129750/129750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1194s[0m 9ms/step - loss: 0.0029 - val_loss: 0.0029
Epoch 8/10
[1m129750/129750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1190s[0m 9ms/step - 

In [None]:
# Spot-check: predict the last ten samples and show them next to the true
# values, both converted back from the scaled range to price units.
tail = slice(-10, None)

pred_scaled = model.predict([
    X_prices[tail].reshape(-1, seq_len, 1),
    X_market[tail],
    X_commodity[tail],
    X_variety[tail],
    X_grade[tail],
])

# The scaler works on 2-D arrays, hence the reshape of the 1-D target slice.
pred = scaler.inverse_transform(pred_scaled)
actual = scaler.inverse_transform(y[tail].reshape(-1, 1))

print("Actual:", actual.ravel())
print("Predicted:", pred.ravel())


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 306ms/step
Actual: [ 5000. 11000.  6000. 14000.  2500.  2300.  6250.  7000. 30000.  3500.]
Predicted: [ 6373.5376 15252.444  11655.283   8498.504   3889.3984  2351.7134
  6690.217   5550.6475 24449.43    4037.5215]


In [None]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# NOTE(review): these two arrays are typed in by hand rather than read from the
# model, and they differ from the Actual/Predicted values printed by the
# prediction cell above — confirm where they came from before quoting the
# metrics computed here.
actual = np.array([3315, 7889, 2300, 2325, 3915, 15000, 11500, 2569, 3200, 4260])
pred = np.array([3255.627, 7064.292, 2283.2185, 2406.033, 3789.21, 17312.344,
                 14604.941, 3113.1, 2708.0679, 4215.7314])

# Mean absolute error and root mean squared error, in the units of the arrays.
mae = mean_absolute_error(actual, pred)
rmse = np.sqrt(mean_squared_error(actual, pred))

# Mean absolute percentage error; divides by `actual`, so it needs non-zero actuals.
relative_error = (actual - pred) / actual
mape = np.mean(np.abs(relative_error)) * 100

# Coefficient of determination.
r2 = r2_score(actual, pred)

for label, value in (
    ("MAE:", mae),
    ("RMSE:", rmse),
    ("MAPE (%):", mape),
    ("R² Score:", r2),
):
    print(label, value)


MAE: 760.5271200000001
RMSE: 1274.1245867691914
MAPE (%): 9.967949415170787
R² Score: 0.9071395192596206


In [None]:
# Predict the last ten samples and score them in price units.
tail = slice(-10, None)
eval_inputs = [
    X_prices[tail].reshape(-1, seq_len, 1),
    X_market[tail],
    X_commodity[tail],
    X_variety[tail],
    X_grade[tail],
]
pred_scaled = model.predict(eval_inputs)

# Undo the min-max scaling (the scaler expects 2-D arrays).
pred = scaler.inverse_transform(pred_scaled)
actual = scaler.inverse_transform(y[tail].reshape(-1, 1))

# 1-D views for the metric functions and for printing.
actual_vals = actual.flatten()
pred_vals = pred.flatten()

# Error metrics; MAPE divides by the actual values, so they must be non-zero.
mae = mean_absolute_error(actual_vals, pred_vals)
rmse = np.sqrt(mean_squared_error(actual_vals, pred_vals))
relative_error = (actual_vals - pred_vals) / actual_vals
mape = np.mean(np.abs(relative_error)) * 100
r2 = r2_score(actual_vals, pred_vals)

for label, value in (
    ("Actual:", actual_vals),
    ("Predicted:", pred_vals),
    ("MAE:", mae),
    ("RMSE:", rmse),
    ("MAPE (%):", mape),
    ("R2 Score:", r2),
):
    print(label, value)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Actual: [ 5000. 11000.  6000. 14000.  2500.  2300.  6250.  7000. 30000.  3500.]
Predicted: [ 6373.5376 15252.444  11655.283   8498.504   3889.3984  2351.7134
  6690.217   5550.6475 24449.43    4037.5215]
MAE: 2620.15341796875
RMSE: 3428.4668845142824
MAPE (%): 31.911296558595957
R2 Score: 0.8114326983144591


In [None]:
from google.colab import drive
# Mount Google Drive in the Colab runtime; a later cell writes the model and
# encoders under /content/drive/MyDrive.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import pandas as pd
# Report the (rows, columns) shape of the CSV as stored on disk.
# The result is deliberately not bound to `df`: `df` holds the sorted frame
# with the *_enc and Price_scaled columns that the windowing and model cells
# read, and overwriting it here would break those cells on a re-run.
print(pd.read_csv("/content/merged_all.csv").shape)

(4613357, 11)


In [None]:
import joblib
from pathlib import Path

save_path = r"C:\Users\Saloni1\Documents\AgriModel"  # <<< CHANGE THIS TO YOUR FOLDER

# Join file names with pathlib instead of a hard-coded "\" so the separator is
# right for whichever OS runs this cell, and create the folder up front so the
# writes below do not fail on a missing directory.
# NOTE(review): other cells in this notebook use /content/... paths; on such a
# (non-Windows) runtime a "C:\..." value is not a real location — set save_path
# to a folder that exists on the machine executing the notebook.
save_dir = Path(save_path)
save_dir.mkdir(parents=True, exist_ok=True)

# Trained Keras model.
model.save(str(save_dir / "lstm_agri_model.h5"))

# Fitted price scaler and label encoders.
for artifact, file_name in (
    (scaler, "price_scaler.save"),
    (le_market, "le_market.save"),
    (le_commodity, "le_commodity.save"),
    (le_variety, "le_variety.save"),
    (le_grade, "le_grade.save"),
):
    joblib.dump(artifact, str(save_dir / file_name))

print("\nModel & all encoders saved successfully in your internal storage!")




Model & all encoders saved successfully in your internal storage!


In [None]:

# Persist the trained model and the fitted preprocessing objects to Google
# Drive (the Drive must already be mounted at /content/drive).
drive_dir = "/content/drive/MyDrive"

model.save(f"{drive_dir}/lstm_agri_model.h5")

import joblib

drive_artifacts = (
    (scaler, "price_scaler.save"),
    (le_market, "le_market.save"),
    (le_commodity, "le_commodity.save"),
    (le_variety, "le_variety.save"),
    (le_grade, "le_grade.save"),
)
for artifact, file_name in drive_artifacts:
    joblib.dump(artifact, f"{drive_dir}/{file_name}")

print("Saved to Google Drive!")




Saved to Google Drive!


In [None]:
save_path = "/content/AgriModel"
import os
os.makedirs(save_path, exist_ok=True)
import joblib

model.save(f"{save_path}/lstm_agri_model.h5")

joblib.dump(scaler, f"{save_path}/price_scaler.save")
joblib.dump(le_market, f"{save_path}/le_market.save")
joblib.dump(le_commodity, f"{save_path}/le_commodity.save")
joblib.dump(le_variety, f"{save_path}/le_variety.save")
joblib.dump(le_grade, f"{save_path}/le_grade.save")

print("Model + Encoders saved inside /content/AgriModel")
!zip -r AgriModel.zip /content/AgriModel
from google.colab import files
files.download("AgriModel.zip")




Model + Encoders saved inside /content/AgriModel
  adding: content/AgriModel/ (stored 0%)
  adding: content/AgriModel/le_commodity.save (deflated 44%)
  adding: content/AgriModel/price_scaler.save (deflated 40%)
  adding: content/AgriModel/le_market.save (deflated 51%)
  adding: content/AgriModel/lstm_agri_model.h5 (deflated 15%)
  adding: content/AgriModel/le_grade.save (deflated 35%)
  adding: content/AgriModel/le_variety.save (deflated 49%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>