<a href="https://colab.research.google.com/github/Rishitha541/GenAI-Project/blob/main/Gen_ai_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load Dataset

In [None]:
df = pd.read_csv('/content/cleaned_onion_prices.csv')
df['Price Date'] = pd.to_datetime(df['Price Date'])
df.set_index('Price Date', inplace=True)

#Sales Prices for LSTM

In [None]:
scaler = MinMaxScaler()
df['Scaled_Price'] = scaler.fit_transform(df[['Modal Price']])

# Prepare LSTM data

In [None]:
look_back = 5
def create_lstm_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        X.append(dataset[i:(i+look_back), 0])
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

prices = df['Scaled_Price'].values.reshape(-1, 1)
X_lstm_all, y_lstm_all = create_lstm_dataset(prices, look_back)
X_lstm_all = X_lstm_all.reshape((X_lstm_all.shape[0], X_lstm_all.shape[1], 1))

#Split LSTM data

In [None]:
split_idx = int(len(X_lstm_all) * 0.8)
X_lstm_train, X_lstm_test = X_lstm_all[:split_idx], X_lstm_all[split_idx:]
y_lstm_train, y_lstm_test = y_lstm_all[:split_idx], y_lstm_all[split_idx:]

# Train LSTM

In [None]:
lstm_model = Sequential([
    LSTM(50, input_shape=(look_back, 1)),
    Dense(1)
])
lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_lstm_train, y_lstm_train, epochs=50, batch_size=8, verbose=0)

  super().__init__(**kwargs)


<keras.src.callbacks.history.History at 0x7efaae97fb90>

# Evaluate LSTM

In [None]:
lstm_preds = lstm_model.predict(X_lstm_test)
lstm_preds_rescaled = scaler.inverse_transform(lstm_preds)
y_lstm_test_rescaled = scaler.inverse_transform(y_lstm_test.reshape(-1, 1))
lstm_rmse = np.sqrt(mean_squared_error(y_lstm_test_rescaled, lstm_preds_rescaled))
lstm_r2 = r2_score(y_lstm_test_rescaled, lstm_preds_rescaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step


# Prepare data for RF & XGB

In [None]:
df['Day'] = df.index.day
df['Month'] = df.index.month
df['Year'] = df.index.year
X_ml = df[['Day', 'Month', 'Year']]
y_ml = df['Modal Price']

# Split ML data

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_ml, y_ml, test_size=0.2, random_state=42)

# Train and evaluate Random Forest

In [None]:
rf_model = RandomForestRegressor()
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_preds))
rf_r2 = r2_score(y_test, rf_preds)

# Train and evaluate XGBoost

In [None]:
xgb_model = XGBRegressor()
xgb_model.fit(X_train, y_train)
xgb_preds = xgb_model.predict(X_test)
xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_preds))
xgb_r2 = r2_score(y_test, xgb_preds)

# Print evaluation

In [None]:
print("\nModel Evaluation Metrics:")
print(f"LSTM         - RMSE: {lstm_rmse:.2f}, R² Score: {lstm_r2:.2f}")
print(f"RandomForest - RMSE: {rf_rmse:.2f}, R² Score: {rf_r2:.2f}")
print(f"XGBoost      - RMSE: {xgb_rmse:.2f}, R² Score: {xgb_r2:.2f}")


Model Evaluation Metrics:
LSTM         - RMSE: 213.03, R² Score: -0.80
RandomForest - RMSE: 157.58, R² Score: 0.99
XGBoost      - RMSE: 146.77, R² Score: 0.99


# Prediction

In [None]:
def predict_combined_price_for_date(date_str):
    try:
        target_date = pd.to_datetime(date_str, format='%d-%m-%Y')
    except ValueError:
        print("Invalid date format. Please use DD-MM-YYYY.")
        return
        # --- LSTM Prediction ---
    last_sequence = prices[-look_back:]
    steps = (target_date.year - df.index[-1].year) * 12 + (target_date.month - df.index[-1].month)
    if steps < 1 or steps > 36:
        lstm_price = None
    else:
        for _ in range(steps):
            input_seq = last_sequence.reshape((1, look_back, 1))
            next_pred = lstm_model.predict(input_seq, verbose=0)
            last_sequence = np.append(last_sequence[1:], next_pred)
        lstm_price = scaler.inverse_transform(next_pred.reshape(-1, 1))[0][0]

    # --- RF & XGB ---
    ml_features = pd.DataFrame({
        'Day': [target_date.day],
        'Month': [target_date.month],
        'Year': [target_date.year]
    })
    rf_price = rf_model.predict(ml_features)[0]
    xgb_price = xgb_model.predict(ml_features)[0]

    # Final Combined Prediction
    predictions = [rf_price, xgb_price]
    if lstm_price is not None:
        predictions.append(lstm_price)

    final_price = np.mean(predictions)
    print(f"\nFinal Predicted Modal Price on {date_str}: Rs. {final_price:.2f}")

# User Input

In [None]:
user_input = input("\nEnter the date (DD-MM-YYYY): ")
predict_combined_price_for_date(user_input)


Enter the date (DD-MM-YYYY): 30-01-2027

Final Predicted Modal Price on 30-01-2027: Rs. 4774.82
