In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import pickle

# Read the raw vegetable price records
data = pd.read_csv('vegetable_data.csv')

# Dates are written day-first (dd/mm/YYYY); parse them and use them as the index
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')
data = data.set_index('Date')

# Give every vegetable name an integer code, numbered in order of first appearance
unique_vegetables = data['Vegetable'].unique()
vegetable_mapping = dict(zip(unique_vegetables, range(len(unique_vegetables))))
data['Vegetable'] = data['Vegetable'].map(vegetable_mapping)

# Rescale the 'Price' column with Min-Max scaling.
# NOTE(review): the scaler is fitted on every row, including the rows that end up
# in the held-out split further down — consider fitting on the training rows only.
scaler = MinMaxScaler()
price_column = data['Price'].values.reshape(-1, 1)
data['Price'] = scaler.fit_transform(price_column)

# Define a function to create sequences for LSTM training
def create_sequences(data, seq_length):
    """Build sliding-window samples for next-step price prediction.

    Each sample is ``seq_length`` consecutive rows of the 'Vegetable' and
    'Price' columns; its target is the 'Price' of the row immediately after
    the window.

    Args:
        data: DataFrame with 'Vegetable' and 'Price' columns, already in the
            row order the windows should follow.
        seq_length: Number of consecutive rows per window (non-negative int).

    Returns:
        Tuple ``(sequences, target)`` of NumPy arrays. ``sequences`` has shape
        ``(n, seq_length, 2)`` and ``target`` has shape ``(n,)``, where
        ``n = max(len(data) - seq_length, 0)``. When there are too few rows
        for a single window, ``sequences`` is empty but keeps its 3-D layout.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError("seq_length must be non-negative")

    # NOTE(review): windows run over consecutive rows regardless of the
    # 'Vegetable' value, so a window can mix several vegetables if their rows
    # are interleaved — confirm this is intended.

    # Convert once to NumPy instead of slicing the DataFrame for every window.
    features = data[['Vegetable', 'Price']].to_numpy()
    prices = data['Price'].to_numpy()

    n_windows = max(len(data) - seq_length, 0)

    # Row i of window_index holds the row positions i, i+1, ..., i+seq_length-1.
    window_index = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    sequences = features[window_index]

    # Target for window i is the price at row i + seq_length; copy so the
    # result does not alias the DataFrame's underlying buffer.
    target = prices[seq_length:seq_length + n_windows].copy()
    return sequences, target

# Number of consecutive rows fed to the network per sample
seq_length = 10

# Build the sliding-window inputs and their next-row price targets
X, y = create_sequences(data, seq_length)

# Positional split (no shuffling): the first 80% of windows train, the rest evaluate
train_size = int(0.8 * len(X))
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# One LSTM layer over (seq_length, 2) inputs followed by a single-unit output layer
model = Sequential([
    LSTM(50, activation='relu', input_shape=(seq_length, 2)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Fit for 50 epochs; the held-out windows double as the validation data
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_test, y_test),
)

# Persist the fitted model and the price scaler with pickle.
# NOTE(review): whether a Keras model can be pickled depends on the installed
# Keras/TensorFlow version; `model.save(...)` is the documented persistence API.
# NOTE(review): a later cell in this notebook also writes 'scaler.pkl' (a
# StandardScaler), which overwrites this file when both cells are run.
# NOTE(review): `vegetable_mapping` is not saved alongside the model — confirm
# that whatever loads 'model.pkl' can rebuild the same name-to-code mapping.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [8]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

# Read the raw vegetable price records
data = pd.read_csv("vegetable_data.csv")

# Dates are written day-first (dd/mm/YYYY); parse them into datetimes
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')

# Replace vegetable names with integer codes
label_encoder = LabelEncoder()
data['Vegetable'] = label_encoder.fit_transform(data['Vegetable'])

# Express each date as whole days elapsed since the earliest date in the data
earliest_date = data['Date'].min()
data['DaysSinceEarliest'] = (data['Date'] - earliest_date).dt.days

# Features: elapsed days and vegetable code; target: price
X = data[['DaysSinceEarliest', 'Vegetable']]
y = data['Price']

# Hold out 30% of the rows for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=62)

# Standardize the features; the scaler is fitted on the training rows only
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a linear regression on the standardized training features
model = LinearRegression().fit(X_train_scaled, y_train)

# Persist the trained model and the feature scaler with pickle.
# NOTE(review): an earlier cell in this notebook also writes 'scaler.pkl' (a
# MinMaxScaler fitted on prices); this write replaces that file.
# NOTE(review): the fitted `label_encoder` and the earliest date are not saved —
# confirm that whatever loads these files can rebuild the same feature encoding.
with open('vegetable_price_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
