In [1]:
import pandas as pd

# Load the dataset
file_path = 'new_generated_food_commodities_data.csv'
data = pd.read_csv(file_path)

# Inspect the structure of the dataset.
# `DataFrame.info()` prints its report and returns None, so it is called on its own
# instead of being packed into a tuple (which would show a stray `None` and force the
# two frames into their wrapped plain-text repr).
data.info()

# `display` is the IPython built-in available in a notebook kernel; it renders each
# frame with its rich (HTML table) representation.
display(data.head())
display(data.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 12 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Date                               50000 non-null  object 
 1   Commodity                          50000 non-null  object 
 2   Price Reporting Centre             50000 non-null  object 
 3   Price (INR per kg)                 50000 non-null  float64
 4   Buffer Stock (Metric Tons)         50000 non-null  int64  
 5   Market Intervention Decision       50000 non-null  object 
 6   Seasonality Factor                 50000 non-null  object 
 7   Historical Trend                   50000 non-null  object 
 8   Market Intelligence Input          50000 non-null  object 
 9   Crop Sowing Estimate (Hectares)    50000 non-null  int64  
 10  Production Estimate (Metric Tons)  50000 non-null  int64  
 11  ARIMA Forecast Price (INR per kg)  50000 non-null  flo

(                  Date Commodity Price Reporting Centre  Price (INR per kg)  \
 0  2019-01-01 00:00:00     Masur                 Mumbai               50.35   
 1  2019-01-01 01:00:00     Moong                Kolkata               96.50   
 2  2019-01-01 02:00:00     Moong              Hyderabad              145.89   
 3  2019-01-01 03:00:00     Wheat                Kolkata              129.14   
 4  2019-01-01 04:00:00     Wheat                  Delhi              100.83   
 
    Buffer Stock (Metric Tons) Market Intervention Decision Seasonality Factor  \
 0                        6002                      Release                Low   
 1                        4859                         Hold                Low   
 2                        2818                         Hold                Low   
 3                        2032                         Hold                Low   
 4                        4858                      Release                Low   
 
   Historical Trend Mark

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Work on a copy so the frame loaded earlier keeps its original string labels and this
# cell produces the same result however often it is re-run.
prepared = data.copy()

# Convert 'Date' to datetime
prepared['Date'] = pd.to_datetime(prepared['Date'])

# Encode categorical variables.
# One LabelEncoder per column: a single shared encoder is re-fitted on every iteration
# and ends up remembering only the classes of the last column, so the mapping for the
# other columns could never be inverted or re-applied.
categorical_columns = ['Commodity', 'Price Reporting Centre', 'Market Intervention Decision', 
                       'Seasonality Factor', 'Historical Trend', 'Market Intelligence Input']
label_encoders = {}
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    prepared[col] = label_encoders[col].fit_transform(prepared[col])

# Backward compatibility: `le` previously named the shared encoder, which after the loop
# was fitted on the last categorical column.
le = label_encoders[categorical_columns[-1]]

# Select features and target variable ('Date' and the ARIMA forecast are not used as features)
X = prepared.drop(columns=['Price (INR per kg)', 'ARIMA Forecast Price (INR per kg)', 'Date'])
y = prepared['Price (INR per kg)']

# Split the data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features; the scaler is fitted on the training split only so no statistics
# from the test split leak into training
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model.
# NOTE: this is the R^2 score (coefficient of determination) expressed as a percentage,
# not a classification accuracy; the variable name is kept for compatibility.
accuracy = r2_score(y_test, y_pred) * 100
accuracy

98.97182570307727

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score
# Work on a copy: `data` comes from an earlier cell and is left exactly as this cell
# found it, so re-running this cell always starts from the same input.
prepared = data.copy()

# Convert 'Date' to datetime
prepared['Date'] = pd.to_datetime(prepared['Date'])

# Encode categorical variables.
# One LabelEncoder per column: a single shared encoder is re-fitted on every iteration
# and ends up remembering only the classes of the last column.
categorical_columns = ['Commodity', 'Price Reporting Centre', 'Market Intervention Decision', 
                       'Seasonality Factor', 'Historical Trend', 'Market Intelligence Input']
label_encoders = {}
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    prepared[col] = label_encoders[col].fit_transform(prepared[col])

# Backward compatibility: `le` previously named the shared encoder, which after the loop
# was fitted on the last categorical column.
le = label_encoders[categorical_columns[-1]]

# Select features and target variable ('Date' and the ARIMA forecast are not used as features)
X = prepared.drop(columns=['Price (INR per kg)', 'ARIMA Forecast Price (INR per kg)', 'Date'])
y = prepared['Price (INR per kg)']

# Split the data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the SVR model
# NOTE(review): C, gamma and epsilon are hand-picked values, and the categorical columns
# reach the RBF kernel as integer codes -- both are worth revisiting if the score is poor.
model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model.
# NOTE: this is the R^2 score (coefficient of determination) expressed as a percentage,
# not a classification accuracy; the variable name is kept for compatibility.
accuracy = r2_score(y_test, y_pred) * 100
accuracy


5.657763470803367

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

# Read the commodity price records from disk
data = pd.read_csv('new_generated_food_commodities_data.csv')

# Parse the 'Date' strings into datetimes so the `.dt` accessor can be used below
data['Date'] = pd.to_datetime(data['Date'])

# Derive calendar components from the timestamp (added in this order: Year, Month, Day)
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day

# Replace every categorical column with integer codes.
# The same encoder object is re-fitted for each column in turn.
categorical_columns = [
    'Commodity',
    'Price Reporting Centre',
    'Market Intervention Decision',
    'Seasonality Factor',
    'Historical Trend',
    'Market Intelligence Input',
]
le = LabelEncoder()
for col in categorical_columns:
    data[col] = le.fit_transform(data[col])

# Target is the observed price; features are everything except the raw timestamp,
# the target itself and the ARIMA forecast column
y = data['Price (INR per kg)']
X = data.drop(
    ['Date', 'Price (INR per kg)', 'ARIMA Forecast Price (INR per kg)'],
    axis=1,
)

# Hold out 20% of the rows for evaluation (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features using statistics from the training split only
# (optional for a Decision Tree)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the Decision Tree regressor on the scaled training data
model = DecisionTreeRegressor(random_state=42).fit(X_train_scaled, y_train)

# Predict prices for the held-out rows
y_pred = model.predict(X_test_scaled)

# R^2 score on the held-out rows, expressed as a percentage
accuracy = 100 * r2_score(y_test, y_pred)
accuracy



93.53064853684765

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
import pickle

# Load the dataset
file_path = 'new_generated_food_commodities_data.csv'
data = pd.read_csv(file_path)

# Convert 'Date' to datetime
data['Date'] = pd.to_datetime(data['Date'])

# Encode categorical variables.
# One LabelEncoder per column: a single shared encoder is re-fitted on every iteration,
# so after the loop it only knows the classes of the last column and cannot encode the
# other categorical columns when the saved model is used for prediction.
categorical_columns = ['Commodity', 'Price Reporting Centre', 'Market Intervention Decision', 
                       'Seasonality Factor', 'Historical Trend', 'Market Intelligence Input']
label_encoders = {}
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Backward compatibility: `le` previously named the shared encoder, which after the loop
# was fitted on the last categorical column. It is still what goes into
# 'label_encoder.pkl' so existing loaders see the same object as before.
le = label_encoders[categorical_columns[-1]]

# Select features and target variable ('Date' and the ARIMA forecast are not used as features)
X = data.drop(columns=['Price (INR per kg)', 'ARIMA Forecast Price (INR per kg)', 'Date'])
y = data['Price (INR per kg)']

# Scale the features (fitted on the full dataset: this cell trains the model to export,
# there is no held-out split here)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train the Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_scaled, y)

# Save the model, LabelEncoder(s), and StandardScaler to .pkl files.
# NOTE: 'dtr.pkl' holds a RandomForestRegressor despite the file name; the name is kept
# because other code may load it by this path.
with open('dtr.pkl', 'wb') as file:
    pickle.dump(model, file)

# Unchanged legacy artifact: the encoder for the last categorical column only.
with open('label_encoder.pkl', 'wb') as le_file:
    pickle.dump(le, le_file)

# New artifact: dict mapping every categorical column name to its fitted encoder, which
# is what inference code needs to encode all six categorical inputs.
with open('label_encoders.pkl', 'wb') as encoders_file:
    pickle.dump(label_encoders, encoders_file)

with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)