In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import joblib

In [2]:
data = pd.read_csv("crops_prices.csv")

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12068 entries, 0 to 12067
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   State              12068 non-null  object 
 1   District           12068 non-null  object 
 2   Market             12068 non-null  object 
 3   Commodity          12068 non-null  object 
 4   Grade              12068 non-null  object 
 5   Min_Price          12068 non-null  int64  
 6   Max_Price          12068 non-null  int64  
 7   Frequent_price     12068 non-null  int64  
 8   temperature        12068 non-null  int64  
 9   humidity           12068 non-null  int64  
 10  pressure           12068 non-null  int64  
 11  weather            12068 non-null  object 
 12  wind_speed         12068 non-null  float64
 13  Price_Range        12068 non-null  int64  
 14  Average_Price      12068 non-null  float64
 15  Weather_Condition  12068 non-null  object 
dtypes: float64(2), int64(7

In [4]:
# 2. One-Hot Encoding
data = pd.get_dummies(data, columns=['State', 'District', 'Market', 'Commodity', 'Grade', 'temperature', 'weather', 'Weather_Condition'], drop_first=True)

In [5]:
# 3. Features (X) and Target Variables (y_min, y_max)
X = data.drop(columns=['Min_Price', 'Max_Price'])  # Replace with your actual target column names
y_min = data['Min_Price']
y_max = data['Max_Price']

In [6]:
# 4. Scaling Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
# 5. Train-Test Split for Min_Price and Max_Price
X_train, X_test, y_train_min, y_test_min = train_test_split(X_scaled, y_min, test_size=0.2, random_state=42)

# Split the data for Max_Price (using the same indices for consistency)
_, _, y_train_max, y_test_max = train_test_split(X_scaled, y_max, test_size=0.2, random_state=42)

In [8]:
# 6. Model Training
# Train the model for Min_Price
model_min = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
model_min.fit(X_train, y_train_min)

# Train the model for Max_Price
model_max = GradientBoostingRegressor(n_estimators=100, random_state=42)
model_max.fit(X_train, y_train_max)

In [9]:
# 7. Predictions
min_price_predictions = model_min.predict(X_test)
max_price_predictions = model_max.predict(X_test)

In [10]:
# 8. Model Evaluation
# R^2 score for Min_Price model
min_price_r2_score = model_min.score(X_test, y_test_min)
print(f'Min Price Model R^2 Score: {min_price_r2_score}')

# R^2 score for Max_Price model
max_price_r2_score = model_max.score(X_test, y_test_max)
print(f'Max Price Model R^2 Score: {max_price_r2_score}')

Min Price Model R^2 Score: 0.9989521879577627
Max Price Model R^2 Score: 0.9957025557745386


In [11]:
# Mean Squared Error (MSE)
min_price_mse = mean_squared_error(y_test_min, min_price_predictions)
max_price_mse = mean_squared_error(y_test_max, max_price_predictions)
print(f'Min Price Model Mean Squared Error: {min_price_mse}')
print(f'Max Price Model Mean Squared Error: {max_price_mse}')

Min Price Model Mean Squared Error: 2.1716207512181
Max Price Model Mean Squared Error: 11.134254816392215


In [12]:
import joblib

# Save the trained models
joblib.dump(model_min, 'model_min.pkl')
joblib.dump(model_max, 'model_max.pkl')

# Save the fitted scaler
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [13]:
from sklearn.metrics import mean_squared_error, r2_score


# Predictions for Min_Price on training and test data
y_train_min_pred = model_min.predict(X_train)
y_test_min_pred = model_min.predict(X_test)

# Predictions for Max_Price on training and test data
y_train_max_pred = model_max.predict(X_train)
y_test_max_pred = model_max.predict(X_test)

# Min_Price Model Performance
train_mse_min = mean_squared_error(y_train_min, y_train_min_pred)
train_r2_min = r2_score(y_train_min, y_train_min_pred)

test_mse_min = mean_squared_error(y_test_min, y_test_min_pred)
test_r2_min = r2_score(y_test_min, y_test_min_pred)

# Max_Price Model Performance
train_mse_max = mean_squared_error(y_train_max, y_train_max_pred)
train_r2_max = r2_score(y_train_max, y_train_max_pred)

test_mse_max = mean_squared_error(y_test_max, y_test_max_pred)
test_r2_max = r2_score(y_test_max, y_test_max_pred)

# Display Min_Price Model Results
print(f"Min_Price Model Training Performance:\nMean Squared Error (MSE): {train_mse_min}\nR² Score: {train_r2_min}")
print(f"Min_Price Model Test Performance:\nMean Squared Error (MSE): {test_mse_min}\nR² Score: {test_r2_min}\n")

# Display Max_Price Model Results
print(f"Max_Price Model Training Performance:\nMean Squared Error (MSE): {train_mse_max}\nR² Score: {train_r2_max}")
print(f"Max_Price Model Test Performance:\nMean Squared Error (MSE): {test_mse_max}\nR² Score: {test_r2_max}")


Min_Price Model Training Performance:
Mean Squared Error (MSE): 16.106644225831808
R² Score: 0.9960127672766522
Min_Price Model Test Performance:
Mean Squared Error (MSE): 2.1716207512181
R² Score: 0.9989521879577627

Max_Price Model Training Performance:
Mean Squared Error (MSE): 2.1589122118446067
R² Score: 0.9995824861918428
Max_Price Model Test Performance:
Mean Squared Error (MSE): 11.134254816392215
R² Score: 0.9957025557745386
