In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import joblib

In [2]:
# Load the crop price records; the path is relative to the current working directory.
df = pd.read_csv("crops_prices.csv")

In [3]:
# Summarise column names, non-null counts and dtypes before choosing model features.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12068 entries, 0 to 12067
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   State              12068 non-null  object 
 1   District           12068 non-null  object 
 2   Market             12068 non-null  object 
 3   Commodity          12068 non-null  object 
 4   Grade              12068 non-null  object 
 5   Min_Price          12068 non-null  int64  
 6   Max_Price          12068 non-null  int64  
 7   Frequent_price     12068 non-null  int64  
 8   temperature        12068 non-null  int64  
 9   humidity           12068 non-null  int64  
 10  pressure           12068 non-null  int64  
 11  weather            12068 non-null  object 
 12  wind_speed         12068 non-null  float64
 13  Price_Range        12068 non-null  int64  
 14  Average_Price      12068 non-null  float64
 15  Weather_Condition  12068 non-null  object 
dtypes: float64(2), int64(7), object(7)

In [4]:
# Column to predict.
target = 'Average_Price'

# Model inputs: seven text columns plus the numeric temperature reading.
# The remaining columns of the frame (the other price columns, humidity,
# pressure, wind_speed) are not fed to the model.
features = [
    'State',
    'District',
    'Market',
    'Commodity',
    'Grade',
    'temperature',
    'weather',
    'Weather_Condition',
]

In [5]:
# Split the frame into the feature matrix and the target series.
X, y = df[features], df[target]

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
# Column groups consumed by the ColumnTransformer:
# the single numeric input, and the text columns that get one-hot encoded.
numerical_features = ['temperature']
categorical_features = [
    'State',
    'District',
    'Market',
    'Commodity',
    'Grade',
    'weather',
    'Weather_Condition',
]

In [8]:
# First version of the preprocessor: one-hot encode the text columns and pass
# the numeric column through untouched. Categories unseen during fitting are
# ignored by the encoder instead of raising.
# NOTE: the next cell rebinds `preprocessor` (adding scaling) before any model
# is built, so this passthrough variant is never used for training.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', 'passthrough', numerical_features),
    ],
)

In [9]:
# Standardiser applied to the numeric column(s).
scaler = StandardScaler()

# Rebuild the preprocessor so the numeric branch is scaled instead of passed
# through; the categorical branch is the same one-hot encoding as before.
preprocessor = ColumnTransformer(
    transformers=[
        (
            'cat',
            OneHotEncoder(handle_unknown='ignore'),
            categorical_features,
        ),
        (
            'num',
            Pipeline(steps=[('scaler', scaler)]),  # scaling happens in this branch
            numerical_features,
        ),
    ],
)

In [10]:
# End-to-end estimator: preprocessing followed by a seeded Random Forest.
# (Only defined here; fitting happens in the next cell.)
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(random_state=42)),
    ],
)

In [11]:
# Fit the whole pipeline on the training split: the preprocessing steps and the
# forest are all learned from X_train / y_train only.
model.fit(X_train, y_train)

In [12]:
# Predict the target for the held-out rows; these predictions are scored in the next cell.
y_pred = model.predict(X_test)

In [13]:
# Score the hold-out predictions: RMSE (square root of the MSE) and R².
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(
    f"Root Mean Squared Error (RMSE): {rmse}",
    f"R² (R-squared): {r2}",
    sep="\n",
)

Root Mean Squared Error (RMSE): 21.31393770319462
R² (R-squared): 0.8015732484372855


In [14]:
# Persist the fitted pipeline (preprocessing + forest) to disk with joblib.
joblib.dump(value=model, filename='average_price_model.pkl')
print("Model saved successfully!")

Model saved successfully!


In [15]:
# Round-trip check: read the pipeline back from disk and predict on the same
# hold-out rows. joblib.load unpickles the file, so only load files you trust
# (here it is the file written by the previous cell).
loaded_model = joblib.load(filename='average_price_model.pkl')
y_loaded_pred = loaded_model.predict(X_test)


In [16]:
# Score the reloaded pipeline with the same metrics used for the in-memory model,
# so the two sets of numbers can be compared directly.
loaded_rmse = mean_squared_error(y_true=y_test, y_pred=y_loaded_pred) ** 0.5
loaded_r2 = r2_score(y_true=y_test, y_pred=y_loaded_pred)

print(
    f"Loaded Model RMSE: {loaded_rmse}",
    f"Loaded Model R²: {loaded_r2}",
    sep="\n",
)

Loaded Model RMSE: 21.31393770319462
Loaded Model R²: 0.8015732484372855


In [17]:
def predict_average_price_with_input(pipeline):
    """Prompt for one market record on stdin and return its predicted average price.

    Eight questions are asked in a fixed order (State, District, Market,
    Commodity, Grade, Temperature, Weather, Weather Condition). The answers are
    assembled into a one-row DataFrame whose column names match the training
    features, and that frame is handed to ``pipeline.predict``.

    Text answers are forwarded exactly as typed. With the pipeline built in this
    notebook (``OneHotEncoder(handle_unknown='ignore')``) a spelling that was not
    seen during training does not raise; it is simply ignored by the encoder,
    so type values the way they appear in the data.

    Parameters
    ----------
    pipeline : object
        Any fitted estimator exposing ``predict(DataFrame)`` and returning an
        indexable sequence of predictions.

    Returns
    -------
    float
        The first (and only) prediction, converted to a plain Python float.

    Raises
    ------
    ValueError
        If the temperature answer cannot be parsed as a number.
    """
    # (DataFrame column, prompt text) in the order the questions are asked.
    questions = (
        ('State', "Enter State: "),
        ('District', "Enter District: "),
        ('Market', "Enter Market: "),
        ('Commodity', "Enter Commodity: "),
        ('Grade', "Enter Grade: "),
        ('temperature', "Enter Temperature: "),
        ('weather', "Enter Weather: "),
        ('Weather_Condition', "Enter Weather Condition: "),
    )

    input_data = {}
    for column, prompt in questions:
        answer = input(prompt)
        if column == 'temperature':
            # Parse as float so decimal readings such as "30.5" are accepted;
            # whole numbers like "30" still work.
            try:
                answer = float(answer)
            except ValueError:
                raise ValueError(
                    f"Temperature must be a number, got {answer!r}"
                ) from None
        # Each value is wrapped in a list so the dict becomes a single-row frame.
        input_data[column] = [answer]

    input_df = pd.DataFrame(input_data)

    # Use the trained pipeline to make a prediction for the single row.
    predicted_price = pipeline.predict(input_df)

    # Convert the first prediction to a built-in float before returning it.
    return float(predicted_price[0])

# Reuse the `model` pipeline that was already defined and fitted in the training
# cells above. Building and fitting a second identical RandomForest here (same
# training split, same random_state) would only repeat the training cost.
predicted_price = predict_average_price_with_input(model)

print(f"Predicted Average Price: {predicted_price}")


Enter State:  Assam
Enter District:  Barpeta
Enter Market:   Barpeta Road
Enter Commodity:  Bottle gourd
Enter Grade:  Local
Enter Temperature:  30
Enter Weather:  clear sky
Enter Weather Condition:  Favorable


Predicted Average Price: 13.75
