In [75]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBRegressor
from sklearn.ensemble import AdaBoostRegressor
import pickle


In [76]:
df = pd.read_csv("crop_price_prediction_dataset.csv")

In [77]:
df.head()

Unnamed: 0,Date,City,Temperature,Crop_Type,Season,Max_Price,Min_Price,Modal_Price
0,2023-01-01,Pune,33.38,Wheat,Winter,1876.46,968.74,1422.6
1,2023-01-02,Pune,27.03,Wheat,Winter,1044.94,507.98,776.46
2,2023-01-03,Pune,28.78,Wheat,Winter,2158.02,1263.65,1710.835
3,2023-01-04,Pune,30.16,Wheat,Winter,1959.33,1388.86,1674.095
4,2023-01-05,Pune,19.49,Wheat,Winter,1057.27,537.91,797.59


In [95]:
df.tail()

Unnamed: 0,Date,City,Temperature,Crop_Type,Season,Max_Price,Min_Price,Modal_Price
17515,2023-12-27,Nagpur,18.76,Pulses,Winter,2554.0,581.39,1567.695
17516,2023-12-28,Nagpur,38.3,Pulses,Winter,4363.58,2103.59,3233.585
17517,2023-12-29,Nagpur,28.25,Pulses,Winter,3432.83,2488.8,2960.815
17518,2023-12-30,Nagpur,26.42,Pulses,Winter,1024.69,517.48,771.085
17519,2023-12-31,Nagpur,36.04,Pulses,Winter,1518.88,761.66,1140.27


In [98]:
rows = len(df.axes[0])
print("Number of Rows: ", rows)

Number of Rows:  17520


In [99]:
cols = len(df.axes[1])
print("Number of Columns: ", cols)

Number of Columns:  8


In [78]:
def clean_data(df):
    # Handle missing values by forward filling
    df = df.ffill()  # Use forward fill method
    
    # Handle outliers (capping values at 5th and 95th percentiles for 'Temperature')
    df['Temperature'] = df['Temperature'].clip(lower=df['Temperature'].quantile(0.05),
                                                upper=df['Temperature'].quantile(0.95))
    
    return df

In [53]:
def preprocess_data(df):
    # Clean the data
    df = clean_data(df)
    
    # Encode categorical variables (Crop_Type, City, Season)
    label_encoders = {}
    for column in ['Crop_Type', 'City', 'Season']:
        label_encoders[column] = LabelEncoder()
        df[column] = label_encoders[column].fit_transform(df[column])
    
    # Feature scaling for numerical columns
    scaler = StandardScaler()
    df[['Temperature']] = scaler.fit_transform(df[['Temperature']])
    
    # Define features and target variable (Modal_Price for example)
    X = df[['Crop_Type', 'City', 'Season', 'Temperature']]
    y = df['Modal_Price']  # Change this to 'Max_Price' or 'Min_Price' if required
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    return X_train, X_test, y_train, y_test, label_encoders, scaler

In [79]:
# Train the model
X_train, X_test, y_train, y_test, label_encoders, scaler = preprocess_data(df)
xgb_model = XGBRegressor(learning_rate=0.01, max_depth=3, n_estimators=100)
xgb_model.fit(X_train, y_train)

In [80]:
def price_prediction(crop_type, city, season, temperature, label_encoders, scaler, xgb_model, ada_model):
    # Encode user inputs
    encoded_crop = label_encoders['Crop_Type'].transform([crop_type])[0]
    encoded_city = label_encoders['City'].transform([city])[0]
    encoded_season = label_encoders['Season'].transform([season])[0]
    
    # Create a DataFrame for prediction
    user_input = pd.DataFrame([[encoded_crop, encoded_city, encoded_season, temperature]], 
                              columns=['Crop_Type', 'City', 'Season', 'Temperature'])
    
    # Scale temperature
    user_input[['Temperature']] = scaler.transform(user_input[['Temperature']])
    
    # Predict with both models
    xgb_prediction = xgb_model.predict(user_input)[0]
    ada_prediction = ada_model.predict(user_input)[0]
    
    # Return the maximum of the two predictions
    return max(xgb_prediction, ada_prediction)

In [81]:
X_train, X_test, y_train, y_test, label_encoders, scaler = preprocess_data(df)

In [82]:
 xgb_model, ada_model = train_models(X_train, y_train)

In [94]:
with open('xgb_model.pkl', 'wb') as model_file:
    pickle.dump(xgb_model, model_file)

with open('label_encoders.pkl', 'wb') as le_file:
    pickle.dump(label_encoders, le_file)

with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

print("Model, label encoders, and scaler saved successfully.")

Model, label encoders, and scaler saved successfully.


In [83]:
 price_prediction('Wheat', 'Mumbai', 'Winter', 25, label_encoders, scaler, xgb_model, ada_model)

np.float64(2339.0435850465187)

In [84]:
 price_prediction('Wheat', 'Nashik', 'Summer', 25, label_encoders, scaler, xgb_model, ada_model)

np.float64(2257.0856518602995)

In [85]:
 price_prediction('Rice', 'Pune', 'Winter', 25, label_encoders, scaler, xgb_model, ada_model)

np.float64(2283.9418381344344)

In [86]:
 price_prediction('Barley', 'Pune', 'Winter', 25, label_encoders, scaler, xgb_model, ada_model)

np.float64(2283.9418381344344)

In [87]:
 price_prediction('Pulses', 'Nagpur', 'Winter', 25, label_encoders, scaler, xgb_model, ada_model)

np.float64(2301.0527933409926)

In [88]:
 price_prediction('Pulses', 'Nagpur', 'Monsoon', 25, label_encoders, scaler, xgb_model, ada_model)

np.float64(2312.761727779998)

In [89]:
 price_prediction('Pulses', 'Nagpur', 'Summer', 25, label_encoders, scaler, xgb_model, ada_model)

np.float64(2312.761727779998)

In [90]:
 price_prediction('Pulses', 'Nagpur', 'Winter', 26, label_encoders, scaler, xgb_model, ada_model)

np.float32(2403.4346)

In [91]:
 price_prediction('Pulses', 'Nagpur', 'Winter', 20, label_encoders, scaler, xgb_model, ada_model)

np.float64(2283.314186295499)