In [24]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# Load dataset
data = pd.read_csv('Crop Prediction dataset.csv')

# Handle missing values
data['Production'].fillna(data['Production'].median(), inplace=True)

# Apply log transformation to normalize scales
data['Area'] = np.log1p(data['Area'])
data['Production'] = np.log1p(data['Production'])

# Encode categorical variables
label_encoders = {}
for col in ['State_Name', 'District_Name', 'Season', 'Crop']:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Select features and target
X = data[['Temperature', 'Humidity', 'Soil_Moisture', 'Area', 'Crop', 'State_Name', 'District_Name', 'Season']]
y = data['Production']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, max_depth=15, random_state=42)
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}, MSE: {mse}, R2 Score: {r2}')

# Save the trained model
joblib.dump(model, 'crop_yield_prediction_model.pkl')

print("Updated model training complete and saved successfully!")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Production'].fillna(data['Production'].median(), inplace=True)


MAE: 0.35914682395305925, MSE: 0.3917764789501351, R2 Score: 0.9562752333905651
Updated model training complete and saved successfully!
