In [None]:
import joblib
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor

# Seed shared by the train/test split and the decision tree so that the
# reported RMSE values and later predictions are reproducible between runs.
RANDOM_STATE = 42

# Load the dataset
df = pd.read_csv('crop_production_data.csv')

# Every column except the target is treated as a candidate feature.
X = df.drop(columns=['Yield (quintals)'])
Y = df['Yield (quintals)']

# Identify categorical and numerical columns
categorical_cols = ['District', 'Crop', 'Season']
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# Create transformers
numerical_transformer = StandardScaler()
# handle_unknown='ignore' means a category that was not seen during fitting
# does not raise at prediction time; it is encoded without any active column.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Combine transformers. Columns named in neither list are not passed on to
# the regressors (ColumnTransformer's default `remainder` is 'drop').
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Splitting data into train and test sets (80% train / 20% test)
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=RANDOM_STATE)

# Creating and training Decision Tree model.
# The tree is seeded: without a fixed random_state, ties between equally good
# splits are broken randomly and the fitted tree can differ from run to run.
dt_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', DecisionTreeRegressor(random_state=RANDOM_STATE)),
])
dt_model.fit(X_train, Y_train)

# Creating and training SVM model.
# Pipeline does not copy its steps, so reusing `preprocessor` here would make
# both pipelines share one transformer object, and fitting this pipeline would
# refit the preprocessing step of `dt_model` as well. An unfitted clone gives
# the SVM pipeline its own independent preprocessing state.
svm_model = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('regressor', SVR()),
])
svm_model.fit(X_train, Y_train)

# Evaluating models on test set
dt_predictions = dt_model.predict(X_test)
svm_predictions = svm_model.predict(X_test)

# RMSE is expressed in the same unit as the target column.
dt_rmse = np.sqrt(mean_squared_error(Y_test, dt_predictions))
svm_rmse = np.sqrt(mean_squared_error(Y_test, svm_predictions))

print("Decision Tree RMSE:", dt_rmse)
print("SVM RMSE:", svm_rmse)


# Creating Function to predict yield and production using both models

# One quintal is 100 kg, i.e. 0.1 metric ton.
_TONNES_PER_QUINTAL = 0.1


def predict_yield_production(area, season, crop, district, *, models=None):
    """Print the predicted yield and production for one area/season/crop/district.

    A single-row DataFrame is built from the arguments and passed to each
    model's ``predict``. Production is derived from the predicted yield as
    ``yield * area * 0.1``; this treats the yield as quintals per hectare
    (that is what the formula implies -- confirm against the dataset).

    Args:
        area: Cultivated area in hectares; must not be negative.
        season: Season name, stored in the 'Season' column.
        crop: Crop name, stored in the 'Crop' column.
        district: District name, stored in the 'District' column.
        models: Optional mapping (or iterable of pairs) of display label to an
            object with a ``predict(DataFrame)`` method. When omitted, the
            module-level ``dt_model`` and ``svm_model`` are used under the
            labels "Decision Tree" and "SVM".

    Returns:
        None. Results are printed, one section per model, in mapping order.

    Raises:
        ValueError: If ``area`` is negative.
    """
    if area < 0:
        raise ValueError(f"area must be non-negative, got {area!r}")

    if models is None:
        # Looked up at call time so the currently fitted pipelines are used.
        models = {"Decision Tree": dt_model, "SVM": svm_model}

    # Creating input data for prediction
    input_data = pd.DataFrame({
        'Area (hectares)': [area],
        'Season': [season],
        'Crop': [crop],
        'District': [district]
    })

    # Run every model before printing anything, so a failing model does not
    # leave a partially printed report behind.
    results = []
    for label, model in dict(models).items():
        predicted_yield = model.predict(input_data)[0]
        predicted_production = predicted_yield * area * _TONNES_PER_QUINTAL
        results.append((label, predicted_yield, predicted_production))

    for label, predicted_yield, predicted_production in results:
        print(f"\n{label} Predictions:")
        print(f"Predicted Yield (quintals): {predicted_yield:.2f}")
        print(f"Predicted Production (metric tons): {predicted_production:.2f}")

# Getting input from the user
raw_area = input("Enter area in hectares: ")
try:
    area = float(raw_area)
except ValueError:
    # Replace float()'s generic message with one that names the field.
    raise ValueError(
        f"Area must be a number of hectares, got {raw_area!r}") from None

# Strip surrounding whitespace from the text answers: the one-hot encoder is
# configured with handle_unknown='ignore', so a value such as "Rabi " would not
# raise but would silently be treated as a category never seen in training.
season = input("Enter season (Kharif/Rabi): ").strip()
crop = input("Enter crop name: ").strip()
district = input("Enter district: ").strip()

# Calling the prediction function
predict_yield_production(area, season, crop, district)

Decision Tree RMSE: 11.17689109779475
SVM RMSE: 8.243328393701578
Enter area in hectares: 20000
Enter season (Kharif/Rabi): Rabi
Enter crop name: Gram
Enter district: Udaipur

Decision Tree Predictions:
Predicted Yield (quintals): 30.31
Predicted Production (metric tons): 60619.63

SVM Predictions:
Predicted Yield (quintals): 36.76
Predicted Production (metric tons): 73518.06
