In [16]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Load the data
data = pd.read_csv('./pro_crop_data_final.csv')

# Check the first few rows to ensure the data is loaded correctly
print(data.head())

# Ensure the column names are as expected
print(data.columns)

# Define the feature columns, the categorical columns and the target column
feature_cols = ['crop_temp', 'crop_humidity', 'crop_ph', 'crop_nitrogen', 'crop_phosphorus',
                'crop_potassium', 'crop_calcium', 'crop_magnesium', 'crop_zinc', 'crop_sodium',
                'crop_ec', 'crop_light']
categorical_cols = ['crop_growth_stage', 'crop_type']
target_col = 'crop_name'

# Check that every column used below is present in the dataset. The
# categorical columns are included so that a missing one is reported here
# with a clear message instead of failing later inside get_dummies.
required_cols = feature_cols + categorical_cols + [target_col]
missing_cols = set(required_cols) - set(data.columns)
if missing_cols:
    raise ValueError(f"Missing columns in the dataset: {missing_cols}")

# Convert categorical features to numerical values using one-hot encoding.
# NOTE: the resulting indicator columns are not listed in feature_cols, so
# the model trained below does not use them.
data = pd.get_dummies(data, columns=categorical_cols)

# Define the feature matrix and target vector
X = data[feature_cols]
y = data[target_col]

# Train the Decision Tree model (fixed random_state for reproducible fits)
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)

# Evaluate the model on the same training data.
# NOTE: this is training accuracy (the model is scored on the rows it was
# fitted on), so it does not estimate performance on unseen samples.
y_pred = model.predict(X)
accuracy = accuracy_score(y, y_pred)
print(f'Accuracy: {accuracy}')

# Function to predict the top 5 crops
def predict_top_5_crops(model, input_data):
    """Return up to five class labels ranked by predicted probability.

    Args:
        model: Fitted classifier exposing ``predict_proba`` and ``classes_``.
            If it also exposes ``feature_names_in_``, the input columns are
            put in that order before predicting.
        input_data: Mapping of feature name to value for a single sample.

    Returns:
        Array of at most five entries of ``model.classes_``, most probable
        first. Classes with equal probability keep their ``classes_`` order.
    """
    input_df = pd.DataFrame([input_data])

    # The DataFrame inherits the dict's key order; align it with the order
    # the model was fitted with so an equivalent but differently ordered
    # dict is accepted. If the names do not match, leave the frame untouched
    # and let the model report the mismatch itself.
    expected = getattr(model, "feature_names_in_", None)
    if expected is not None and set(expected) == set(input_df.columns):
        input_df = input_df[list(expected)]

    probas = model.predict_proba(input_df)

    # Stable sort on the negated probabilities gives a descending ranking
    # with deterministic tie-breaking (ties are common: a tree leaf usually
    # assigns probability 0.0 to most classes).
    top_5_indices = np.argsort(-np.asarray(probas[0]), kind="stable")[:5]
    top_5_crops = model.classes_[top_5_indices]
    return top_5_crops

# Sample input data, keyed by feature column name
input_data = dict(
    crop_temp=55.0,
    crop_humidity=240.0,
    crop_ph=6,
    crop_nitrogen=50.0,
    crop_phosphorus=23.0,
    crop_potassium=13.5,
    crop_calcium=1.5,
    crop_magnesium=0.25,
    crop_zinc=0.2,
    crop_sodium=0.4,
    crop_ec=1.3,
    crop_light=400.0,
)

# Rank the crops for the sample above and print the result
top_5_crops = predict_top_5_crops(model, input_data)
print('Top 5 crops that can be grown in hydroponics system:', top_5_crops)


   crop_id          crop_name crop_growth_stage     crop_type  crop_temp  \
0        0           aloevera         flowering  multipurpose     17.053   
1        1           aloevera        harvesting  multipurpose     17.565   
2        2           aloevera           seeding  multipurpose     17.688   
3        3           aloevera        vegetative  multipurpose     17.710   
4        4  armeniancucumbers         flowering    fruitables     17.724   

   crop_humidity  crop_ph  crop_nitrogen  crop_phosphorus  crop_potassium  \
0        219.327      3.5         30.213           13.128           5.222   
1        226.295      3.5         30.326           13.198           5.288   
2        231.111      3.5         30.344           13.266           5.318   
3        238.016      3.5         30.353           13.305           5.466   
4        239.391      3.5         31.431           13.406           5.589   

   ...  crop_boron  crop_iron  crop_zinc  crop_manganese  crop_molybdenum  \
0  