In [1]:
# Step 1: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle

In [3]:
# Step 2: Read the pesticides CSV into a DataFrame
# (the path has no directory part, so it is resolved against the current working directory)
df = pd.read_csv(filepath_or_buffer='pesticides dataset.csv')

In [12]:
# Count the missing values in each column and print the per-column totals
missing_per_column = df.isna().sum()
print(missing_per_column)

Pesticides Name      0
Symptoms             2
Cronical Diseases    2
dtype: int64


In [13]:
# Fill missing values by assigning the filled column back to the DataFrame.
# Calling fillna(..., inplace=True) on df['col'] is chained assignment: it operates on
# an intermediate Series, triggers a warning on recent pandas 2.x releases, and leaves
# df unchanged once Copy-on-Write is enabled.
df['Pesticides Name'] = df['Pesticides Name'].fillna('Unknown')  # Replace NaN with 'Unknown'
for column in ('Symptoms', 'Cronical Diseases'):
    # mode()[0] is the most frequent non-null value of the column
    df[column] = df[column].fillna(df[column].mode()[0])


In [14]:
# Show the DataFrame as the cell's output (a bare last expression is rendered by the notebook)
df

Unnamed: 0,Pesticides Name,Symptoms,Cronical Diseases
0,Acephate,"nausea, dizziness, confusion and at very high ...","Organophosphate poisoning, affecting the nervo..."
1,Imidacloprid,"fatigue, headache, muscle weakness, and eye an...",Neurotoxicity due to nicotinic acetylcholine r...
2,Azadirachtin,"skin irritation, Eyes irritation, Gastrointest...","Liver damage, Kidney damage"
3,Chlorpyrifos,"runny nose, tears, and increased saliva or dr...","Seizures, Coma, Organ damage, Death"
4,Lambda-cyhalothrin,"Skin tingling, burning, and prickling feelings",coagulation necrosis (acute) and granulomatous...
...,...,...,...
999,Emamectin Benzoate 1.5% +\n Fipronil 3.5% SCÃ‚Â,"Nausea, headaches, dizziness, skin irritation",Neurotoxic effects
1000,Tebuconazol e 6.7% +\n Captan 26.9% w/w SC,"Skin irritation, respiratory discomfort, heada...",otential liver damage and allergic reactions
1001,Pyriproxyfen 10% EC,"Mild skin irritation, headaches.",Generally low toxicity
1002,Pyriproxifen 5% +\n Diafenthiuro n 25% SE,"Skin irritation, nausea",Low systemic toxicity


In [15]:
# Re-count the missing values per column to check the result of the fill step
remaining_missing = df.isna().sum()
print(remaining_missing)

Pesticides Name      0
Symptoms             0
Cronical Diseases    0
dtype: int64


In [16]:
# Step 3: Inspect the dataset (Optional)
print("Dataset preview:")
print(df.head())
print("\nDataset info:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
df.info()

Dataset preview:
      Pesticides Name                                           Symptoms  \
0            Acephate  nausea, dizziness, confusion and at very high ...   
1        Imidacloprid  fatigue, headache, muscle weakness, and eye an...   
2        Azadirachtin  skin irritation, Eyes irritation, Gastrointest...   
3        Chlorpyrifos   runny nose, tears, and increased saliva or dr...   
4  Lambda-cyhalothrin     Skin tingling, burning, and prickling feelings   

                                   Cronical Diseases  
0  Organophosphate poisoning, affecting the nervo...  
1  Neurotoxicity due to nicotinic acetylcholine r...  
2                        Liver damage, Kidney damage  
3                Seizures, Coma, Organ damage, Death  
4  coagulation necrosis (acute) and granulomatous...  

Dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1004 entries, 0 to 1003
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             ----

In [17]:
# Step 4: Preprocess the data
# Normalise the header: drop leading/trailing whitespace from every column name
# so later lookups by exact column name succeed.
stripped_columns = df.columns.str.strip()
df.columns = stripped_columns

In [18]:

# Feature matrix: a one-column DataFrame (double brackets keep it two-dimensional)
X = df[['Pesticides Name']]
# Targets: one Series per prediction task (symptoms, chronic diseases)
y_symptoms, y_disease = df['Symptoms'], df['Cronical Diseases']


In [19]:
# Encode the categorical pesticide names as integer codes for the models.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# Encode from the df column rather than from X itself so this cell is idempotent:
# re-running `X = X.apply(...)` would fit the encoder on already-encoded integers,
# and le.classes_ would no longer hold the original pesticide names.
X = df[['Pesticides Name']].apply(le.fit_transform)

In [20]:
# Step 5: Split the dataset into training and testing sets
# One call splits the features and both targets with the same shuffled indices, so
# X_train/X_test are guaranteed to line up with both target sets. Two separate calls
# would overwrite X_train/X_test and only stay aligned because the seed is duplicated.
(
    X_train, X_test,
    y_train_symptoms, y_test_symptoms,
    y_train_disease, y_test_disease,
) = train_test_split(X, y_symptoms, y_disease, test_size=0.2, random_state=42)

In [21]:
# Step 6: Fit a random forest on the training split to predict the Symptoms target
# (random_state is fixed so repeated runs build the same forest)
model_symptoms = RandomForestClassifier(random_state=42)
model_symptoms.fit(X=X_train, y=y_train_symptoms)

In [23]:
# Step 7: Fit a second random forest on the same training features,
# this time against the Cronical Diseases target
model_disease = RandomForestClassifier(random_state=42)
model_disease.fit(X=X_train, y=y_train_disease)


In [24]:
# Step 8: Score the Symptoms model on the held-out test split
# NOTE(review): the recorded accuracy is 0.05; the target is free-text, so exact-match
# accuracy may not be a meaningful metric here — confirm the modelling setup.
y_pred_symptoms = model_symptoms.predict(X=X_test)
accuracy_symptoms = accuracy_score(y_true=y_test_symptoms, y_pred=y_pred_symptoms)
print(f"Symptoms Model Accuracy: {accuracy_symptoms:.2f}")

Symptoms Model Accuracy: 0.05


In [25]:
# Step 9: Score the Cronical Diseases model on the held-out test split
# NOTE(review): the recorded accuracy is 0.05; the target is free-text, so exact-match
# accuracy may not be a meaningful metric here — confirm the modelling setup.
y_pred_disease = model_disease.predict(X=X_test)
accuracy_disease = accuracy_score(y_true=y_test_disease, y_pred=y_pred_disease)
print(f"Disease Model Accuracy: {accuracy_disease:.2f}")

Disease Model Accuracy: 0.05


In [26]:
# Step 10: Persist the trained models together with the fitted label encoder.
# Both models were trained on the integer codes produced by `le`, so the encoder is
# saved as well; without it a pesticide name cannot be converted into the code the
# models expect once this kernel is gone.
artifacts = {
    'model_symptoms.pkl': model_symptoms,
    'model_disease.pkl': model_disease,
    'label_encoder.pkl': le,
}
for filename, artifact in artifacts.items():
    # The context manager closes each file even if pickling fails
    with open(filename, 'wb') as file:
        pickle.dump(artifact, file)

print("Models saved successfully as 'model_symptoms.pkl' and 'model_disease.pkl'")
print("Label encoder saved as 'label_encoder.pkl'")

Models saved successfully as 'model_symptoms.pkl' and 'model_disease.pkl'
