## Crop and Fertilizer Recommendation System using ML

In [47]:
# Import the third-party libraries used by the notebook.
# If one of them is missing, say which one and re-raise so execution stops here
# instead of failing later with a NameError.
try:
    import numpy as np  # For numerical computations
    import pandas as pd  # For data manipulation
    import matplotlib.pyplot as plt  # For data visualization
    import seaborn as sns  # For advanced statistical visualization

    plt.ion()  # Enable interactive mode for Matplotlib

except ImportError as e:
    # ImportError.name carries the module that could not be imported.
    # Splitting the message text ("No module named 'x'") on spaces and taking
    # index 2 yields the word "named", not the library, so read the attribute.
    # Fall back to the full message when the attribute is not set.
    missing_lib = e.name or str(e)
    print(f"{missing_lib.capitalize()} is not installed. Please install the required libraries.")
    # Chain the original exception so the underlying traceback is preserved.
    raise ImportError(f"{missing_lib} is required for this code to run.", name=e.name) from e


In [61]:
import os  # Used to check whether the dataset file exists on disk


def load_crop_dataset(path):
    """Load the dataset stored at ``path`` into a pandas DataFrame.

    The checks run in order and stop at the first failure, so exactly one
    message is printed and nothing is loaded after an error was reported:
    the path must end in ``.csv``, it must exist, and ``pd.read_csv`` must
    be able to read it.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The loaded data, or ``None`` when the extension is wrong, the file
        does not exist, the file is empty, or it cannot be parsed.
    """
    # A wrong extension is a hard failure: do not go on to read the file
    # after telling the user it is not a CSV.
    if not path.endswith('.csv'):
        print(f"Error: The file '{path}' is not a CSV file.")
        return None

    if not os.path.exists(path):
        print(f"Error: The file '{path}' does not exist.")
        return None

    try:
        # Load the dataset into a pandas DataFrame
        data = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        print(f"Error: The file '{path}' is empty.")
        return None
    except pd.errors.ParserError:
        print(f"Error: Failed to parse the file '{path}'. Please check the file format.")
        return None

    print("Dataset loaded successfully.")
    return data


# Defining the file path
file_path = "Crop_recommendation.csv"

# `crop` is None whenever loading failed.
crop = load_crop_dataset(file_path)


Dataset loaded successfully.


In [51]:
# Preview both ends of the dataset, provided it was loaded and contains rows.
if crop is not None and not crop.empty:
    # Top of the dataset: first ten rows.
    print("First ten (10) rows of the dataset:")
    print(crop.head(10))
    # Bottom of the dataset: last ten rows (the leading newline separates the two previews).
    print("\nLast ten (10) rows of the dataset:")
    print(crop.tail(10))
else:
    # Nothing to preview: `crop` is None or has no rows.
    print("Error: The dataset could not be loaded or is empty.")


First ten (10) rows of the dataset:
    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice
5  69  37  42    23.058049  83.370118  7.073454  251.055000  rice
6  69  55  38    22.708838  82.639414  5.700806  271.324860  rice
7  94  53  40    20.277744  82.894086  5.718627  241.974195  rice
8  89  54  38    24.515881  83.535216  6.685346  230.446236  rice
9  68  58  38    23.223974  83.033227  6.336254  221.209196  rice

Last ten (10) rows of the dataset:
        N   P   K  temperature   humidity        ph    rainfall   label
2190  103  40  30    27.309018  55.196224  6.348316  141.483164  coffee
2191  118  31  34    27.548230  62.881792  6.123796  181.4

In [52]:

# Report the dimensions of the dataset, or explain why they cannot be reported.
if crop is None:
    # `crop` is None when loading failed; without this guard `crop.shape`
    # raises AttributeError.
    print("The dataset is not loaded. Please check the data source.")
elif crop.shape[0] == 0:  # shape[0] is the row count: no rows at all
    print("The dataset has no rows .Please check the data source.")
elif crop.shape[1] == 0:  # shape[1] is the column count: no columns at all
    print("The dataset has no columns.Please check the file format.")
else:  # At least one row and one column: print both counts
    print(f"The dataset contains {crop.shape[0]} rows and {crop.shape[1]} columns.")


The dataset contains 2200 rows and 8 columns.


In [55]:
# Show the structure of the dataset and how many values are missing per column.
if crop is None or crop.empty:
    # Nothing to describe: `crop` is None or has no rows.
    print("The dataset is empty or not loaded.")
else:
    print("Displaying basic information about the dataset:")
    crop.info()  # info() prints its summary itself

    # Per-column count of null entries.
    missing_per_column = crop.isnull().sum()
    print("\nMissing values per column:")
    print(missing_per_column)


Displaying basic information about the dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB

Missing values per column:
N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64


In [56]:
# Count the rows that repeat an earlier row.
duplicates = crop.duplicated().sum()
print(f"Number of duplicated rows: {duplicates}")

# Drop the repeated rows only when there is something to drop.
if duplicates <= 0:
    print("No duplicates found in the dataset.")
else:
    print("Removing duplicate rows...")
    crop = crop.drop_duplicates()  # Rebinds `crop` to the de-duplicated frame
    remaining_rows = crop.shape[0]
    print(f"Duplicates removed. The dataset now has {remaining_rows} rows.")


Number of duplicated rows: 0
No duplicates found in the dataset.


In [59]:
# Summary statistics as produced by describe() with its default arguments.
# The recorded output covers the numeric columns only; 'label' is not summarised.
summary_stats = crop.describe()
print(summary_stats)


                 N            P            K  temperature     humidity  \
count  2200.000000  2200.000000  2200.000000  2200.000000  2200.000000   
mean     50.551818    53.362727    48.149091    25.616244    71.481779   
std      36.917334    32.985883    50.647931     5.063749    22.263812   
min       0.000000     5.000000     5.000000     8.825675    14.258040   
25%      21.000000    28.000000    20.000000    22.769375    60.261953   
50%      37.000000    51.000000    32.000000    25.598693    80.473146   
75%      84.250000    68.000000    49.000000    28.561654    89.948771   
max     140.000000   145.000000   205.000000    43.675493    99.981876   

                ph     rainfall  
count  2200.000000  2200.000000  
mean      6.469480   103.463655  
std       0.773938    54.958389  
min       3.504752    20.211267  
25%       5.971693    64.551686  
50%       6.425045    94.867624  
75%       6.923643   124.267508  
max       9.935091   298.560117  


In [60]:
# Print a heading line followed by the column names as a plain Python list.
print("column names in the dataset:", list(crop.columns), sep="\n")


column names in the dataset:
['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label']


In [24]:
# Distribution of the target: number of rows for each value of 'label'.
label_counts = crop['label'].value_counts()
label_counts

label
rice           100
maize          100
chickpea       100
kidneybeans    100
pigeonpeas     100
mothbeans      100
mungbean       100
blackgram      100
lentil         100
pomegranate    100
banana         100
mango          100
grapes         100
watermelon     100
muskmelon      100
apple          100
orange         100
papaya         100
coconut        100
cotton         100
jute           100
coffee         100
Name: count, dtype: int64