In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import joblib

# Load the dataset from a CSV file located relative to the working directory
file_path = "Crop_recommendation.csv"
farm = pd.read_csv(file_path)

# Preview 15 randomly chosen rows. The fixed seed keeps the displayed rows
# identical across "Restart Kernel -> Run All" executions.
farm.sample(15, random_state=42)


In [None]:
# Display basic dataset information
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called on its own line; passing it to print() would emit the report before
# the header and then print the literal "None".
print("Dataset Info:")
farm.info()
print("\nFirst 5 Rows:\n", farm.head())



In [None]:
# Count the null entries in every column and report the totals
missing_per_column = farm.isnull().sum()
print("\nMissing Values:\n", missing_per_column)



In [None]:
# Descriptive statistics from DataFrame.describe(): echoed to stdout, then
# written to a CSV file in the working directory
stats_csv = "descriptive_statistics.csv"
desc_stats = farm.describe()
print("\nSummary Statistics:\n", desc_stats)
desc_stats.to_csv(stats_csv)




In [None]:
# Number of rows for each distinct value of the `label` column
farm["label"].value_counts()

In [None]:
# Use Seaborn's "whitegrid" style for the plots drawn after this cell
sns.set_style(style="whitegrid")



In [None]:
# --- Visualization ---

# 1. Histogram grid showing the distribution of each numerical column
hist_options = {"figsize": (12, 8), "bins": 20, "edgecolor": "black"}
farm.hist(**hist_options)
# DataFrame.hist leaves its figure current, so the super-title lands on it
plt.gcf().suptitle("Distribution of Numerical Features", fontsize=14)
plt.show()



In [None]:
# 2. Correlation heatmap
# Keep only the numeric columns; non-numeric ones are excluded from the analysis
numeric_df = farm.select_dtypes(include="number")

# Pairwise correlation between the numeric columns
corr_matrix = numeric_df.corr()

# Draw the annotated heatmap on an explicitly created axes
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    linewidths=0.5,
    ax=heatmap_ax,
)
heatmap_ax.set_title("Correlation Heatmap")
plt.show()



In [None]:
# Pairplot to visualize relationships between features, colored by `label`.
# sns.pairplot returns a grid object that already carries one figure-level
# legend for `hue`. plt.legend() would act only on the last-drawn axes (and an
# anchor of y=5 sits far outside it), so the title is set on the grid's own
# legend instead.
pair_grid = sns.pairplot(farm, hue='label', diag_kind='kde')
if pair_grid.legend is not None:
    pair_grid.legend.set_title('Crops')
plt.show()


In [None]:
# One box per column of `farm`, with the `label` column left out
features_only = farm.drop(columns='label')
box_fig, box_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=features_only, ax=box_ax)
plt.xticks(rotation=45)
box_ax.set_title('Nutrient Distribution Boxplot')
plt.show()

In [None]:
# Horizontal count plot of the crop labels, ordered as value_counts() returns
# them (most frequent first)
crop_order = farm['label'].value_counts().index
count_fig, count_ax = plt.subplots(figsize=(12, 6))
sns.countplot(y=farm['label'], order=crop_order, palette='viridis', ax=count_ax)
count_ax.set_title('Crop Distribution')
count_ax.set_xlabel('Count')
count_ax.set_ylabel('Crop Type')

plt.show()



In [None]:
# Scatter plot of key nutrient relationships: N against P, colored by crop label
fig_np, ax_np = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=farm, x='N', y='P', hue='label', palette='deep', ax=ax_np)
ax_np.set_title('Nitrogen vs Phosphorus')
ax_np.set_xlabel('Nitrogen')
ax_np.set_ylabel('Phosphorus')
# Anchor the legend just outside the right edge of the axes
ax_np.legend(title='Crops', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.show()

In [None]:
# Scatter plot of K against ph, colored by crop label
plt.figure(figsize=(8, 6))
k_ph_ax = sns.scatterplot(data=farm, x='K', y='ph', hue='label', palette='coolwarm')
k_ph_ax.set_title('Potassium vs pH')
k_ph_ax.set_xlabel('Potassium')
k_ph_ax.set_ylabel('pH Level')
# Anchor the legend just outside the right edge of the axes
k_ph_ax.legend(title='Crops', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()


In [None]:
# Scatter plot of Temperature vs Humidity, colored by crop label
fig_th, ax_th = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=farm,
    x='temperature',
    y='humidity',
    hue='label',
    palette='coolwarm',
    ax=ax_th,
)
ax_th.set(
    title='Temperature vs Humidity',
    xlabel='Temperature (°C)',
    ylabel='Humidity (%)',
)
ax_th.legend(title='Crops', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Scatter plot of Temperature vs Rainfall, colored by crop label
plt.figure(figsize=(8, 6))
temp_rain_ax = sns.scatterplot(
    data=farm, x='temperature', y='rainfall', hue='label', palette='viridis'
)
temp_rain_ax.set_title('Temperature vs Rainfall')
temp_rain_ax.set_xlabel('Temperature (°C)')
temp_rain_ax.set_ylabel('Rainfall (mm)')
# Anchor the legend just outside the right edge of the axes
temp_rain_ax.legend(title='Crops', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Scatter plot of Humidity vs Rainfall, colored by crop label
legend_options = {'title': 'Crops', 'bbox_to_anchor': (1.05, 1), 'loc': 'upper left'}
fig_hr, ax_hr = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=farm, x='humidity', y='rainfall', hue='label', palette='plasma', ax=ax_hr)
ax_hr.set_title('Humidity vs Rainfall')
ax_hr.set_xlabel('Humidity (%)')
ax_hr.set_ylabel('Rainfall (mm)')
ax_hr.legend(**legend_options)
plt.show()

In [None]:
# Bar plot comparing average temperature, humidity, and rainfall by crop label
climate_columns = ['temperature', 'humidity', 'rainfall']
avg_values = farm.groupby('label')[climate_columns].mean().reset_index()
# Reshape to long form (one row per label/Factor pair) so `hue` can split the bars
melted_avg = avg_values.melt(id_vars='label', var_name='Factor', value_name='Value')

bar_fig, bar_ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=melted_avg, x='label', y='Value', hue='Factor', palette='coolwarm', ax=bar_ax)
plt.xticks(rotation=90)
bar_ax.set_title('Comparison of Temperature, Humidity, and Rainfall by Crop')
bar_ax.set_xlabel('Crop Label')
bar_ax.set_ylabel('Average Value')
bar_ax.legend(title='Factor', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Boxplot to compare temperature distribution among different crops
temp_fig, temp_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=farm, x='label', y='temperature', palette='coolwarm', ax=temp_ax)
# Turn the crop names vertical
plt.xticks(rotation=90)
temp_ax.set_title('Temperature Distribution Across Different Crops')
temp_ax.set_xlabel('Crop Label')
temp_ax.set_ylabel('Temperature (°C)')
plt.show()

In [None]:
# Boxplot to compare humidity distribution among different crops
plt.figure(figsize=(12, 6))
humidity_ax = sns.boxplot(data=farm, x='label', y='humidity', palette='viridis')
# Turn the crop names vertical
plt.xticks(rotation=90)
humidity_ax.set(
    title='Humidity Distribution Across Different Crops',
    xlabel='Crop Label',
    ylabel='Humidity (%)',
)
plt.show()

In [None]:
# Boxplot to compare rainfall distribution among different crops
rain_fig, rain_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x=farm['label'], y=farm['rainfall'], palette='plasma', ax=rain_ax)
# Turn the crop names vertical
plt.xticks(rotation=90)
rain_ax.set_title('Rainfall Distribution Across Different Crops')
rain_ax.set_xlabel('Crop Label')
rain_ax.set_ylabel('Rainfall (mm)')
plt.show()

In [None]:
# Print the version string of the Python interpreter executing this notebook
from platform import python_version

interpreter_version = python_version()
print(interpreter_version)