# Crop Recommendation System: EDA and Baseline Model

## 1. Import Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import seaborn as sns
import matplotlib.pyplot as plt

## 2. Load the Dataset

In [None]:
# Read the crop recommendation CSV into a DataFrame and preview the first rows.
df = pd.read_csv(filepath_or_buffer='data/Crop_recommendation.csv')
df.head(n=5)

## 3. Exploratory Data Analysis (EDA)

In [None]:
# Print a concise summary of the frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Show descriptive statistics (count, mean, std, min, quartiles, max) for the
# columns pandas selects by default.
df.describe()

In [None]:
# Count the missing entries in every column.
df.isna().sum()

In [None]:
# Visualize how many samples each crop label has, one horizontal bar per label.
fig, ax = plt.subplots(figsize=(15, 6))
sns.countplot(data=df, y='label', ax=ax)
ax.set_title('Distribution of Crops')
plt.show()

## 4. Data Preprocessing

In [None]:
# Separate the target column from the predictors: every column except
# 'label' is used as a feature.
y = df['label']
X = df.drop(columns='label')

In [None]:
# Hold out 20% of the rows for evaluation. The split is stratified on the
# target so every crop label keeps the same proportion in the train and test
# sets; an unstratified shuffle can under-represent some labels in the test set
# and skew the reported accuracy. random_state keeps the split reproducible.
# NOTE(review): stratification requires at least two samples per label -
# confirm this holds for the dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## 5. Train the Baseline Model (Random Forest)

In [None]:
# Baseline classifier: a 100-tree random forest with a fixed seed so the
# fitted model is reproducible between runs.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
model.fit(X=X_train, y=y_train)

## 6. Evaluate the Model

In [None]:
# Score the fitted model on the held-out rows: predict their labels, then
# report the fraction predicted correctly.
y_pred = model.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')

## 7. Save the Model

In [None]:
# Persist the fitted model to disk so it can be reloaded without retraining.
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists first; otherwise a fresh checkout fails with
# FileNotFoundError.
import os

model_path = 'models/baseline_random_forest.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)