In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found beneath the input directory,
# then print the paths one per line in traversal order.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Importing required libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.metrics import f1_score

In [None]:
# Read the crop recommendation CSV into a DataFrame
dataset_path = "/kaggle/input/crop-recommendation/Crop_recommendation.csv"
crops = pd.read_csv(dataset_path)

# Preview the first rows of the loaded data
crops.head()

In [None]:
# Count the null entries in each column
crops.isnull().sum()

In [None]:
# Drop junk columns: those that are entirely null AND whose name contains 'Unnamed'.
# NOTE(review): presumably these are the 'Unnamed: N' columns pandas generates for
# header-less CSV columns — confirm against the raw file.

# Columns in which every value is null
empty_columns = crops.columns[crops.isnull().all()]

# Of those, keep only the ones whose name contains 'Unnamed'
columns_to_remove = [col for col in empty_columns if 'Unnamed' in col]

# Rebind to the frame returned by drop() rather than mutating with inplace=True:
# inplace offers no performance benefit and prevents method chaining.
crops = crops.drop(columns=columns_to_remove)

In [None]:
# List the distinct crop labels, i.e. the classes of the multi-class target
crops["label"].unique()

In [None]:
# Hold out 20% of the rows for testing, using every candidate feature as input
feature_columns = ['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']
features = crops[feature_columns]
target = crops["label"]
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

In [None]:
# Fit one single-feature logistic regression per candidate feature and print its
# weighted F1-score on the test set, so the features can be compared individually.
for feature in ['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']:
    # `multi_class` is deprecated since scikit-learn 1.5. With the default lbfgs
    # solver a multinomial model is already fitted for targets with more than
    # two classes, so omitting it leaves the model unchanged and avoids the
    # deprecation warning.
    log_reg = LogisticRegression(max_iter=2000)
    # Double brackets keep the single feature as a 2-D frame, as fit/predict expect
    log_reg.fit(X_train[[feature]], y_train)
    y_pred = log_reg.predict(X_test[[feature]])
    # Weighted average: per-class F1 weighted by each class's support in y_test
    f1 = f1_score(y_test, y_pred, average="weighted")
    print(f"F1-score for {feature}: {f1}")

In [None]:
# Pairwise correlations between the candidate feature columns
corr_columns = ['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']
crops_corr = crops.loc[:, corr_columns].corr()

# Render the correlation matrix as an annotated seaborn heatmap
sns.heatmap(data=crops_corr, annot=True)
plt.show()

In [None]:
# Features retained for the final model
final_features = ["potassium", "humidity", "rainfall"]

# Redo the 80/20 split (same seed) using only the retained features
final_inputs = crops[final_features]
final_target = crops["label"]
X_train, X_test, y_train, y_test = train_test_split(
    final_inputs, final_target, test_size=0.2, random_state=42
)

In [None]:
# Fit the final model on the current training split and score it on the test split.
# `multi_class` is deprecated since scikit-learn 1.5. With the default lbfgs solver
# a multinomial model is already fitted for targets with more than two classes,
# so omitting it leaves the model unchanged and avoids the deprecation warning.
log_reg = LogisticRegression(max_iter=2000)
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)

# Weighted F1: per-class F1 averaged with weights equal to each class's support
model_performance = f1_score(y_test, y_pred, average="weighted")

model_performance