In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline

import warnings 
  
print('Hello') 
  
# Settings the warnings to be ignored 
warnings.filterwarnings('ignore') 

Hello


In [2]:
df = pd.read_csv("Crop_recommendation.csv")

# Train Test Split

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns='label'),
    df['label'],
    test_size=0.2
)

In [5]:
np.save('X_test.npy', x_test)
np.save('y_test.npy', y_test)

# column Transformers

In [6]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

In [7]:
trf1 = ColumnTransformer(transformers=[
    ('standarisation', StandardScaler(with_mean=False), [0,6]),
],
    remainder='passthrough'
)

In [8]:
trf2 = ColumnTransformer(transformers=[
    ('ohe_label', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), [-1])
], remainder='passthrough')

# importing models

In [9]:
from sklearn.neighbors import KNeighborsClassifier  # k-Nearest Neighbors (KNN)
from sklearn.svm import SVC  # Support Vector Machine (SVM)
from sklearn.tree import DecisionTreeClassifier  # Decision Tree Classifier
from sklearn.linear_model import LogisticRegression  # Logistic Regression
from sklearn.naive_bayes import GaussianNB  # Naive Bayes
from sklearn.ensemble import GradientBoostingClassifier  # Gradient Boosting Machine (GBM)
from sklearn.ensemble import RandomForestClassifier  # Random Forest Classifier


In [10]:
models_dict = {
    'KNeighborsClassifier': KNeighborsClassifier(),
    'SVC': SVC(),
    'DecisionTreeClassifier': DecisionTreeClassifier(),
    'LogisticRegression': LogisticRegression(),
    'GaussianNB': GaussianNB(),
    'GradientBoostingClassifier': GradientBoostingClassifier(),
    'RandomForestClassifier': RandomForestClassifier()
}


In [11]:
models_dict.keys()

dict_keys(['KNeighborsClassifier', 'SVC', 'DecisionTreeClassifier', 'LogisticRegression', 'GaussianNB', 'GradientBoostingClassifier', 'RandomForestClassifier'])

In [12]:
pipelines = {
    'KNeighborsClassifier': Pipeline([
        ('scaler', trf1) , # StandardScaler as a preprocessing step
        ('model', KNeighborsClassifier())
    ]),
    'SVC': Pipeline([
        ('scaler', trf1),
        ('model', SVC())
    ]),
    'DecisionTreeClassifier': Pipeline([
        # No need to scale for Decision Tree, so skipping the scaler
        ('encode', trf2),
        ('model', DecisionTreeClassifier())
    ]),
    'LogisticRegression': Pipeline([
        ('scaler', StandardScaler()),
        ('encode', trf2),
        ('model', LogisticRegression())
    ]),
    'GaussianNB': Pipeline([
        # Naive Bayes does not require scaling, skipping the scaler
        ('encode', trf2),
        ('model', GaussianNB())
    ]),
    'GradientBoostingClassifier': Pipeline([
        # Skipping scaler for GBM as it doesn't need feature scaling
        ('encode', trf2),
        ('model', GradientBoostingClassifier())
    ]),
    'RandomForestClassifier': Pipeline([
        # Skipping scaler for Random Forest as it's not necessary
        ('encode', trf2),
        ('model', RandomForestClassifier())
    ])
}

# Optionally, print the pipelines to check
for model_name, pipeline in pipelines.items():
    print(f"{model_name} pipeline:", pipeline)

KNeighborsClassifier pipeline: Pipeline(steps=[('scaler',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('standarisation',
                                                  StandardScaler(with_mean=False),
                                                  [0, 6])])),
                ('model', KNeighborsClassifier())])
SVC pipeline: Pipeline(steps=[('scaler',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('standarisation',
                                                  StandardScaler(with_mean=False),
                                                  [0, 6])])),
                ('model', SVC())])
DecisionTreeClassifier pipeline: Pipeline(steps=[('encode',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('ohe_label',
                                                  OneHotEncoder(handle_unknown='ignor

In [14]:
for name, model in pipelines.items():
    model.fit(x_train.values, y_train)
    y_pred = model.predict(x_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{name} mode with accuracy: {score}")

KNeighborsClassifier mode with accuracy: 0.9136363636363637
SVC mode with accuracy: 0.8772727272727273
DecisionTreeClassifier mode with accuracy: 0.9431818181818182
LogisticRegression mode with accuracy: 0.9159090909090909
GaussianNB mode with accuracy: 0.5613636363636364
GradientBoostingClassifier mode with accuracy: 0.9545454545454546
RandomForestClassifier mode with accuracy: 0.9727272727272728


In [15]:
import pickle
print("Hello")
for model_name, pipeline in pipelines.items():
    print(f'"{model_name}" : "models/{model_name}.pkl",')
    file = pickle.dump(pipeline, open(f'models/{model_name}.pkl', 'wb'))

Hello
"KNeighborsClassifier" : "models/KNeighborsClassifier.pkl",
"SVC" : "models/SVC.pkl",
"DecisionTreeClassifier" : "models/DecisionTreeClassifier.pkl",
"LogisticRegression" : "models/LogisticRegression.pkl",
"GaussianNB" : "models/GaussianNB.pkl",
"GradientBoostingClassifier" : "models/GradientBoostingClassifier.pkl",
"RandomForestClassifier" : "models/RandomForestClassifier.pkl",
