In [1]:
#Import the Libraries

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt

In [3]:
def selectkbest(indep_X, dep_Y, n):
    test = SelectKBest(score_func=chi2, k=n)
    fit1 = test.fit(indep_X, dep_Y)
    selectk_features = fit1.transform(indep_X)
    return selectk_features

def split_scalar(indep_X, dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

In [4]:
def cm_prediction(classifier, X_test, y_test):
    y_pred = classifier.predict(X_test)
    from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
    cm = confusion_matrix(y_test, y_pred)
    Accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return classifier, Accuracy, report, X_test, y_test, cm

def logistic(X_train, y_train, X_test, y_test):
    classifier = LogisticRegression(random_state=0)
    classifier.fit(X_train, y_train)
    classifier, Accuracy, report, X_test, y_test, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, Accuracy, report, X_test, y_test, cm

def svm_linear(X_train, y_train, X_test, y_test):
    from sklearn.svm import SVC
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(X_train, y_train)
    classifier, Accuracy, report, X_test, y_test, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, Accuracy, report, X_test, y_test, cm

def svm_NL(X_train, y_train, X_test, y_test):
    from sklearn.svm import SVC
    classifier = SVC(kernel='rbf', random_state=0)
    classifier.fit(X_train, y_train)
    classifier, Accuracy, report, X_test, y_test, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, Accuracy, report, X_test, y_test, cm

def Navie(X_train, y_train, X_test, y_test):
    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    classifier, Accuracy, report, X_test, y_test, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, Accuracy, report, X_test, y_test, cm

def knn(X_train, y_train, X_test, y_test):
    from sklearn.neighbors import KNeighborsClassifier
    classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    classifier.fit(X_train, y_train)
    classifier, Accuracy, report, X_test, y_test, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, Accuracy, report, X_test, y_test, cm

def Decision(X_train, y_train, X_test, y_test):
    from sklearn.tree import DecisionTreeClassifier
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    classifier, Accuracy, report, X_test, y_test, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, Accuracy, report, X_test, y_test, cm

def random(X_train, y_train, X_test, y_test):
    from sklearn.ensemble import RandomForestClassifier
    classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    classifier, Accuracy, report, X_test, y_test, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, Accuracy, report, X_test, y_test, cm

In [5]:
def selectk_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf):
    dataframe = pd.DataFrame(index=['ChiSquare'], columns=['Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie', 'Decision', 'Random'])
    dataframe['Logistic'] = acclog
    dataframe['SVMl'] = accsvml
    dataframe['SVMnl'] = accsvmnl
    dataframe['KNN'] = accknn
    dataframe['Navie'] = accnav
    dataframe['Decision'] = accdes
    dataframe['Random'] = accrf
    return dataframe

In [6]:
# Load dataset
dataset = pd.read_csv("dataset_preprocess_ds.csv")

In [7]:
# One-hot encode categorical variables
dataset = pd.get_dummies(dataset, drop_first=True)

In [8]:
dataset.columns

Index(['Crop/Year', 'Area_Hectares', 'Production_Tonnes', 'District_Name_BEED',
       'District_Name_HINGOLI', 'District_Name_JALNA', 'District_Name_LATUR',
       'District_Name_NANDED', 'District_Name_OSMANABAD',
       'Winter/Summer_Rabi       ', 'Winter/Summer_Summer     ',
       'Winter/Summer_Whole Year ', 'Rice_Bajra', 'Rice_Castor seed',
       'Rice_Cotton(lint)', 'Rice_Gram', 'Rice_Groundnut', 'Rice_Jowar',
       'Rice_Linseed', 'Rice_Maize', 'Rice_Moong(Green Gram)',
       'Rice_Niger seed', 'Rice_Other  Rabi pulses',
       'Rice_Other Kharif pulses', 'Rice_Rice', 'Rice_Safflower',
       'Rice_Sesamum', 'Rice_Soyabean', 'Rice_Sugarcane', 'Rice_Sunflower',
       'Rice_Wheat'],
      dtype='object')

In [9]:
# Separate features and target variable
indep_X = dataset.drop('Production_Tonnes', axis=1)
dep_Y = dataset['Rice_Sunflower']

In [10]:
# Select K best features
kbest = selectkbest(indep_X, dep_Y, 1)

In [11]:
dataset

Unnamed: 0,Crop/Year,Area_Hectares,Production_Tonnes,District_Name_BEED,District_Name_HINGOLI,District_Name_JALNA,District_Name_LATUR,District_Name_NANDED,District_Name_OSMANABAD,Winter/Summer_Rabi,...,Rice_Niger seed,Rice_Other Rabi pulses,Rice_Other Kharif pulses,Rice_Rice,Rice_Safflower,Rice_Sesamum,Rice_Soyabean,Rice_Sugarcane,Rice_Sunflower,Rice_Wheat
0,2014,32900,8700.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2014,45300,24200.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2014,101675,61475.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2014,4100,900.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2014,3700,1700.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,2014,24100,7100.0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,1
147,2014,1900,800.0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
148,2014,1300,,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
149,2014,500,200.0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0


In [12]:
kbest=selectkbest(indep_X,dep_Y,4)       

acclog=[]
accsvml=[]
accsvmnl=[]
accknn=[]
accnav=[]
accdes=[]
accrf=[]

In [13]:
kbest

array([[ 32900,      0,      0,      0],
       [ 45300,      0,      0,      0],
       [101675,      0,      0,      0],
       [  4100,      0,      0,      0],
       [  3700,      0,      0,      0],
       [101675,      0,      1,      0],
       [  3700,      0,      0,      0],
       [   100,      0,      0,      0],
       [   100,      0,      0,      0],
       [ 18700,      0,      0,      0],
       [   100,      0,      0,      1],
       [ 41800,      0,      0,      0],
       [101675,      0,      0,      0],
       [  9000,      0,      1,      0],
       [   100,      0,      0,      0],
       [   600,      0,      0,      0],
       [ 35400,      0,      0,      0],
       [  3400,      0,      1,      0],
       [ 13900,      0,      0,      0],
       [ 60600,      0,      0,      0],
       [101675,      0,      0,      0],
       [  3300,      0,      0,      0],
       [101675,      0,      0,      0],
       [ 10800,      0,      0,      0],
       [ 19400, 

In [16]:
X_train, X_test, y_train, y_test=split_scalar(kbest,dep_Y)   
    
        
classifier,Accuracy,report,X_test,y_test,cm=logistic(X_train,y_train,X_test)
acclog.append(Accuracy)

classifier,Accuracy,report,X_test,y_test,cm=svm_linear(X_train,y_train,X_test)  
accsvml.append(Accuracy)
    
classifier,Accuracy,report,X_test,y_test,cm=svm_NL(X_train,y_train,X_test)  
accsvmnl.append(Accuracy)
    
classifier,Accuracy,report,X_test,y_test,cm=knn(X_train,y_train,X_test)  
accknn.append(Accuracy)
    
classifier,Accuracy,report,X_test,y_test,cm=Navie(X_train,y_train,X_test)  
accnav.append(Accuracy)
    
classifier,Accuracy,report,X_test,y_test,cm=Decision(X_train,y_train,X_test)  
accdes.append(Accuracy)
    
classifier,Accuracy,report,X_test,y_test,cm=random(X_train,y_train,X_test)  
accrf.append(Accuracy)
    
result=selectk_Classification(acclog,accsvml,accsvmnl,accknn,accnav,accdes,accrf)

TypeError: logistic() missing 1 required positional argument: 'y_test'