<a href="https://colab.research.google.com/github/Aman2632/basic_programming/blob/master/leaf_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV,StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, log_loss

In [18]:
train_raw= pd.read_csv('train.csv')
test_raw = pd.read_csv('test.csv')

print("Size of  the training  data:",train_raw.shape[0],"*",train_raw.shape[1])
print("Size of  the test  data:",test_raw.shape[0],"*",test_raw.shape[1])

Size of  the training  data: 990 * 194
Size of  the test  data: 594 * 193


In [19]:
le= LabelEncoder().fit(train_raw.species) #encoding  the species 
labels= le.transform(train_raw.species)
classes= list(le.classes_)
test_ids= test_raw.id
train=  train_raw.drop(['id','species'], axis= 1)
test= test_raw.drop(['id'], axis =1)


In [20]:
#from sklearn.model_selection import train_test_split

ss_split = StratifiedShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
ss_split.get_n_splits(train, labels)

for train_index, test_index in ss_split.split(train, labels):   
    X_train, X_test = train.values[train_index], train.values[test_index]
    y_train, y_test = labels[train_index], labels[test_index]


In [22]:
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

param_grid = {'C': [1000, 10000],'tol': [0.000001, 0.00001]}
log_reg = LogisticRegression(solver='newton-cg', multi_class='multinomial')
grid_search = GridSearchCV(log_reg, param_grid, scoring= 'neg_log_loss', refit='True', n_jobs=1, cv=None)
grid_search.fit(X_train_scaled, y_train)

print("Best parameters: {}".format(grid_search.best_params_))
print('Best neg_loss_score: {}'.format(grid_search.best_score_))
print('Best estimator: {}'.format(grid_search.best_estimator_))


Best parameters: {'C': 10000, 'tol': 1e-05}
Best neg_loss_score: -0.048449995350367016
Best estimator: LogisticRegression(C=10000, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=None, solver='newton-cg', tol=1e-05, verbose=0,
                   warm_start=False)


In [25]:
scaler = StandardScaler().fit(X_test)
X_test_scaled = scaler.transform(X_test)

train_predictions = grid_search.predict(X_test_scaled)
acc = accuracy_score(y_test, train_predictions)
print("Accuracy: {:.4%}".format(acc))
train_predictions_prob = grid_search.predict_proba(X_test_scaled)
logloss = log_loss(y_test, train_predictions_prob)
print("Log Loss: {:.6}".format(logloss))

Accuracy: 98.4848%
Log Loss: 0.0236682


In [27]:
# GENERATING CSV OUTPUT FILE ON THE test.csv 


scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Using the optimal parameters
param_grid = {'C': [1000],
              'tol': [0.000001]}
log_reg = LogisticRegression(solver='newton-cg', multi_class='multinomial')
grid_search = GridSearchCV(log_reg, param_grid, scoring='neg_log_loss', refit='True', n_jobs=1, cv=ss_split)
grid_search.fit(X_train_scaled, y_train)

scaler = StandardScaler().fit(test)
test_scaled = scaler.transform(test)

test_predictions = grid_search.predict_proba(test_scaled)

# Format DataFrame
submission = pd.DataFrame(test_predictions, columns=classes)
submission.insert(0, 'id', test_ids)
submission.reset_index()

# Export Submission
submission.to_csv('output.csv', index = False)

# Double check the output
submission.head()

Unnamed: 0,id,Acer_Capillipes,Acer_Circinatum,Acer_Mono,Acer_Opalus,Acer_Palmatum,Acer_Pictum,Acer_Platanoids,Acer_Rubrum,Acer_Rufinerve,Acer_Saccharinum,Alnus_Cordata,Alnus_Maximowiczii,Alnus_Rubra,Alnus_Sieboldiana,Alnus_Viridis,Arundinaria_Simonii,Betula_Austrosinensis,Betula_Pendula,Callicarpa_Bodinieri,Castanea_Sativa,Celtis_Koraiensis,Cercis_Siliquastrum,Cornus_Chinensis,Cornus_Controversa,Cornus_Macrophylla,Cotinus_Coggygria,Crataegus_Monogyna,Cytisus_Battandieri,Eucalyptus_Glaucescens,Eucalyptus_Neglecta,Eucalyptus_Urnigera,Fagus_Sylvatica,Ginkgo_Biloba,Ilex_Aquifolium,Ilex_Cornuta,Liquidambar_Styraciflua,Liriodendron_Tulipifera,Lithocarpus_Cleistocarpus,Lithocarpus_Edulis,...,Quercus_Coccinea,Quercus_Crassifolia,Quercus_Crassipes,Quercus_Dolicholepis,Quercus_Ellipsoidalis,Quercus_Greggii,Quercus_Hartwissiana,Quercus_Ilex,Quercus_Imbricaria,Quercus_Infectoria_sub,Quercus_Kewensis,Quercus_Nigra,Quercus_Palustris,Quercus_Phellos,Quercus_Phillyraeoides,Quercus_Pontica,Quercus_Pubescens,Quercus_Pyrenaica,Quercus_Rhysophylla,Quercus_Rubra,Quercus_Semecarpifolia,Quercus_Shumardii,Quercus_Suber,Quercus_Texana,Quercus_Trojana,Quercus_Variabilis,Quercus_Vulcanica,Quercus_x_Hispanica,Quercus_x_Turneri,Rhododendron_x_Russellianum,Salix_Fragilis,Salix_Intergra,Sorbus_Aria,Tilia_Oliveri,Tilia_Platyphyllos,Tilia_Tomentosa,Ulmus_Bergmanniana,Viburnum_Tinus,Viburnum_x_Rhytidophylloides,Zelkova_Serrata
0,4,9.008434e-11,1.110463e-09,2.632073e-13,3.71645e-08,2.352777e-09,4.064803e-09,1.768521e-13,2.364594e-12,7.386875e-12,3.676975e-08,7.082984e-08,1.145264e-13,4.401491e-13,3.473237e-10,6.500442e-09,7.234471e-10,2.587518e-06,1.598682e-13,1.033758e-08,1.517595e-11,6.732377e-08,1.251436e-09,8.497245e-08,6.052109e-13,5.473392e-12,2.648303e-11,3.560764e-06,7.648078e-08,7.793212e-11,8.37197e-11,3.3619970000000002e-18,3.471217e-06,1.155666e-06,1.362437e-06,4.8869e-12,5.866732e-08,3.324809e-10,3.9288269999999996e-20,2.679459e-09,...,9.744338e-11,1.072914e-09,4.635259e-10,7.671141e-10,2.469741e-10,1.604761e-05,1.097404e-09,1.227928e-07,3.012206e-17,3.54698e-12,1.040004e-10,2.725857e-13,5.151118e-12,3.497276e-07,5.337441e-15,1.026236e-05,3.092328e-06,2.656604e-15,5.048058e-09,1.314882e-05,1.82803e-10,1.270991e-08,1.541359e-15,1.3402230000000002e-17,3.356001e-05,2.508164e-05,8.666719e-14,3.622023e-15,1.999738e-16,5.108456e-06,5.026842e-15,8.301392e-10,2.904141e-10,1.291162e-11,2.98724e-09,7.928672e-13,1.093352e-13,3.406073e-15,3.417424e-07,7.521494e-12
1,7,7.569856e-10,6.803398e-09,7.776958e-08,5.441388e-07,1.546202e-09,9.544596e-09,7.877243e-06,1.120744e-09,1.172221e-09,9.564221e-09,3.28715e-12,6.188733e-11,3.321578e-11,3.397008e-10,9.572006e-10,5.723148e-10,4.091255e-10,2.001442e-07,4.845402e-10,9.255324e-07,4.929411e-09,2.344501e-11,9.0171e-10,9.167035e-11,6.606083e-10,2.909519e-10,1.573706e-08,1.096748e-07,2.968103e-10,2.293852e-12,7.074956e-09,3.541063e-09,8.10121e-09,1.32097e-10,6.828909e-08,2.554701e-11,6.898415e-06,4.27085e-10,4.699562e-08,...,3.178571e-07,6.38542e-08,1.278245e-08,4.069405e-10,1.406554e-07,3.554263e-13,5.023459e-10,3.999385e-10,2.565569e-09,3.404363e-08,6.65107e-06,5.086648e-13,7.474035e-11,1.937686e-10,5.969287e-09,7.216791e-15,4.613882e-11,1.596871e-06,1.680182e-09,2.51314e-09,1.253934e-08,9.922973e-10,5.827232e-09,2.141518e-07,5.451317e-07,5.720239e-07,1.377167e-07,1.272074e-07,2.963321e-07,1.231878e-13,1.600169e-08,2.606813e-08,4.108339e-11,2.318302e-12,1.294124e-14,1.906134e-06,2.574062e-09,3.042251e-06,2.75803e-10,1.193339e-08
2,9,5.304588e-08,0.9973271,1.104389e-07,3.858415e-08,0.002148125,3.744998e-06,2.818599e-08,8.377288e-06,5.785608e-05,2.687391e-05,4.376429e-08,4.034032e-05,4.035905e-08,1.450595e-07,4.266312e-06,2.731486e-06,0.0002302545,3.418138e-10,9.727925e-08,5.910047e-06,8.279505e-08,1.566042e-07,4.686104e-11,2.194924e-10,4.265368e-11,1.112154e-05,8.998275e-06,1.523493e-10,6.898161e-09,1.393938e-09,1.575823e-10,1.512741e-08,4.819382e-06,2.001269e-06,1.109232e-11,5.432313e-06,8.45585e-10,6.104561e-12,4.110559e-10,...,1.423864e-09,5.348021e-07,2.981009e-10,7.439226e-08,2.562304e-07,7.312806e-09,1.730898e-08,8.812368e-08,1.56578e-11,2.222296e-11,1.339854e-07,1.453555e-09,7.819397e-09,3.015317e-08,2.285804e-06,5.988334e-08,6.77648e-08,1.310138e-10,5.052385e-08,5.161864e-07,2.467248e-08,3.957291e-08,1.007153e-12,1.273122e-12,1.708362e-08,6.800867e-07,3.02152e-05,1.937227e-09,7.905054e-09,2.200087e-09,2.252727e-07,6.74281e-09,6.967798e-08,1.280427e-08,3.415206e-09,6.895807e-09,1.38243e-07,5.418573e-11,1.218609e-08,6.963931e-05
3,12,2.785195e-09,0.00121296,8.258301e-08,1.850183e-08,2.192495e-07,3.167731e-09,8.428677e-06,2.894953e-07,6.234444e-06,2.611597e-05,4.720606e-10,3.474396e-06,1.601228e-05,0.0005160175,5.213729e-06,3.357262e-07,1.511197e-08,3.330866e-06,1.377532e-09,0.9963095,6.316584e-07,6.237508e-11,1.906046e-08,5.366076e-11,4.630192e-10,1.698746e-09,1.299907e-05,2.371901e-08,3.754334e-11,2.243763e-12,1.902759e-10,1.887087e-10,5.364392e-07,6.820586e-09,1.336979e-10,2.739597e-09,2.284489e-07,4.010583e-09,1.077409e-08,...,1.504699e-07,2.285484e-07,3.220323e-10,2.096864e-07,8.178468e-06,2.945116e-12,2.775546e-08,2.378427e-09,9.10916e-09,8.156738e-10,1.407988e-09,4.330176e-11,3.598764e-10,7.947911e-11,2.99156e-07,1.017831e-11,2.770566e-09,3.174131e-09,2.425397e-07,2.167662e-07,9.797911e-08,9.94418e-07,7.206089e-10,2.411645e-07,1.020608e-08,6.082433e-08,6.333297e-05,2.227296e-07,3.575261e-08,2.65079e-12,5.253153e-07,7.977147e-09,7.65185e-08,5.446276e-10,2.525862e-10,7.761713e-07,0.0005167899,2.915442e-08,4.291996e-09,7.709214e-05
4,13,5.942099e-08,5.114701e-08,1.420662e-10,1.701499e-11,2.945584e-08,1.403208e-10,5.364248e-08,7.860097e-09,3.747378e-06,1.701993e-07,3.258912e-09,6.819817e-05,8.926093e-08,1.235132e-06,0.9998908,1.564703e-08,2.362018e-05,7.302247e-10,1.055677e-08,1.837166e-09,2.851166e-07,9.739257e-12,3.099651e-08,8.439954e-12,2.65255e-10,5.152594e-10,2.664367e-07,4.51407e-11,1.979474e-11,6.73394e-13,3.186107e-12,2.067839e-12,3.962717e-10,7.005919e-09,1.033711e-13,5.77212e-08,2.483874e-10,2.259913e-14,9.15926e-10,...,1.587851e-09,1.552973e-09,8.350361e-11,5.996714e-10,3.008223e-08,1.194844e-11,9.312008e-12,8.648722e-10,7.540728e-13,3.980872e-12,9.787469e-11,3.947187e-10,9.380688e-09,1.789848e-11,1.510633e-10,4.787419e-09,4.698875e-10,2.708726e-12,1.630734e-08,4.946213e-08,3.109585e-08,2.826033e-08,6.542187e-14,2.090464e-12,2.044007e-08,1.285111e-09,1.095048e-09,3.584083e-10,1.913314e-12,4.657612e-11,2.472565e-08,9.548104e-11,4.751815e-07,3.630436e-10,5.30031e-07,2.563243e-07,3.842864e-06,9.35025e-10,1.240307e-08,9.275394e-09
