In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import imblearn
from imblearn.over_sampling import SMOTE
from collections import Counter

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

from sklearn.neighbors import  KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import pickle

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
data = pd.read_csv("D:\Research Internship\Programs\Datasets\FertilizerPrediction.csv")
print(data)
data.describe()

    Temparature  Humidity   Moisture Soil Type  Crop Type  Nitrogen  \
0            26         52        38     Sandy      Maize        37   
1            29         52        45     Loamy  Sugarcane        12   
2            34         65        62     Black     Cotton         7   
3            32         62        34       Red    Tobacco        22   
4            28         54        46    Clayey      Paddy        35   
..          ...        ...       ...       ...        ...       ...   
94           25         50        32    Clayey     Pulses        24   
95           30         60        27       Red    Tobacco         4   
96           38         72        51     Loamy      Wheat        39   
97           36         60        43     Sandy    Millets        15   
98           29         58        57     Black  Sugarcane        12   

    Potassium  Phosphorous Fertilizer Name  
0           0            0            Urea  
1           0           36             DAP  
2           

Unnamed: 0,Temparature,Humidity,Moisture,Nitrogen,Potassium,Phosphorous
count,99.0,99.0,99.0,99.0,99.0,99.0
mean,30.282828,59.151515,43.181818,18.909091,3.383838,18.606061
std,3.502304,5.840331,11.271568,11.599693,5.814667,13.476978
min,25.0,50.0,25.0,4.0,0.0,0.0
25%,28.0,54.0,34.0,10.0,0.0,9.0
50%,30.0,60.0,41.0,13.0,0.0,19.0
75%,33.0,64.0,50.5,24.0,7.5,30.0
max,38.0,72.0,65.0,42.0,19.0,42.0


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Temparature      99 non-null     int64 
 1   Humidity         99 non-null     int64 
 2   Moisture         99 non-null     int64 
 3   Soil Type        99 non-null     object
 4   Crop Type        99 non-null     object
 5   Nitrogen         99 non-null     int64 
 6   Potassium        99 non-null     int64 
 7   Phosphorous      99 non-null     int64 
 8   Fertilizer Name  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [4]:
data.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [5]:
data.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

# Data Preprocessing


In [6]:
X_count = Counter(data.drop('Fertilizer Name', axis=1))
X_count

Counter({'Temparature': 1,
         'Humidity ': 1,
         'Moisture': 1,
         'Soil Type': 1,
         'Crop Type': 1,
         'Nitrogen': 1,
         'Potassium': 1,
         'Phosphorous': 1})

In [7]:
y_count =Counter(data['Fertilizer Name'])
y_count

Counter({'Urea': 22,
         'DAP': 18,
         '14-35-14': 14,
         '28-28': 17,
         '17-17-17': 7,
         '20-20': 14,
         '10-26-26': 7})

### Label Encoding

In [8]:
Soillabelencoder = LabelEncoder()

data["Soil Type"] = Soillabelencoder.fit_transform(data["Soil Type"])

croplabelencoder = LabelEncoder()
data["Crop Type"] = croplabelencoder.fit_transform(data["Crop Type"])

fertilabelencoder = LabelEncoder()
data["Fertilizer Name"] = fertilabelencoder.fit_transform(data["Fertilizer Name"])

In [9]:
data

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,4,3,37,0,0,6
1,29,52,45,2,8,12,0,36,5
2,34,65,62,0,1,7,9,30,1
3,32,62,34,3,9,22,0,20,4
4,28,54,46,1,6,35,0,0,6
...,...,...,...,...,...,...,...,...,...
94,25,50,32,1,7,24,0,19,4
95,30,60,27,3,9,4,17,17,0
96,38,72,51,2,10,39,0,0,6
97,36,60,43,4,4,15,0,41,5


In [10]:
X = data[data.columns[:-1]]
y = data[data.columns[-1]]

In [11]:
X.shape


(99, 8)

In [12]:
y.shape

(99,)

# Upsampling the data

In [13]:
upsample = SMOTE()
X_upsampled, y_upsampled = upsample.fit_resample(X, y)


In [14]:
X_count = Counter(X_upsampled)
print(X_count)
print(X_upsampled)
y_count = Counter(y_upsampled)
print(y_count)

Counter({'Temparature': 1, 'Humidity ': 1, 'Moisture': 1, 'Soil Type': 1, 'Crop Type': 1, 'Nitrogen': 1, 'Potassium': 1, 'Phosphorous': 1})
     Temparature  Humidity   Moisture  Soil Type  Crop Type  Nitrogen  \
0             26         52        38          4          3        37   
1             29         52        45          2          8        12   
2             34         65        62          0          1         7   
3             32         62        34          3          9        22   
4             28         54        46          1          6        35   
..           ...        ...       ...        ...        ...       ...   
149           25         51        39          1          5        21   
150           29         55        48          2          9        12   
151           33         59        40          4          4        15   
152           32         63        31          3          2        13   
153           29         53        47          2         

In [15]:
data_upsampled = X_upsampled.join(y_upsampled)
data_upsampled


Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,4,3,37,0,0,6
1,29,52,45,2,8,12,0,36,5
2,34,65,62,0,1,7,9,30,1
3,32,62,34,3,9,22,0,20,4
4,28,54,46,1,6,35,0,0,6
...,...,...,...,...,...,...,...,...,...
149,25,51,39,1,5,21,0,22,4
150,29,55,48,2,9,12,0,36,5
151,33,59,40,4,4,15,0,39,5
152,32,63,31,3,2,13,0,38,5


In [16]:
data_upsampled.to_csv(r'D:\Research Internship\Programs\Datasets\up_dataset.csv')

# Test Train Split

SPLIT 1 = 8:2
SPLIT 2 = 7:3

In [17]:
X1_train, X1_test, y1_train, y1_test = train_test_split(X_upsampled, y_upsampled, test_size = 0.2, random_state = 42)
print(f"Train Data: {X1_train.shape}, {y1_train.shape}")
print(f"Test Data: {X1_test.shape}, {y1_test.shape}")

Train Data: (123, 8), (123,)
Test Data: (31, 8), (31,)


In [18]:
X2_train, X2_test, y2_train, y2_test = train_test_split(X_upsampled, y_upsampled, test_size = 0.3, random_state = 42)
print(f"Train Data: {X2_train.shape}, {y2_train.shape}")
print(f"Test Data: {X2_test.shape}, {y2_test.shape}")

Train Data: (107, 8), (107,)
Test Data: (47, 8), (47,)


In [19]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix


def evaulate(y_true,y_pred):
    score=accuracy_score(y_true,y_pred)
    conmat=confusion_matrix(y_true,y_pred)
    classrep=classification_report(y_true,y_pred)
    print(f"Accuracy Score:",score, "\n Confusion Matrix:\n" ,conmat, "\n Classification Report:\n",classrep)

# Feature Scaling

In [20]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X1_train_scaled = sc.fit_transform(X1_train)
X1_test_scaled = sc.transform(X1_test)

In [21]:
X1_train_scaled


array([[ 1.48442831e+00,  1.27249296e+00,  4.86787568e-01,
         7.70519389e-01,  3.11866781e-01, -9.69251849e-01,
         3.53661758e-01,  8.45171662e-01],
       [-2.70342905e-02,  1.93067897e-01, -5.82406555e-01,
         1.52871047e+00, -3.61108904e-01, -5.93169901e-01,
        -7.87332242e-01, -3.43868863e-01],
       [-2.70342905e-02,  1.93067897e-01,  1.84758009e+00,
         7.70519389e-01, -1.37057243e+00, -6.87190388e-01,
         4.96286008e-01,  9.30103129e-01],
       [-1.53849690e+00, -1.60597387e+00,  1.16718383e+00,
         1.23283102e-02,  6.48354624e-01, -4.99149414e-01,
         9.24158759e-01, -2.58937397e-01],
       [-1.23620437e+00, -1.24616552e+00, -9.64092262e-02,
         7.70519389e-01, -6.97596747e-01,  6.29096429e-01,
        -7.87332242e-01,  8.07884677e-02],
       [ 1.18213579e+00,  1.09258878e+00,  9.72784897e-01,
         1.23283102e-02,  1.65781815e+00,  2.03940373e+00,
        -7.87332242e-01, -1.53290939e+00],
       [ 1.48442831e+00,  1.632301

In [22]:
X2_train_scaled = sc.fit_transform(X2_train)
X2_test_scaled = sc.transform(X2_test)

For 8:2 Ratio

Scaled data

In [23]:
from sklearn.ensemble import RandomForestClassifier
RFclassifier_scaled = RandomForestClassifier(n_estimators= 100, criterion = 'gini' , random_state= 42)
RFclassifier_scaled.fit(X1_train_scaled, y1_train)

In [24]:
y1_pred_scaled = RFclassifier_scaled.predict(X1_test_scaled)


In [25]:
from sklearn.model_selection import cross_val_score

cross_val_score(RFclassifier_scaled,X, y, cv=5).mean()

0.96

In [26]:
print(classification_report(y1_test,y1_pred_scaled))
accuracy_score(y1_test,y1_pred_scaled)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00         8
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         5
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3

    accuracy                           1.00        31
   macro avg       1.00      1.00      1.00        31
weighted avg       1.00      1.00      1.00        31



1.0

Unscaled data

In [27]:
RFclassifier_unscaled = RandomForestClassifier(n_estimators= 100, criterion = 'gini' , random_state= 42)
RFclassifier_unscaled.fit(X1_train, y1_train)

In [28]:
y1_pred_unscaled = RFclassifier_unscaled.predict(X1_test)

In [29]:
cross_val_score(RFclassifier_unscaled,X, y, cv=5).mean()

0.96

In [30]:
from sklearn.metrics import balanced_accuracy_score


accuracy_score(y1_test, y1_pred_unscaled)

1.0

In [31]:
print(classification_report(y1_test,y1_pred_unscaled))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00         8
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         5
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         3

    accuracy                           1.00        31
   macro avg       1.00      1.00      1.00        31
weighted avg       1.00      1.00      1.00        31



for 8:2  ratio unscaled data overfits in Random Forest

For 7:3 ratio

Scaled Data

In [32]:
RFclassifier_scaled2 = RandomForestClassifier(n_estimators= 100, criterion = 'gini' , random_state= 42)
RFclassifier_scaled2.fit(X2_train_scaled, y2_train)

In [33]:
y2_pred_scaled = RFclassifier_scaled2.predict(X2_test)


In [34]:
cross_val_score(RFclassifier_scaled2, X,y, cv=5).mean()

0.96

In [35]:
X2_train_scaled

array([[ 0.00284458,  0.23510291,  1.34565628,  0.09131411,  1.00607041,
        -0.59241467,  0.25376989,  1.15154493],
       [-0.60589655, -0.85863671,  0.2096782 , -0.66027128,  0.32689354,
         1.80140593, -0.76813144, -1.52341425],
       [ 1.22032685,  1.14655259, -0.54764052, -0.66027128,  0.32689354,
         2.18441723, -0.76813144, -1.52341425],
       [ 1.82906799,  1.6934224 , -1.02096472, -1.41185667, -0.01269489,
        -0.3051562 , -0.76813144, -0.35311961],
       [ 0.00284458,  0.23510291, -1.58895376,  0.8428995 ,  1.34565885,
        -1.16693161,  1.71362894, -0.10234219],
       [-1.51900825, -1.58779645,  0.39900788,  0.09131411, -0.01269489,
        -0.49666184,  0.83771351, -0.35311961],
       [ 0.91595629,  0.96426265, -0.92629988, -0.66027128,  0.66648198,
         2.08866441, -0.76813144, -1.52341425],
       [-0.30152598,  0.05281297,  0.49367272,  0.09131411,  1.34565885,
        -0.40090902, -0.76813144, -0.77108198],
       [-0.91026712, -0.85863671

In [36]:
print(classification_report(y2_test,y2_pred_scaled))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.00      0.00      0.00        10
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00         8
           5       0.00      0.00      0.00         5
           6       0.13      1.00      0.23         6

    accuracy                           0.13        47
   macro avg       0.02      0.14      0.03        47
weighted avg       0.02      0.13      0.03        47



Unscaled Data

In [37]:
RFclassifier_2 = RandomForestClassifier(n_estimators= 100, criterion = 'gini' , random_state= 42)
RFclassifier_2.fit(X2_train, y2_train)

In [38]:
y2_pred_unscaled = RFclassifier_2.predict(X2_test)
print(cross_val_score(RFclassifier_2, X,y, cv=5).mean())
print(classification_report(y2_test,y2_pred_unscaled))
print(accuracy_score(y2_test,y2_pred_unscaled))

0.96
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         5
           4       1.00      1.00      1.00         8
           5       1.00      1.00      1.00         5
           6       1.00      1.00      1.00         6

    accuracy                           1.00        47
   macro avg       1.00      1.00      1.00        47
weighted avg       1.00      1.00      1.00        47

1.0


From this we can infer that scaled data gives a better result than   unscaled data for the  sample train, test  samples

# KNN classifier

For 8:2 Ratio 
Unscaled data

In [39]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

Kneigh1 = KNeighborsClassifier(n_neighbors=5)
Kneigh1.fit(X1_train,y1_train)

pred_KN = Kneigh1.predict(X1_test)
print(cross_val_score(Kneigh1,X,y,cv=5).mean())
print(classification_report(y1_test,pred_KN))
print(evaulate(y1_test,pred_KN))


0.8673684210526316
              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.38      0.55         8
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         5
           5       0.67      1.00      0.80         4
           6       1.00      1.00      1.00         3

    accuracy                           0.84        31
   macro avg       0.89      0.91      0.87        31
weighted avg       0.90      0.84      0.82        31

Accuracy Score: 0.8387096774193549 
 Confusion Matrix:
 [[4 0 0 0 0 0 0]
 [3 3 0 0 0 2 0]
 [0 0 3 0 0 0 0]
 [0 0 0 4 0 0 0]
 [0 0 0 0 5 0 0]
 [0 0 0 0 0 4 0]
 [0 0 0 0 0 0 3]] 
 Classification Report:
               precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.38      0.55         8
           2       

SCaled data

In [40]:

Kneigh1Scaled = KNeighborsClassifier(n_neighbors=3)
Kneigh1Scaled.fit(X1_train_scaled,y1_train)

pred_KNScaled = Kneigh1Scaled.predict(X1_test_scaled)

print(classification_report(y1_test,pred_KNScaled))

              precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       1.00      0.62      0.77         8
           2       0.75      1.00      0.86         3
           3       1.00      0.75      0.86         4
           4       1.00      1.00      1.00         5
           5       0.67      1.00      0.80         4
           6       1.00      1.00      1.00         3

    accuracy                           0.87        31
   macro avg       0.89      0.91      0.88        31
weighted avg       0.91      0.87      0.87        31



In [41]:
evaulate(y1_test,pred_KNScaled)
cross_val_score(Kneigh1Scaled,X,y,cv=5).mean()

Accuracy Score: 0.8709677419354839 
 Confusion Matrix:
 [[4 0 0 0 0 0 0]
 [1 5 0 0 0 2 0]
 [0 0 3 0 0 0 0]
 [0 0 1 3 0 0 0]
 [0 0 0 0 5 0 0]
 [0 0 0 0 0 4 0]
 [0 0 0 0 0 0 3]] 
 Classification Report:
               precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       1.00      0.62      0.77         8
           2       0.75      1.00      0.86         3
           3       1.00      0.75      0.86         4
           4       1.00      1.00      1.00         5
           5       0.67      1.00      0.80         4
           6       1.00      1.00      1.00         3

    accuracy                           0.87        31
   macro avg       0.89      0.91      0.88        31
weighted avg       0.91      0.87      0.87        31



0.8784210526315789

For 7:3 Ratio
Unscaled data

In [42]:

Kneigh2 = KNeighborsClassifier(n_neighbors=5)
Kneigh2.fit(X2_train,y2_train)

pred_KN2 = Kneigh2.predict(X2_test)

print(classification_report(y2_test,pred_KN2))

              precision    recall  f1-score   support

           0       0.73      1.00      0.84         8
           1       1.00      0.50      0.67        10
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         5
           4       1.00      1.00      1.00         8
           5       0.71      1.00      0.83         5
           6       1.00      1.00      1.00         6

    accuracy                           0.89        47
   macro avg       0.92      0.93      0.91        47
weighted avg       0.92      0.89      0.88        47



In [43]:
evaulate(y2_test,pred_KN2)
cross_val_score(Kneigh2,X,y,cv=5).mean()

Accuracy Score: 0.8936170212765957 
 Confusion Matrix:
 [[8 0 0 0 0 0 0]
 [3 5 0 0 0 2 0]
 [0 0 5 0 0 0 0]
 [0 0 0 5 0 0 0]
 [0 0 0 0 8 0 0]
 [0 0 0 0 0 5 0]
 [0 0 0 0 0 0 6]] 
 Classification Report:
               precision    recall  f1-score   support

           0       0.73      1.00      0.84         8
           1       1.00      0.50      0.67        10
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         5
           4       1.00      1.00      1.00         8
           5       0.71      1.00      0.83         5
           6       1.00      1.00      1.00         6

    accuracy                           0.89        47
   macro avg       0.92      0.93      0.91        47
weighted avg       0.92      0.89      0.88        47



0.8673684210526316

Scaled data

In [44]:
Kneigh2scaled = KNeighborsClassifier(n_neighbors=5)
Kneigh2scaled.fit(X2_train_scaled,y2_train)

pred_KN2 = Kneigh2scaled.predict(X2_test_scaled)

print(classification_report(y2_test,pred_KN2))
evaulate(y2_test,pred_KN2)
print(cross_val_score(Kneigh2scaled,X,y,cv=5).mean())

              precision    recall  f1-score   support

           0       0.70      0.88      0.78         8
           1       1.00      0.40      0.57        10
           2       0.38      0.60      0.46         5
           3       0.71      1.00      0.83         5
           4       0.86      0.75      0.80         8
           5       0.80      0.80      0.80         5
           6       1.00      1.00      1.00         6

    accuracy                           0.74        47
   macro avg       0.78      0.78      0.75        47
weighted avg       0.81      0.74      0.74        47

Accuracy Score: 0.7446808510638298 
 Confusion Matrix:
 [[7 0 1 0 0 0 0]
 [1 4 4 1 0 0 0]
 [2 0 3 0 0 0 0]
 [0 0 0 5 0 0 0]
 [0 0 0 1 6 1 0]
 [0 0 0 0 1 4 0]
 [0 0 0 0 0 0 6]] 
 Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.88      0.78         8
           1       1.00      0.40      0.57        10
           2       0.38      0.60     

# SVM Classifier

For 8:2 Ratio
Unscaled data

In [45]:
from sklearn.metrics import confusion_matrix
#with unscaled data
from sklearn.svm import SVC
SVCclassifier = SVC(kernel = 'rbf', random_state = 42)
SVCclassifier.fit(X1_train, y1_train)

# Predicting the Test set results
y1_pred = SVCclassifier.predict(X1_test)
print(classification_report(y1_test,y1_pred))
evaulate(y1_test,y1_pred)
print(cross_val_score(SVCclassifier,X,y,cv=5).mean())

              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       1.00      0.38      0.55         8
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         5
           5       0.57      1.00      0.73         4
           6       1.00      1.00      1.00         3

    accuracy                           0.84        31
   macro avg       0.89      0.91      0.87        31
weighted avg       0.90      0.84      0.82        31

Accuracy Score: 0.8387096774193549 
 Confusion Matrix:
 [[4 0 0 0 0 0 0]
 [2 3 0 0 0 3 0]
 [0 0 3 0 0 0 0]
 [0 0 0 4 0 0 0]
 [0 0 0 0 5 0 0]
 [0 0 0 0 0 4 0]
 [0 0 0 0 0 0 3]] 
 Classification Report:
               precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       1.00      0.38      0.55         8
           2       1.00      1.00     

Scaled data

In [46]:
SVCclassifierscaled = SVC(kernel = 'rbf', random_state = 42)
SVCclassifierscaled.fit(X1_train_scaled, y1_train)

# Predicting the Test set results
y1_pred_scaled = SVCclassifierscaled.predict(X1_test_scaled)
print(classification_report(y1_test,y1_pred_scaled))
evaulate(y1_test,y1_pred_scaled)
print(cross_val_score(SVCclassifierscaled,X,y,cv=5).mean())

              precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       1.00      0.62      0.77         8
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         5
           5       0.67      1.00      0.80         4
           6       1.00      1.00      1.00         3

    accuracy                           0.90        31
   macro avg       0.92      0.95      0.92        31
weighted avg       0.93      0.90      0.90        31

Accuracy Score: 0.9032258064516129 
 Confusion Matrix:
 [[4 0 0 0 0 0 0]
 [1 5 0 0 0 2 0]
 [0 0 3 0 0 0 0]
 [0 0 0 4 0 0 0]
 [0 0 0 0 5 0 0]
 [0 0 0 0 0 4 0]
 [0 0 0 0 0 0 3]] 
 Classification Report:
               precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       1.00      0.62      0.77         8
           2       1.00      1.00     

For 7:3 ratio
Unscaled Data

In [47]:
SVCclassifier2 = SVC(kernel = 'rbf', random_state = 42)
SVCclassifier2.fit(X2_train, y2_train)

# Predicting the Test set results
y2_pred = SVCclassifier2.predict(X2_test)
print(classification_report(y2_test,y2_pred))
print(classification_report(
evaulate(y2_test,y2_pred)
print(cross_val_score(SVCclassifier2,X,y,cv=5).mean())

              precision    recall  f1-score   support

           0       0.78      0.88      0.82         8
           1       1.00      0.50      0.67        10
           2       0.83      1.00      0.91         5
           3       1.00      1.00      1.00         5
           4       1.00      1.00      1.00         8
           5       0.62      1.00      0.77         5
           6       1.00      1.00      1.00         6

    accuracy                           0.87        47
   macro avg       0.89      0.91      0.88        47
weighted avg       0.90      0.87      0.86        47

Accuracy Score: 0.8723404255319149 
 Confusion Matrix:
 [[7 0 1 0 0 0 0]
 [2 5 0 0 0 3 0]
 [0 0 5 0 0 0 0]
 [0 0 0 5 0 0 0]
 [0 0 0 0 8 0 0]
 [0 0 0 0 0 5 0]
 [0 0 0 0 0 0 6]] 
 Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.88      0.82         8
           1       1.00      0.50      0.67        10
           2       0.83      1.00     

Scaled data

In [48]:
SVCclassifier2scaled = SVC(kernel = 'rbf', random_state = 42)
SVCclassifier2scaled.fit(X2_train_scaled, y2_train)

# Predicting the Test set results
y2_pred_scaled = SVCclassifier2scaled.predict(X2_test_scaled)
print(classification_report(y2_test,y2_pred_scaled))
evaulate(y2_test,y2_pred_scaled)
print(cross_val_score(SVCclassifier2scaled,X,y,cv=5).mean())

              precision    recall  f1-score   support

           0       0.88      0.88      0.88         8
           1       1.00      0.70      0.82        10
           2       0.71      1.00      0.83         5
           3       1.00      1.00      1.00         5
           4       1.00      0.88      0.93         8
           5       0.71      1.00      0.83         5
           6       1.00      1.00      1.00         6

    accuracy                           0.89        47
   macro avg       0.90      0.92      0.90        47
weighted avg       0.92      0.89      0.89        47

Accuracy Score: 0.8936170212765957 
 Confusion Matrix:
 [[7 0 1 0 0 0 0]
 [1 7 1 0 0 1 0]
 [0 0 5 0 0 0 0]
 [0 0 0 5 0 0 0]
 [0 0 0 0 7 1 0]
 [0 0 0 0 0 5 0]
 [0 0 0 0 0 0 6]] 
 Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.88      0.88         8
           1       1.00      0.70      0.82        10
           2       0.71      1.00     