In [3]:
import pandas as pd 

df = pd.read_excel("Raisin_Dataset.xlsx")
df.sample(5)

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
51,114648,508.128933,288.953981,0.822571,118314,0.681905,1340.897,Kecimen
785,62526,357.205087,228.86293,0.767788,65994,0.707308,1006.598,Besni
299,36113,257.641853,181.157079,0.711056,37442,0.694481,737.637,Kecimen
108,82826,396.705912,269.762855,0.733205,84981,0.745435,1115.811,Kecimen
249,50274,305.397256,212.273447,0.718939,52016,0.687508,868.951,Kecimen


In [6]:
X = df[["Area", "MajorAxisLength", "MinorAxisLength", "Eccentricity", "ConvexArea", "Extent", "Perimeter"]]
y = df["Class"]

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#### Scaling

In [13]:
from sklearn.preprocessing import StandardScaler , MinMaxScaler
scaler = StandardScaler()
scaler.fit(X)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
X_train.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter
687,129292,614.547426,270.08549,0.898249,133475,0.68969,1531.744
705,79975,439.312508,240.49419,0.836849,83384,0.691162,1162.608
296,56327,297.765725,241.934426,0.582962,58178,0.727739,915.496
172,58741,345.485448,222.381845,0.765295,60701,0.714819,948.233
863,67468,424.563654,207.823052,0.872004,70674,0.611694,1105.042


In [17]:
X_train_scaled

array([[ 1.06432546,  1.58331003,  0.3121899 , ...,  1.03785038,
        -0.18372849,  1.33706534],
       [-0.20084761,  0.07228172, -0.28009664, ..., -0.19147815,
        -0.15618566, -0.01205588],
       [-0.80751089, -1.1482581 , -0.25126952, ..., -0.81008139,
         0.52829137, -0.9152028 ],
       ...,
       [ 2.02280624,  1.52682193,  1.90489827, ...,  1.91117405,
         1.01069031,  1.49210229],
       [-1.52866749, -1.59978989, -2.08647336, ..., -1.49386793,
        -1.44493478, -1.76491643],
       [-0.76951747, -1.04300791, -0.26263666, ..., -0.79103687,
         0.20282377, -0.92850265]])

#### WITHOUT SCALING

In [24]:
from sklearn.svm import SVC 

model = SVC(kernel='rbf')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

from sklearn.metrics import classification_report

report = classification_report(y_test,y_pred)
print(report)

model.n_iter_

              precision    recall  f1-score   support

       Besni       0.86      0.83      0.84       129
     Kecimen       0.85      0.87      0.86       141

    accuracy                           0.85       270
   macro avg       0.85      0.85      0.85       270
weighted avg       0.85      0.85      0.85       270



array([203], dtype=int32)

### with Scaling

In [25]:
from sklearn.svm import SVC 

model = SVC(kernel='rbf')
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

from sklearn.metrics import classification_report

report = classification_report(y_test,y_pred)
print(report)

model.n_iter_

              precision    recall  f1-score   support

       Besni       0.87      0.86      0.86       129
     Kecimen       0.87      0.88      0.88       141

    accuracy                           0.87       270
   macro avg       0.87      0.87      0.87       270
weighted avg       0.87      0.87      0.87       270



array([343], dtype=int32)

### WITHOUT SCALING

In [26]:
from sklearn.svm import SVC 

model = SVC(kernel='linear')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

from sklearn.metrics import classification_report

report = classification_report(y_test,y_pred)
print(report)

model.n_iter_

              precision    recall  f1-score   support

       Besni       0.82      0.89      0.86       129
     Kecimen       0.89      0.82      0.86       141

    accuracy                           0.86       270
   macro avg       0.86      0.86      0.86       270
weighted avg       0.86      0.86      0.86       270



array([98755368], dtype=int32)

In [27]:
from sklearn.svm import SVC 

model = SVC(kernel='linear')
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

from sklearn.metrics import classification_report

report = classification_report(y_test,y_pred)
print(report)

model.n_iter_

              precision    recall  f1-score   support

       Besni       0.85      0.88      0.86       129
     Kecimen       0.88      0.86      0.87       141

    accuracy                           0.87       270
   macro avg       0.87      0.87      0.87       270
weighted avg       0.87      0.87      0.87       270



array([1048], dtype=int32)

#### USING PIPELINE 

In [38]:
from sklearn.pipeline import Pipeline

pipeline = Pipeline([
    ("scale", StandardScaler()),
    ("svc", SVC(kernel = "rbf"))
])

In [39]:
pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

from sklearn.metrics import classification_report

report = classification_report(y_test,y_pred)
print(report)

model.n_iter_

              precision    recall  f1-score   support

       Besni       0.87      0.86      0.86       129
     Kecimen       0.87      0.88      0.88       141

    accuracy                           0.87       270
   macro avg       0.87      0.87      0.87       270
weighted avg       0.87      0.87      0.87       270



array([1048], dtype=int32)