In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix 

In [2]:
def RunSVM(train_dataset_path, test_size, kernel='linear', C=1, random_state=42,
           show_failures=False):
    """Train an SVM classifier on a CSV dataset and print its hold-out results.

    The CSV must contain a 'label' column with the class of each row; every
    other column is used as a feature and converted to float32.

    Parameters
    ----------
    train_dataset_path : str or path-like
        Location of the CSV file, passed to ``pandas.read_csv``.
    test_size : float or int
        Forwarded unchanged to ``train_test_split``.
    kernel : str, default 'linear'
        Kernel forwarded to ``SVC``.
    C : float, default 1
        Regularization parameter forwarded to ``SVC``.
    random_state : int, default 42
        Seed for the train/test split, so repeated runs use the same split.
    show_failures : bool, default False
        When True, also print the feature rows of the test split that were
        misclassified.

    Returns
    -------
    None
        Shapes, accuracy and the confusion matrix are printed, not returned.
    """
    train_df = pd.read_csv(train_dataset_path)

    # Encode class names as integer ids 0..n_classes-1. They stay integers:
    # the ids are categorical, not floating-point quantities.
    le = LabelEncoder()
    y = le.fit_transform(train_df['label'])
    X = np.asarray(train_df.drop(columns='label'), dtype=np.float32)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    print("data shapes: ", train_dataset_path)
    print("original X", X.shape)
    print("original y", y.shape)
    print("X_train", X_train.shape)
    print("X_test", X_test.shape)
    print("y_train", y_train.shape)
    print("y_test", y_test.shape)

    svm_model = SVC(kernel=kernel, C=C).fit(X_train, y_train)
    svm_predictions = svm_model.predict(X_test)

    # Accuracy on X_test, derived from the predictions computed above;
    # calling score() would run a second prediction pass over X_test.
    accuracy = float(np.mean(svm_predictions == y_test))

    # Passing every encoded class keeps one row/column per class even when a
    # class is absent from this (small) test split.
    all_classes = np.arange(len(le.classes_))
    cm = confusion_matrix(y_test, svm_predictions, labels=all_classes)

    print("====================================")
    print("Accuracy",accuracy)
    print("confusion_matrix")
    print(cm)

    if show_failures:
        # Positions in the test split where prediction and truth disagree.
        failing_indexes = np.flatnonzero(svm_predictions != y_test)
        print("failing_rows:")
        print(X_test[failing_indexes])


In [8]:
# Run the SVM evaluation on the colorBased-40 and colorBased-50 datasets,
# holding out 25% of each for testing; a double rule separates the reports.
test_size = 0.25
separator = "==============================================================================="
dataset_paths = (
    "D:/pythonProject/Image-research/vision/colorBased-40_data.csv",
    "D:/pythonProject/Image-research/vision/colorBased-50_data.csv",
)
for run_number, train_dataset_path in enumerate(dataset_paths):
    if run_number > 0:
        print(separator)
        print(separator)
    RunSVM(train_dataset_path, test_size)

data shapes:  D:/pythonProject/Image-research/vision/colorBased-40_data.csv
original X (40, 24)
original y (40,)
X_train (30, 24)
X_test (10, 24)
y_train (30,)
y_test (10,)
Accuracy 1.0
confusion_matrix
[[6 0]
 [0 4]]
data shapes:  D:/pythonProject/Image-research/vision/colorBased-50_data.csv
original X (50, 24)
original y (50,)
X_train (37, 24)
X_test (13, 24)
y_train (37,)
y_test (13,)
Accuracy 0.9230769230769231
confusion_matrix
[[7 0]
 [1 5]]


In [11]:
# Evaluate the SVM on the colorBased-b-50 dataset with a 25% hold-out split.
test_size = 0.25
train_dataset_path = "D:/pythonProject/Image-research/vision/colorBased-b-50_data.csv"
RunSVM(train_dataset_path=train_dataset_path, test_size=test_size)

data shapes:  D:/pythonProject/Image-research/vision/colorBased-b-50_data.csv
original X (50, 24)
original y (50,)
X_train (37, 24)
X_test (13, 24)
y_train (37,)
y_test (13,)
Accuracy 0.8461538461538461
confusion_matrix
[[6 1]
 [1 5]]


In [12]:
# Evaluate the SVM on the colorBased-mixed dataset with a 25% hold-out split.
test_size = 0.25
train_dataset_path = "D:/pythonProject/Image-research/vision/colorBased-mixed_data.csv"
RunSVM(train_dataset_path=train_dataset_path, test_size=test_size)

data shapes:  D:/pythonProject/Image-research/vision/colorBased-mixed_data.csv
original X (100, 24)
original y (100,)
X_train (75, 24)
X_test (25, 24)
y_train (75,)
y_test (25,)
Accuracy 0.8
confusion_matrix
[[5 1 1 0]
 [0 4 0 3]
 [0 0 7 0]
 [0 0 0 4]]


In [79]:
# Evaluate the SVM on the train-40 dataset with a 25% hold-out split.
test_size = 0.25
train_dataset_path = "D:/pythonProject/Image-research/vision/train-40_data.csv"
RunSVM(train_dataset_path=train_dataset_path, test_size=test_size)

data shapes:
original X (40, 24)
original y (40,)
X_train (30, 24)
X_test (10, 24)
y_train (30,)
y_test (10,)
Accuracy 0.9
confusion_matrix
[[6 0]
 [1 3]]


In [80]:
# Evaluate the SVM on the train-50 dataset with a 25% hold-out split.
test_size = 0.25
train_dataset_path = "D:/pythonProject/Image-research/vision/train-50_data.csv"
RunSVM(train_dataset_path=train_dataset_path, test_size=test_size)

data shapes:
original X (50, 24)
original y (50,)
X_train (37, 24)
X_test (13, 24)
y_train (37,)
y_test (13,)
Accuracy 1.0
confusion_matrix
[[7 0]
 [0 6]]


In [61]:
# Evaluate the SVM on the train-50b dataset with a 25% hold-out split.
test_size = 0.25
train_dataset_path = "D:/pythonProject/Image-research/vision/train-50b_data.csv"
RunSVM(train_dataset_path=train_dataset_path, test_size=test_size)

data shapes:
original X (40, 24)
original y (40,)
X_train (30, 24)
X_test (10, 24)
y_train (30,)
y_test (10,)
Accuracy 0.8
confusion_matrix
[[6 2]
 [0 2]]


In [78]:
# Evaluate the SVM on the trainColor-50 dataset with a 25% hold-out split.
test_size = 0.25
train_dataset_path = "D:/pythonProject/Image-research/vision/trainColor-50_data.csv"
RunSVM(train_dataset_path=train_dataset_path, test_size=test_size)

data shapes:
original X (50, 24)
original y (50,)
X_train (37, 24)
X_test (13, 24)
y_train (37,)
y_test (13,)
Accuracy 0.5384615384615384
confusion_matrix
[[2 5]
 [1 5]]


In [98]:
# Evaluate the SVM on the train-b-50 dataset with a 25% hold-out split.
test_size = 0.25
train_dataset_path = "D:/pythonProject/Image-research/vision/train-b-50_data.csv"
RunSVM(train_dataset_path=train_dataset_path, test_size=test_size)

data shapes:
original X (50, 24)
original y (50,)
X_train (37, 24)
X_test (13, 24)
y_train (37,)
y_test (13,)
Accuracy 0.9230769230769231
confusion_matrix
[[7 0]
 [1 5]]


In [100]:
# Evaluate the SVM on the train-mixed dataset with a 25% hold-out split.
test_size = 0.25
train_dataset_path = "D:/pythonProject/Image-research/vision/train-mixed_data.csv"
RunSVM(train_dataset_path=train_dataset_path, test_size=test_size)

data shapes:
original X (100, 24)
original y (100,)
X_train (75, 24)
X_test (25, 24)
y_train (75,)
y_test (25,)
Accuracy 0.6
confusion_matrix
[[6 0 1 0]
 [0 4 0 3]
 [3 1 3 0]
 [0 2 0 2]]


In [None]:
# Observations
# - It is better to have fewer photos of good quality, in which the disease can be clearly seen.
# - The algorithm has trouble classifying plants that look very similar to one another.
# - The algorithm does not take the shape of the plant into account, so multiple classes can look alike.