SVM on data sets (Iris, BezdekIris, Ecoli, SPECTF heart)
Links:
1. SPECTF heart: https://archive.ics.uci.edu/ml/machine-learning-databases/spect/SPECTF.train (training file; the matching test file is SPECTF.test in the same directory)
2. Iris: https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data
3. Ecoli: https://archive.ics.uci.edu/ml/machine-learning-databases/ecoli/ecoli.data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

def delete_DublicateInstances(theDataset):
    """Drop repeated rows from ``theDataset`` in place.

    Rows are compared across every column and the first occurrence of each
    distinct row is kept. The frame passed in is modified directly; no new
    frame is created for the caller.
    """
    theDataset.drop_duplicates(subset=None, keep='first', inplace=True)

def handle_MissingValues(theDataSet):
    """Replace missing values in each numeric column with that column's mean.

    The frame is modified in place. Non-numeric columns are left untouched,
    since no mean can be computed for them.

    The fill is issued once on the frame itself rather than per column via
    ``theDataSet.iloc[:, i].fillna(..., inplace=True)``: an in-place call on a
    selected column acts on a temporary object, which pandas warns about and
    which does not update the parent frame under Copy-on-Write.
    """
    # Series indexed by column name; fillna matches each mean to its column.
    column_means = theDataSet.mean(numeric_only=True)
    theDataSet.fillna(column_means, inplace=True)

In [2]:
# Iris data set: load, remove duplicate rows, hold out 30% of the rows,
# then fit an SVC with default settings and predict on the held-out rows.
url_iris = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
irisDataSet = pd.read_csv(
    url_iris,
    names=['Sepal length', 'Sepal width', 'Petal length', 'Petal width', 'class'],
)
delete_DublicateInstances(irisDataSet)

# The 'class' column is the label; every other column is a feature.
y_irisDataSet = irisDataSet['class']
x_irisDataSet = irisDataSet.drop(columns='class')
(
    x_train_irisDataSet,
    x_test_irisDataSet,
    y_train_irisDataSet,
    y_test_irisDataSet,
) = train_test_split(x_irisDataSet, y_irisDataSet, test_size=0.3)

# fit() returns the estimator itself, so construction and training are chained.
SVM_irisDataSet = SVC().fit(x_train_irisDataSet, y_train_irisDataSet)
SVM_prediction_irisDataSet = SVM_irisDataSet.predict(x_test_irisDataSet)

In [3]:
# BezdekIris data set: same column layout as the Iris file above, read from
# bezdekIris.data. Load, remove duplicate rows, hold out 30% of the rows,
# then fit an SVC with default settings and predict on the held-out rows.
url_bezdekIris = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/bezdekIris.data'
bezdekIrisDataSet = pd.read_csv(
    url_bezdekIris,
    names=['Sepal length', 'Sepal width', 'Petal length', 'Petal width', 'class'],
)
delete_DublicateInstances(bezdekIrisDataSet)

# The 'class' column is the label; every other column is a feature.
y_bezdekIrisDataSet = bezdekIrisDataSet['class']
x_bezdekIrisDataSet = bezdekIrisDataSet.drop(columns='class')
(
    x_train_bezdekIrisDataSet,
    x_test_bezdekIrisDataSet,
    y_train_bezdekIrisDataSet,
    y_test_bezdekIrisDataSet,
) = train_test_split(x_bezdekIrisDataSet, y_bezdekIrisDataSet, test_size=0.3)

# fit() returns the estimator itself, so construction and training are chained.
SVM_bezdekIrisDataSet = SVC().fit(x_train_bezdekIrisDataSet, y_train_bezdekIrisDataSet)
SVM_prediction_bezdekIrisDataSet = SVM_bezdekIrisDataSet.predict(x_test_bezdekIrisDataSet)

In [4]:
# Ecoli data set: load the whitespace-delimited file, remove duplicate rows,
# hold out 30% of the rows, then fit an SVC with default settings and predict
# on the held-out rows.
url_ecoli = 'https://archive.ics.uci.edu/ml/machine-learning-databases/ecoli/ecoli.data'
ecoliDataSet = pd.read_csv(
    url_ecoli,
    names=['Sequence Name', 'mcg', 'gvh', 'lip', 'chg', 'aac', 'alm1', 'alm2', 'class'],
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # and triggers a warning at compile time.
    sep=r'\s+',
)
delete_DublicateInstances(ecoliDataSet)

# 'Sequence Name' is removed together with the label so that it is not fed to
# the classifier as a feature. `columns=` alone selects the axis.
x_ecoliDataSet = ecoliDataSet.drop(columns=['Sequence Name', 'class'])
y_ecoliDataSet = ecoliDataSet['class']
x_train_ecoliDataSet, x_test_ecoliDataSet, y_train_ecoliDataSet, y_test_ecoliDataSet = train_test_split(
    x_ecoliDataSet, y_ecoliDataSet, test_size=0.3
)

SVM_ecoliDataSet = SVC()
SVM_ecoliDataSet.fit(x_train_ecoliDataSet, y_train_ecoliDataSet)
SVM_prediction_ecoliDataSet = SVM_ecoliDataSet.predict(x_test_ecoliDataSet)

In [5]:
# SPECTF Heart data set: train and test rows come from two separate files, so
# no random split is performed here.
# Column layout shared by both files: the label first, then the pairs
# F1R, F1S, F2R, F2S, ... up to F22R, F22S.
_SPECTF_columns = ['OVERALL_DIAGNOSIS'] + [
    'F{}{}'.format(number, suffix)
    for number in range(1, 23)
    for suffix in ('R', 'S')
]

url_train_SPECTFDataSet = 'https://archive.ics.uci.edu/ml/machine-learning-databases/spect/SPECTF.train'
url_test_SPECTFDataSet = 'https://archive.ics.uci.edu/ml/machine-learning-databases/spect/SPECTF.test'
train_SPECTFDataSet = pd.read_csv(url_train_SPECTFDataSet, names=_SPECTF_columns)
test_SPECTFDataSet = pd.read_csv(url_test_SPECTFDataSet, names=_SPECTF_columns)

delete_DublicateInstances(train_SPECTFDataSet)
delete_DublicateInstances(test_SPECTFDataSet)

# 'OVERALL_DIAGNOSIS' is the label; the remaining columns are the features.
y_train_SPECTFDataSet = train_SPECTFDataSet['OVERALL_DIAGNOSIS']
x_train_SPECTFDataSet = train_SPECTFDataSet.drop(columns='OVERALL_DIAGNOSIS')
y_test_SPECTFDataSet = test_SPECTFDataSet['OVERALL_DIAGNOSIS']
x_test_SPECTFDataSet = test_SPECTFDataSet.drop(columns='OVERALL_DIAGNOSIS')

handle_MissingValues(x_train_SPECTFDataSet)
handle_MissingValues(x_test_SPECTFDataSet)

# fit() returns the estimator itself, so construction and training are chained.
SVM_SPECTFDataSet = SVC().fit(x_train_SPECTFDataSet, y_train_SPECTFDataSet)
SVM_prediction_SPECTFDataSet = SVM_SPECTFDataSet.predict(x_test_SPECTFDataSet)