In [1]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Import dataset
df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv')

In [2]:
# Drop missing rows
df.dropna(axis = 0, how = 'any', subset = None, inplace = True)

# Convert non-numeric data using one-hot encoding
df = pd.get_dummies(df, columns=['sex', 'island'])

In [3]:
# Scale independent variables by dropping the dependent variable (sex)
scaler = StandardScaler()
scaler.fit(df.drop('species',axis=1))
scaled_features = scaler.transform(df.drop('species',axis=1))

In [4]:
# Assign X and y variables
X = scaled_features
y = df['species']

# Split data into test and training sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)

In [5]:
# Assign algorithm 
model = SVC()

# Fit algorithm to data
model.fit(X_train, y_train)

# Run algorithm on test data to make predictions
model_test = model.predict(X_test)

In [6]:
# Evaluate predictions
print(confusion_matrix(y_test, model_test)) 
print(classification_report(y_test, model_test))

[[46  0  0]
 [ 1 19  0]
 [ 0  0 34]]
              precision    recall  f1-score   support

      Adelie       0.98      1.00      0.99        46
   Chinstrap       1.00      0.95      0.97        20
      Gentoo       1.00      1.00      1.00        34

    accuracy                           0.99       100
   macro avg       0.99      0.98      0.99       100
weighted avg       0.99      0.99      0.99       100



In [7]:
# Data point to predict
penguin = [
	39, #bill_length_mm
	18.5, #bill_depth_mm
	180, #flipper_length_mm 
	3750, #body_mass_g
	0, #island_Biscoe    
	0, #island_Dream
	1, #island_Torgersen    
	1, #sex_Male
	0, #sex_Female
]

# Make prediction
new_penguin = model.predict([penguin])
new_penguin

array(['Adelie'], dtype=object)