In [None]:
# This is just a preamble that sets a bunch of options up.
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier
plt.rcParams['figure.figsize'] = (15, 5)

In [None]:
# "Svm separating hyperplanes (SVG)" by User:ZackWeinberg, based on PNG version by User:Cyc
# This file was derived from:  Svm separating hyperplanes.png.
# Licensed under CC BY-SA 3.0 via Commons
# https://commons.wikimedia.org/wiki/File:Svm_separating_hyperplanes_(SVG).svg#/media/File:Svm_separating_hyperplanes_(SVG).svg

from IPython.display import Image

# Build the image object first; leaving it as the cell's last expression is
# what makes the notebook render it (at a width of 500).
svm_diagram = Image(filename='../data/svm.png', width=500)
svm_diagram

In [None]:
# Read the CSV into a DataFrame, then print its first rows as a quick check.
dataset = pd.read_csv('../data/01_heights_weights_genders.csv')
first_rows = dataset.head()
print(first_rows)

In [None]:
# Scatter plot of weight against height, one colour per gender.
for gender_name, colour in (('Female', 'red'), ('Male', 'blue')):
    # Filter the rows once per gender instead of once per plotted column.
    subset = dataset[dataset['Gender'] == gender_name]
    plt.plot(subset['Height'], subset['Weight'],
             '.', markersize=3, color=colour, label=gender_name)

# Label the axes and explain the colours so the figure stands on its own.
plt.xlabel('Height')
plt.ylabel('Weight')
plt.legend(markerscale=4);  # trailing ';' hides the object repr in the output

In [None]:
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Split our dataset into 2 parts - 70% for training and 30% for testing.
# A fixed random_state makes the split (and every result derived from it)
# reproducible across "Restart & Run All".
train, test = train_test_split(dataset, test_size=0.3, random_state=42)

In [None]:
from sklearn import svm

# Encode the gender names as integer codes so the predictions are numeric
# (the decision-region plot needs numbers to contour).
# pd.Categorical.from_array() and .labels were removed from pandas; the
# constructor plus .codes is the supported equivalent.
# NOTE(review): codes follow the sorted category order, so presumably
# 'Female' -> 0 and 'Male' -> 1 - confirm the column holds only these values.
gender_codes = pd.Categorical(train['Gender']).codes

# Train the classifier using fit()
clf = svm.SVC(kernel='linear')
clf.fit(train[['Height', 'Weight']], gender_codes)

In [None]:
# Draw the classifier's predicted regions and overlay the data points.
height_limits = (50, 80)
weight_limits = (50, 300)
plt.xlim(*height_limits)
plt.ylim(*weight_limits)

# Sample exactly the visible area on a 1-unit grid. The "+ 1" makes the grid
# reach the upper axis limit (arange excludes its stop value), and the grid no
# longer extends past the x-axis limit where it could never be seen.
height, weight = np.meshgrid(np.arange(height_limits[0], height_limits[1] + 1, 1),
                             np.arange(weight_limits[0], weight_limits[1] + 1, 1))

# Predict on a DataFrame carrying the same column names used in fit(); passing
# a bare array makes recent scikit-learn warn about missing feature names.
grid = pd.DataFrame({'Height': height.ravel(), 'Weight': weight.ravel()})
gender = clf.predict(grid[['Height', 'Weight']])
gender = gender.reshape(height.shape)  # back to the 2-D grid shape for contourf
cs = plt.contourf(height, weight, gender, len(train['Gender'].unique()), cmap=plt.cm.Paired, alpha=0.8)

# Overlay the observations, filtering the rows once per gender.
for gender_name, colour in (('Female', 'red'), ('Male', 'blue')):
    subset = dataset[dataset['Gender'] == gender_name]
    plt.plot(subset['Height'], subset['Weight'],
             '.', markersize=3, color=colour, label=gender_name)

plt.xlabel('Height')
plt.ylabel('Weight')
plt.legend(markerscale=4);  # trailing ';' hides the object repr in the output

In [None]:
# Refit the same classifier on the values of the 'Gender' column themselves
# (rather than integer codes), so its predictions are those label values.
# pd.Categorical.from_array() was removed from pandas; the column can be
# passed to fit() directly.
clf.fit(train[['Height', 'Weight']], train['Gender'])

In [None]:
# See how well we are classifying using score(): it compares the predictions
# for the held-out test rows with their true 'Gender' values.
# pd.Categorical.from_array() was removed from pandas; the column can be
# passed directly.
clf.score(test[['Height', 'Weight']], test['Gender'])