In [1]:
import pandas as pd

df = pd.read_csv('datasets/diamonds.csv', index_col=0)
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [6]:
df['clarity'].unique()

array(['SI2', 'SI1', 'VS1', 'VS2', 'VVS2', 'VVS1', 'I1', 'IF'],
      dtype=object)

In [2]:
cut_class_dict = {'Fair' : 1, 'Good' : 2, 'Very Good' : 3, 'Premium' : 4, 'Ideal' : 5}
clarity_dict = {'I3':1,'I2':2,'I1':3,'SI2':4,'SI1':5,'VS2':6,'VS1':7,'VVS2':8,'VVS1':9,'IF':10,'FL':11}
color_dict = {'J':1,'I':2,'H':3,'G':4,'F':5,'E':6,'D':7}

df['cut'] = df['cut'].map(cut_class_dict)
df['clarity'] = df['clarity'].map(clarity_dict)
df['color'] = df['color'].map(color_dict)

df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,5,6,4,61.5,55.0,326,3.95,3.98,2.43
2,0.21,4,6,5,59.8,61.0,326,3.89,3.84,2.31
3,0.23,2,6,7,56.9,65.0,327,4.05,4.07,2.31
4,0.29,4,2,6,62.4,58.0,334,4.2,4.23,2.63
5,0.31,2,1,4,63.3,58.0,335,4.34,4.35,2.75


In [3]:
import sklearn
from sklearn import svm, preprocessing
from sklearn.model_selection import train_test_split

df = sklearn.utils.shuffle(df)

X = df.drop('price', axis=1).values
X = preprocessing.scale(X)
y = df['price'].values

test_size = 200

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

clf = svm.SVR(kernel='linear')
clf.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [4]:
clf.score(X_test, y_test)

0.8674578274355168

In [5]:
for X,y in zip(X_test, y_test):
    print(f"Model: {clf.predict([X])[0]}, Actual: {y}")
    

Model: 2526.0044966947567, Actual: 2079
Model: 4863.143396913037, Actual: 4588
Model: 363.7987861895308, Actual: 838
Model: 12020.429657059936, Actual: 14452
Model: 4769.832933467214, Actual: 4295
Model: 1965.4924742999988, Actual: 1608
Model: 629.3235164894691, Actual: 789
Model: 1060.826940979669, Actual: 1077
Model: 6458.15064933593, Actual: 7243
Model: 3081.32679836989, Actual: 2739
Model: 549.7592956692497, Actual: 716
Model: 9441.707493569003, Actual: 10736
Model: 1045.6169653254033, Actual: 1064
Model: 525.9197507463746, Actual: 945
Model: 8501.911415894418, Actual: 9513
Model: 1213.244921295572, Actual: 886
Model: 1590.7939164331315, Actual: 1273
Model: 1244.794611963151, Actual: 931
Model: 14699.38586826667, Actual: 16427
Model: 10146.03637097461, Actual: 8048
Model: 5981.707242606199, Actual: 5822
Model: 3893.1819734073397, Actual: 3393
Model: 4087.308703735865, Actual: 3342
Model: 2882.46053651736, Actual: 2742
Model: 3775.0374659241375, Actual: 2913
Model: 8722.718400645463

In [6]:
clf = svm.SVR(kernel='rbf')
clf.fit(X_train, y_train)



SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [7]:
clf.score(X_test, y_test)

0.5351887454072337

In [8]:
for X,y in zip(X_test, y_test):
    print(f"Model: {clf.predict([X])[0]}, Actual: {y}")

Model: 2609.3212509734167, Actual: 2079
Model: 4313.789444926354, Actual: 4588
Model: 428.18448295140297, Actual: 838
Model: 4969.243737057409, Actual: 14452
Model: 3774.44236142294, Actual: 4295
Model: 1700.3668627356087, Actual: 1608
Model: 848.8768193381657, Actual: 789
Model: 1334.194866918782, Actual: 1077
Model: 6156.429124405235, Actual: 7243
Model: 2475.6136885386895, Actual: 2739
Model: 566.0053066799483, Actual: 716
Model: 6767.828530487101, Actual: 10736
Model: 1125.1131607963794, Actual: 1064
Model: 489.3188525460155, Actual: 945
Model: 5259.046343159251, Actual: 9513
Model: 1251.2717130045173, Actual: 886
Model: 1657.6834613952497, Actual: 1273
Model: 1023.3191661640067, Actual: 931
Model: 4496.014525294591, Actual: 16427
Model: 4294.751420070947, Actual: 8048
Model: 5611.406655781746, Actual: 5822
Model: 3869.960889936888, Actual: 3393
Model: 3547.2404166122587, Actual: 3342
Model: 2751.2696574416946, Actual: 2742
Model: 3711.6317364733704, Actual: 2913
Model: 6102.015604