In [1]:
import pandas as pd

df = pd.read_csv('diamonds.csv', index_col=0)
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [2]:
df['cut'].unique()

array(['Ideal', 'Premium', 'Good', 'Very Good', 'Fair'], dtype=object)

In [3]:
df['cut'].astype('category').cat.codes

1        2
2        3
3        1
4        3
5        1
        ..
53936    2
53937    1
53938    4
53939    3
53940    2
Length: 53940, dtype: int8

In [4]:
cut_class_dict = {"Fair": 1, "Good": 2, "Very Good": 3, "Premium": 4, "Ideal": 5}
clarity_dict = {"I3": 1, "I2": 2, "I1": 3, "SI2": 4, "SI1": 5, "VS2": 6, "VS1": 7, "VVS2": 8, "VVS1": 9, "IF": 10, "FL": 11}
color_dict = {"J": 1,"I": 2,"H": 3,"G": 4,"F": 5,"E": 6,"D": 7}

df['cut'] = df['cut'].map(cut_class_dict)
df['clarity'] = df['clarity'].map(clarity_dict)
df['color'] = df['color'].map(color_dict)

df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,5,6,4,61.5,55.0,326,3.95,3.98,2.43
2,0.21,4,6,5,59.8,61.0,326,3.89,3.84,2.31
3,0.23,2,6,7,56.9,65.0,327,4.05,4.07,2.31
4,0.29,4,2,6,62.4,58.0,334,4.2,4.23,2.63
5,0.31,2,1,4,63.3,58.0,335,4.34,4.35,2.75


In [8]:
import sklearn
from sklearn import svm, preprocessing

df = sklearn.utils.shuffle(df)

X = df.drop('price', axis=1).values
X = preprocessing.scale(X)
y = df['price'].values

test_size = 200

X_train = X[:-test_size]
y_train = y[:-test_size]

X_test = X[-test_size:]
y_test = y[-test_size:]

clf = svm.SVR(kernel='linear')
clf.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False)

In [6]:
X

array([[1.51, 2.  , 4.  , ..., 7.2 , 7.26, 4.58],
       [0.32, 3.  , 7.  , ..., 4.45, 4.49, 2.68],
       [1.01, 3.  , 7.  , ..., 6.33, 6.37, 4.02],
       ...,
       [0.31, 5.  , 6.  , ..., 4.4 , 4.37, 2.65],
       [0.7 , 2.  , 6.  , ..., 5.65, 5.6 , 3.5 ],
       [0.33, 5.  , 5.  , ..., 4.43, 4.46, 2.76]])

In [7]:
y

array([10085,   809,  5622, ...,   942,  1860,   666])

In [9]:
clf.score(X_test, y_test)

0.8870587646939209

In [10]:
for X, y in zip(X_test, y_test):
    print(f'Model: {clf.predict([X])[0]}, Actual: {y}')

Model: 1774.3136651376412, Actual: 1607
Model: 11543.852106328852, Actual: 10416
Model: 1557.1858490120908, Actual: 1238
Model: 2316.1532315327854, Actual: 2083
Model: 507.22428845237164, Actual: 854
Model: 8242.75081802294, Actual: 6300
Model: 467.5683081510083, Actual: 741
Model: 2517.1946503181125, Actual: 2337
Model: -249.23438957900498, Actual: 516
Model: 3955.0522023598755, Actual: 3360
Model: 1893.877773739364, Actual: 1590
Model: 222.98147112147126, Actual: 745
Model: 9490.395019685704, Actual: 13553
Model: 487.3954765173503, Actual: 694
Model: -186.91430841257352, Actual: 518
Model: 952.8639218626117, Actual: 1035
Model: 938.2707429990228, Actual: 791
Model: 5049.787978881978, Actual: 5666
Model: 54.44876132492709, Actual: 662
Model: 13472.011908827984, Actual: 12811
Model: 5982.7090718802865, Actual: 8317
Model: 5152.117635220179, Actual: 5068
Model: 12918.962487819312, Actual: 13403
Model: 7126.662649884535, Actual: 10004
Model: 909.2025604797846, Actual: 1105
Model: 4.87272

In [11]:
clf = svm.SVR(kernel='rbf')
clf.fit(X_train, y_train)



SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False)

In [12]:
print(clf.score(X_test, y_test))

for X, y in zip(X_test, y_test):
    print(f'Model: {clf.predict([X])[0]}, Actual: {y}')

0.593796980590623
Model: 1506.3574682953101, Actual: 1607
Model: 5385.9882165705785, Actual: 10416
Model: 1185.0544144315463, Actual: 1238
Model: 2399.778356154997, Actual: 2083
Model: 576.3283259855821, Actual: 854
Model: 5037.0678854693415, Actual: 6300
Model: 945.9779887772002, Actual: 741
Model: 2729.1426164461523, Actual: 2337
Model: 997.7235157770883, Actual: 516
Model: 3852.2659922874336, Actual: 3360
Model: 1744.0135887924177, Actual: 1590
Model: 956.9102614557123, Actual: 745
Model: 7523.528389718668, Actual: 13553
Model: 913.9775497051114, Actual: 694
Model: 335.1143647408271, Actual: 518
Model: 1050.6738951741868, Actual: 1035
Model: 1025.6920599597029, Actual: 791
Model: 5183.790947044592, Actual: 5666
Model: 1220.5416532606205, Actual: 662
Model: 3525.9986689902207, Actual: 12811
Model: 5499.698917253146, Actual: 8317
Model: 5000.539356261534, Actual: 5068
Model: 6688.242700604767, Actual: 13403
Model: 6175.162387927276, Actual: 10004
Model: 1079.1746242410836, Actual: 110