In [1]:
import pandas as pd

df = pd.read_csv("../datasets/diamonds.csv", index_col=0)
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [3]:
cut_class_dict = {"Fair": 1, "Good": 2, "Very Good": 3, "Premium": 4, "Ideal": 5}
clarity_dict = {"I3": 1, "I2": 2, "I1": 3, "SI2": 4, "SI1": 5, "VS2": 6, "VS1": 7, "VVS2": 8, "VVS1": 9, "IF": 10, "FL": 11}
color_dict = {"J": 1,"I": 2,"H": 3,"G": 4,"F": 5,"E": 6,"D": 7}

df['cut'] = df['cut'].map(cut_class_dict)
df['clarity'] = df['clarity'].map(clarity_dict)
df['color'] = df['color'].map(color_dict)

df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,5,6,4,61.5,55.0,326,3.95,3.98,2.43
2,0.21,4,6,5,59.8,61.0,326,3.89,3.84,2.31
3,0.23,2,6,7,56.9,65.0,327,4.05,4.07,2.31
4,0.29,4,2,6,62.4,58.0,334,4.2,4.23,2.63
5,0.31,2,1,4,63.3,58.0,335,4.34,4.35,2.75


In [8]:
import sklearn
from sklearn import svm, preprocessing


df = sklearn.utils.shuffle(df)

X = df.drop("price", axis=1).values
X = preprocessing.scale(X)
y = df['price'].values

test_size = 200

X_train = X[:-test_size]
y_train = y[:-test_size]

X_test = X[-test_size:]
y_test = y[-test_size:]


clf = svm.SVR(kernel="linear")
clf.fit(X_train,y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [10]:
clf.score(X_test, y_test)

0.8801819058221143

In [11]:
for X,y in zip(X_test, y_test):
    print(f"Model: {clf.predict([X])[0]}, Actual: {y}")

Model: -129.95540021219585, Actual: 491
Model: 3732.183410216282, Actual: 3624
Model: 2375.0984395900523, Actual: 1944
Model: 5984.614301082553, Actual: 5407
Model: 2155.711656463997, Actual: 1826
Model: 2039.1635295703384, Actual: 1754
Model: 1557.7661428020454, Actual: 1732
Model: 78.14807729028871, Actual: 698
Model: 6684.112753064848, Actual: 5437
Model: 1083.3650352810264, Actual: 1138
Model: 1876.5814167546366, Actual: 1580
Model: 12711.63831629706, Actual: 15684
Model: 4913.913291615041, Actual: 4719
Model: 456.6607932844631, Actual: 700
Model: 1626.0442506742222, Actual: 1187
Model: 563.0370078442425, Actual: 942
Model: 5694.523087046293, Actual: 7812
Model: 6191.601579123803, Actual: 7716
Model: 5586.112418734819, Actual: 7964
Model: 4497.141170375019, Actual: 2862
Model: 1189.0685526948482, Actual: 955
Model: 599.9205329290476, Actual: 607
Model: 5683.848084357305, Actual: 6780
Model: 12935.243671172524, Actual: 17473
Model: 11787.719171502249, Actual: 12884
Model: 5708.48329

In [12]:
clf = svm.SVR(kernel="rbf")
clf.fit(X_train,y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [13]:
clf.score(X_test, y_test)

0.541927221621714

In [14]:
clf.score(X_test, y_test)

for X,y in zip(X_test, y_test):
    print(f"Model: {clf.predict([X])[0]}, Actual: {y}")

Model: 688.9134875655859, Actual: 491
Model: 3588.416289552665, Actual: 3624
Model: 1879.105037551953, Actual: 1944
Model: 5213.887719135729, Actual: 5407
Model: 2638.802740404067, Actual: 1826
Model: 1753.1184402009033, Actual: 1754
Model: 1371.9304486053313, Actual: 1732
Model: 635.9540170974078, Actual: 698
Model: 6150.049303554742, Actual: 5437
Model: 929.1670425831853, Actual: 1138
Model: 1631.2616077475536, Actual: 1580
Model: 5161.98494336942, Actual: 15684
Model: 4615.231220912682, Actual: 4719
Model: 930.4937288824362, Actual: 700
Model: 1262.575781000028, Actual: 1187
Model: 1541.3191155811955, Actual: 942
Model: 5225.943202048289, Actual: 7812
Model: 6148.771906564971, Actual: 7716
Model: 5701.742544266868, Actual: 7964
Model: 3395.5752128458075, Actual: 2862
Model: 960.3031848960641, Actual: 955
Model: 754.5512047472093, Actual: 607
Model: 5736.651497800247, Actual: 6780
Model: 5531.1033876828715, Actual: 17473
Model: 5569.936762511266, Actual: 12884
Model: 5139.55807289329