In [6]:
import pandas as pd

df = pd.read_csv("dataset/diamonds.csv", index_col=0)
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [5]:
df["cut"].unique()

array(['Ideal', 'Premium', 'Good', 'Very Good', 'Fair'], dtype=object)

In [7]:
cut_class_dict = {"Fair": 1, "Good": 2, "Very Good": 3, "Premium": 4, "Ideal": 5}
clarity_dict = {"I3": 1, "I2": 2, "I1": 3, "SI2": 4, "SI1": 5, "VS2": 6, "VS1": 7, "VVS2": 8, "VVS1": 9, "IF": 10, "FL": 11}
color_dict = {"J": 1,"I": 2,"H": 3,"G": 4,"F": 5,"E": 6,"D": 7}

df["cut"] = df["cut"].map(cut_class_dict)
df["clarity"] = df["clarity"].map(clarity_dict)
df["color"] = df["color"].map(color_dict)

df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,5,6,4,61.5,55.0,326,3.95,3.98,2.43
2,0.21,4,6,5,59.8,61.0,326,3.89,3.84,2.31
3,0.23,2,6,7,56.9,65.0,327,4.05,4.07,2.31
4,0.29,4,2,6,62.4,58.0,334,4.2,4.23,2.63
5,0.31,2,1,4,63.3,58.0,335,4.34,4.35,2.75


In [10]:
import sklearn
from sklearn import svm, preprocessing

df = sklearn.utils.shuffle(df)

X = df.drop("price", axis=1).values
X = preprocessing.scale(X)
y = df["price"].values

test_size = 200

X_train = X[:-test_size]
y_train = y[:-test_size]

X_test = X[-test_size:]
y_test = y[-test_size:]

clf = svm.SVR(kernel="linear")
clf.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [11]:
clf.score(X_test, y_test)

0.8944701541763354

In [13]:
for X,y in zip(X_test, y_test):
    print(f"Model: {clf.predict([X])[0]}, Actual: {y}")

Model: 1543.0456500117261, Actual: 1151
Model: 255.96369069641332, Actual: 599
Model: 1492.3858232327098, Actual: 1111
Model: 3614.8889440308194, Actual: 3282
Model: 721.7923990981885, Actual: 750
Model: 2496.393595814049, Actual: 2294
Model: 2442.889433650031, Actual: 2117
Model: 6116.4658087452735, Actual: 4791
Model: 2067.6837953600307, Actual: 1771
Model: 675.9424021596819, Actual: 967
Model: 3985.958496668828, Actual: 3968
Model: 1526.7869875597862, Actual: 1043
Model: 6760.554075681802, Actual: 8442
Model: 2407.047271246889, Actual: 2260
Model: 13452.979782414517, Actual: 17905
Model: 3686.3343842218387, Actual: 3655
Model: 861.0531613445546, Actual: 742
Model: 603.8776866555222, Actual: 1069
Model: -86.64585472898762, Actual: 471
Model: 1037.706787660477, Actual: 1089
Model: 11966.30187926441, Actual: 17393
Model: 7087.454561249773, Actual: 9240
Model: 8476.835340789778, Actual: 10153
Model: 670.9581862610712, Actual: 722
Model: -243.04959412935614, Actual: 506
Model: 3099.53269