In [1]:
#import the libraries:
import pandas as pd 
import numpy as np
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import BayesianRidge 
from sklearn.linear_model import ElasticNet

# Models:

In [2]:
# import the clean datasets
#%store -r dmd_cl1
%store -r dmd_cl2
#%store -r dmd_cl3
#%store -r dmd_cl4

In [4]:
X = dmd_cl2.drop(columns = 'price')
y = dmd_cl2['price']

#### Lets divide the dataset into train and test

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=0.8)

In [7]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## 1. LINEAR REGRESSION

In [8]:
#Train the model
lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [9]:
y_pred_train_lr = lr.predict(X_train)
y_pred_test_lr = lr.predict(X_test)

In [10]:
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train_lr ))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred_train_lr ))
print('TRAIN : R2 Score:', r2_score(y_train, y_pred_train_lr ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train_lr )))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test_lr ))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_test_lr ))
print('TEST : R2 Score:', r2_score(y_test, y_pred_test_lr))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test_lr )))

TRAIN : Mean Absolute Error: 1304.1276987304614
TRAIN : Mean Squared Error: 6294601.205873628
TRAIN : R2 Score: 0.6076579129808737
TRAIN : Root Mean Squared Error: 2508.9043835653897
----------------------------------------------------------
TEST : Mean Absolute Error: 1266.569839559313
TEST : Mean Squared Error: 6067442.905997233
TEST : R2 Score: 0.6256858917778776
TEST : Root Mean Squared Error: 2463.217998066195


#### Cross Validation:

In [11]:
scores_lr = cross_val_score(lr, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
scores_lr

array([-2545.25132343, -2543.44519036, -2441.9018544 ])

## 2. LASSO

In [12]:
lasso = Lasso()
lasso.fit(X_train, y_train)

Lasso()

In [13]:
y_pred_train_lasso = lasso.predict(X_train)
y_pred_test_lasso = lasso.predict(X_test)

In [14]:
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train_lasso ))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred_train_lasso ))
print('TRAIN : R2 Score:', r2_score(y_train, y_pred_train_lasso ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train_lasso )))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test_lasso ))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_test_lasso ))
print('TEST : R2 Score:', r2_score(y_test, y_pred_test_lasso))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test_lasso )))

TRAIN : Mean Absolute Error: 1304.2569434389693
TRAIN : Mean Squared Error: 6295077.037071797
TRAIN : R2 Score: 0.6076282544529936
TRAIN : Root Mean Squared Error: 2508.9992102573083
----------------------------------------------------------
TEST : Mean Absolute Error: 1266.6524174072574
TEST : Mean Squared Error: 6070185.257749001
TEST : R2 Score: 0.6255167099715881
TEST : Root Mean Squared Error: 2463.774595564497


#### Cross Validation:

In [15]:
scores_lasso = cross_val_score(lasso, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
scores_lasso

array([-2545.21540917, -2543.39472488, -2442.23813216])

## 3. RIDGE

In [16]:
ridge = Ridge(alpha = 1)
ridge.fit(X_train, y_train)

Ridge(alpha=1)

In [17]:
y_pred_train_ridge = ridge.predict(X_train)
y_pred_test_ridge = ridge.predict(X_test)

In [18]:
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train_ridge ))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred_train_ridge ))
print('TRAIN : R2 Score:', r2_score(y_train, y_pred_train_ridge ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train_ridge )))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test_ridge ))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_test_ridge ))
print('TEST : R2 Score:', r2_score(y_test, y_pred_test_ridge))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test_ridge )))

TRAIN : Mean Absolute Error: 1304.1289031825029
TRAIN : Mean Squared Error: 6294601.4704032615
TRAIN : R2 Score: 0.6076578964927586
TRAIN : Root Mean Squared Error: 2508.9044362835466
----------------------------------------------------------
TEST : Mean Absolute Error: 1266.5712824910386
TEST : Mean Squared Error: 6067505.495254546
TEST : R2 Score: 0.6256820305067624
TEST : Root Mean Squared Error: 2463.230702807706


#### Cross validation : 

In [19]:
scores_ridge = cross_val_score(lasso, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
scores_ridge

array([-2545.21540917, -2543.39472488, -2442.23813216])

## 4. DECISSION TREE

In [20]:
tree = DecisionTreeClassifier(max_depth = 3)
tree.fit(X_train, y_train)
y_pred_train_tree = tree.predict(X_train)
y_pred_test_tree = tree.predict(X_test)

In [21]:
from sklearn import metrics
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train_tree))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred_train_tree))
print('TRAIN : R2 Score:', r2_score(y_train, y_pred_train_tree ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train_tree)))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test_tree ))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_test_tree ))
print('TRAIN : R2 Score:', r2_score(y_test, y_pred_test_tree ))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test_tree )))

TRAIN : Mean Absolute Error: 3203.715733531084
TRAIN : Mean Squared Error: 25853845.100791004
TRAIN : R2 Score: -0.6114684969793465
TRAIN : Root Mean Squared Error: 5084.667649000376
----------------------------------------------------------
TEST : Mean Absolute Error: 3191.1120998640463
TEST : Mean Squared Error: 25916329.262884688
TRAIN : R2 Score: -0.5988362522272741
TEST : Root Mean Squared Error: 5090.80831134749


#### Cross validation:

In [22]:
scores_tree = cross_val_score(tree, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
scores_tree



array([-5116.28029536, -5027.9022846 , -5109.29616017])

## 5. RANDOM FOREST

In [23]:
randforest = RandomForestRegressor(n_estimators=20, random_state=10)
randforest.fit(X_train, y_train)
y_pred_train_randforest = randforest.predict(X_train)
y_pred_test_randforest = randforest.predict(X_test)

In [24]:
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train_randforest))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred_train_randforest))
print('TRAIN : R2 Score:', r2_score(y_train, y_pred_train_randforest ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train_randforest)))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test_randforest ))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_test_randforest ))
print('TRAIN : R2 Score:', r2_score(y_test, y_pred_test_randforest ))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test_randforest )))

TRAIN : Mean Absolute Error: 135.4225700288636
TRAIN : Mean Squared Error: 86534.17960434596
TRAIN : R2 Score: 0.9946063301686568
TRAIN : Root Mean Squared Error: 294.16692472870903
----------------------------------------------------------
TEST : Mean Absolute Error: 317.1717787353647
TEST : Mean Squared Error: 410285.42191746016
TRAIN : R2 Score: 0.9746885756980469
TEST : Root Mean Squared Error: 640.5352620406312


#### Cross validation:

In [25]:
scores_randforest = cross_val_score(randforest, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
scores_randforest

array([-765.54139728, -709.08608438, -698.50908566])

## 6. K-NEAREST NEIGHBOR

In [26]:
neiclass = KNeighborsClassifier(n_neighbors=5)
neiclass.fit(X_train, y_train)

KNeighborsClassifier()

In [27]:
y_pred_train_neiclass = neiclass.predict(X_train)
y_pred_test_neiclass = neiclass.predict(X_test)

In [28]:
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train_neiclass))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred_train_neiclass))
print('TRAIN : R2 Score:', r2_score(y_train, y_pred_train_neiclass ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train_neiclass)))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test_neiclass ))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_test_neiclass ))
print('TRAIN : R2 Score:', r2_score(y_test, y_pred_test_neiclass ))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test_neiclass )))

TRAIN : Mean Absolute Error: 1193.4719132369298
TRAIN : Mean Squared Error: 7779061.573878383
TRAIN : R2 Score: 0.5151315940241301
TRAIN : Root Mean Squared Error: 2789.09691009086
----------------------------------------------------------
TEST : Mean Absolute Error: 1340.5354097144975
TEST : Mean Squared Error: 8437910.519836856
TRAIN : R2 Score: 0.4794464487883492
TEST : Root Mean Squared Error: 2904.8081726401238


#### Cross validation:

In [29]:
scores_neighclass = cross_val_score(neiclass, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
scores_neighclass



array([-3147.97585556, -3007.36937344, -3113.58211335])

## 7. BAYESIAN RIDGE

In [30]:
bay = BayesianRidge() 
bay.fit(X_train, y_train) 

BayesianRidge()

In [31]:
y_pred_train_bay = bay.predict(X_train)
y_pred_test_bay = bay.predict(X_test)

In [32]:
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train_bay))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred_train_bay))
print('TRAIN : R2 Score:', r2_score(y_train, y_pred_train_bay ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train_bay)))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test_bay  ))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_test_bay  ))
print('TRAIN : R2 Score:', r2_score(y_test, y_pred_test_bay ))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test_bay  )))

TRAIN : Mean Absolute Error: 1304.138337504394
TRAIN : Mean Squared Error: 6294618.973318725
TRAIN : R2 Score: 0.6076568055371672
TRAIN : Root Mean Squared Error: 2508.9079244401787
----------------------------------------------------------
TEST : Mean Absolute Error: 1266.5821107433321
TEST : Mean Squared Error: 6067971.429239308
TRAIN : R2 Score: 0.6256532859982935
TEST : Root Mean Squared Error: 2463.3252788130335


#### Cross validation:

In [33]:
scores_bay = cross_val_score(bay, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
scores_bay

array([-2545.20989299, -2543.4612539 , -2441.95107842])

## 8. ELASTIC NET

In [34]:
elnet = ElasticNet(random_state=0)
elnet.fit(X_train, y_train)

ElasticNet(random_state=0)

In [35]:
y_pred_train_elnet = elnet.predict(X_train)
y_pred_test_elnet = elnet.predict(X_test)

In [36]:
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred_train_elnet))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred_train_elnet))
print('TRAIN : R2 Score:', r2_score(y_train, y_pred_train_elnet ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred_train_elnet)))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred_test_elnet))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred_test_elnet))
print('TRAIN : R2 Score:', r2_score(y_test, y_pred_test_elnet ))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred_test_elnet)))

TRAIN : Mean Absolute Error: 1702.5391464563374
TRAIN : Mean Squared Error: 7563046.926126579
TRAIN : R2 Score: 0.5285957730807097
TRAIN : Root Mean Squared Error: 2750.099439316073
----------------------------------------------------------
TEST : Mean Absolute Error: 1684.2830634220206
TEST : Mean Squared Error: 7491384.926264203
TRAIN : R2 Score: 0.53783972730068
TEST : Root Mean Squared Error: 2737.0394455075366


#### Cross validation:

In [37]:
scores_elnet = cross_val_score(elnet, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
scores_elnet

array([-2752.05090597, -2797.38359603, -2704.55047918])

## Modify train_size

In [38]:
X_train2, X_test2, y_train2, y_test2 = train_test_split( X, y, train_size=0.9)

## 1.1 LINEAR REGRESSION

In [39]:
#We will try it with the model with the lower rmse, in this case : linear regression

In [40]:
#Train the model
lr_2 = LinearRegression()
lr_2.fit(X_train2, y_train2)

LinearRegression()

In [41]:
y_pred_train_lr_2 = lr_2.predict(X_train2)
y_pred_test_lr_2 = lr_2.predict(X_test2)

In [42]:
#TRAIN:
print('TRAIN : Mean Absolute Error:', metrics.mean_absolute_error(y_train2, y_pred_train_lr_2  ))
print('TRAIN : Mean Squared Error:', metrics.mean_squared_error(y_train2, y_pred_train_lr_2  ))
print('TRAIN : R2 Score:', r2_score(y_train2, y_pred_train_lr_2  ))
print('TRAIN : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train2, y_pred_train_lr_2  )))
print ('----------------------------------------------------------')
#TEST:
print('TEST : Mean Absolute Error:', metrics.mean_absolute_error(y_test2, y_pred_test_lr_2 ))
print('TEST : Mean Squared Error:', metrics.mean_squared_error(y_test2, y_pred_test_lr_2 ))
print('TEST : R2 Score:', r2_score(y_test2, y_pred_test_lr_2))
print('TEST : Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test2, y_pred_test_lr_2 )))

TRAIN : Mean Absolute Error: 1298.0266125646754
TRAIN : Mean Squared Error: 6285504.438870951
TRAIN : R2 Score: 0.6095864260297459
TRAIN : Root Mean Squared Error: 2507.0908317950807
----------------------------------------------------------
TEST : Mean Absolute Error: 1262.7281545652138
TEST : Mean Squared Error: 5912999.515263979
TEST : R2 Score: 0.6274497424063271
TEST : Root Mean Squared Error: 2431.6659958275477


#### Cross validation:

In [43]:
scores_lr_2 = cross_val_score(lr_2, X_train2, y_train2, cv=3, scoring='neg_root_mean_squared_error')
scores_lr_2

array([-2519.701224  , -2552.80215199, -2451.76998844])

# Lets introduce the second dataset

In [44]:
#import the dataset:
dmd_pred = pd.read_csv("../resources/predict.csv",index_col="id")

In [45]:
dmd_pred.head()

Unnamed: 0_level_0,carat,cut,color,clarity,depth,table,x,y,z
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,0.45,Premium,E,SI1,62.8,58.0,4.88,4.84,3.05
1,1.23,Ideal,H,SI1,61.0,56.0,6.96,6.92,4.23
2,0.33,Ideal,I,IF,61.8,55.0,4.46,4.47,2.76
3,0.51,Premium,D,VS2,58.0,60.0,5.29,5.26,3.06
4,0.4,Premium,E,VS2,62.2,59.0,4.71,4.74,2.94


In [46]:
# Same steps as with the dataset above:

In [47]:
dmd_pred = dmd_pred.drop(columns = ['x','y','z'])

In [48]:
#Value that are objcts have to be transformed to values that can be used with linear regression
#We create the dummies for the columns we want the type to change
dmd_cl1_dummy = pd.get_dummies(dmd_pred[['cut','color','clarity']], drop_first=True)
dmd_cl1_dummy.head()

dmd_pred = dmd_pred.drop(columns = ['cut','color','clarity'])

#I now join the dummies to the rest of the table
dmd_pred=dmd_pred.join(dmd_cl1_dummy)


In [49]:
#Replace outliers with the column mean:
mean_c = dmd_pred['carat'].mean()
mean_d = dmd_pred['depth'].mean()
mean_t = dmd_pred['table'].mean()

In [50]:
dmd_pred.loc[(dmd_pred.carat > 2),'carat'] = mean_c

dmd_pred.loc[(dmd_pred.depth > 64),'depth'] = mean_d
dmd_pred.loc[(dmd_pred.depth < 57),'depth'] = mean_d

dmd_pred.loc[(dmd_pred.depth > 64),'depth'] = mean_d
dmd_pred.loc[(dmd_pred.depth < 51),'depth'] = mean_d

In [51]:
dmd_pred.head()

Unnamed: 0_level_0,carat,depth,table,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,color_E,color_F,color_G,color_H,color_I,color_J,clarity_IF,clarity_SI1,clarity_SI2,clarity_VS1,clarity_VS2,clarity_VVS1,clarity_VVS2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,0.45,62.8,58.0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0
1,1.23,61.0,56.0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0
2,0.33,61.8,55.0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0
3,0.51,58.0,60.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,0.4,62.2,59.0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0


In [52]:
y_pred_predict_test = lr_2 .predict(dmd_pred)

In [53]:
y_pred_predict_test

array([1236.49149738, 7455.98457232, 1475.66098316, ..., 1149.61602761,
        625.65469808, 7029.86299058])

In [54]:
dmd_pred['price'] = y_pred_predict_test
dmd_pred.head()

Unnamed: 0_level_0,carat,depth,table,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,color_E,color_F,color_G,...,color_I,color_J,clarity_IF,clarity_SI1,clarity_SI2,clarity_VS1,clarity_VS2,clarity_VVS1,clarity_VVS2,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.45,62.8,58.0,0,0,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,1236.491497
1,1.23,61.0,56.0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,7455.984572
2,0.33,61.8,55.0,0,1,0,0,0,0,0,...,1,0,1,0,0,0,0,0,0,1475.660983
3,0.51,58.0,60.0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,2505.469341
4,0.4,62.2,59.0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,1,0,0,1334.302173


In [55]:
#export

In [56]:
dmd_pred["price"].to_csv("final2.csv",header= True )