In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

In [2]:
diamonds_df = pd.read_csv('../data/diamonds_train.csv')

In [3]:
diamonds_predict = pd.read_csv('../data/diamonds_test.csv')

In [4]:
#train

diamonds_df['cut']=diamonds_df['cut'].map({'Fair':0,'Good':1,'Very Good':2,'Premium':3, 'Ideal':4})
diamonds_df['color']=diamonds_df['color'].map({'J':0, 'I':1, 'H':2, 'G':3, 'F': 4, 'E': 5, 'D':6})
diamonds_df['clarity']=diamonds_df['clarity'].map({'I1':0,'SI2':1,'SI1':2,'VS2':3,'VS1':4,'VVS2':5,'VVS1':6,'IF':7})

#predict

diamonds_predict['cut']=diamonds_predict['cut'].map({'Fair':0,'Good':1,'Very Good':2,'Premium':3, 'Ideal':4})
diamonds_predict['color']=diamonds_predict['color'].map({'J':0, 'I':1, 'H':2, 'G':3, 'F': 4, 'E': 5, 'D':6})
diamonds_predict['clarity']=diamonds_predict['clarity'].map({'I1':0,'SI2':1,'SI1':2,'VS2':3,'VS1':4,'VVS2':5,'VVS1':6,'IF':7})

In [5]:
sc = StandardScaler()
columns_df = ['cut', 'color', 'clarity', 'carat', 'depth', 'table', 'x', 'y', 'z']
X = sc.fit_transform(diamonds_df[columns_df])
y = diamonds_df['price'].values

In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y)

In [10]:
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import KFold, RepeatedKFold
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

### MODELOS

In [11]:
model = GradientBoostingRegressor(n_estimators=100)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [12]:
print(f"Mean squared error: {mean_squared_error(y_test, y_pred, squared=False)}")
R2 = r2_score(y_test,y_pred)
print(f"R Squared: {R2}")

Mean squared error: 616.0821423704786
R Squared: 0.9760795880378982


In [13]:
model = BaggingRegressor(n_estimators=100)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [14]:
print(f"Mean squared error: {mean_squared_error(y_test, y_pred, squared=False)}")
R2 = r2_score(y_test,y_pred)
print(f"R Squared: {R2}")

Mean squared error: 558.414536176828
R Squared: 0.9803480865134572


In [15]:
model = AdaBoostRegressor(n_estimators=100)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [16]:
print(f"Mean squared error: {mean_squared_error(y_test, y_pred, squared=False)}")
R2 = r2_score(y_test,y_pred)
print(f"R Squared: {R2}")

Mean squared error: 1390.3117636837642
R Squared: 0.8781807816533956


In [17]:
model = RandomForestRegressor(n_estimators=100)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [18]:
print(f"Mean squared error: {mean_squared_error(y_test, y_pred, squared=False)}")
R2 = r2_score(y_test,y_pred)
print(f"R Squared: {R2}")

Mean squared error: 561.182445355244
R Squared: 0.9801527852879849


In [19]:
import lightgbm as lgb

In [25]:
model = lgb.LGBMRegressor()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [26]:
print(f"Mean squared error: {mean_squared_error(y_test, y_pred, squared=False)}")
R2 = r2_score(y_test,y_pred)
print(f"R Squared: {R2}")

Mean squared error: 540.8860663707873
R Squared: 0.9815624589035227


In [32]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, 
                         X, 
                         y, 
                         scoring='neg_root_mean_squared_error', 
                         cv=5, n_jobs=-1)

In [33]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()


X_test = sc.transform(diamonds_predict[columns_df].values)
y_hat = model.predict(X_test).clip(0, 20000)
submission = pd.DataFrame({'id': diamonds_predict['id'], 'price': y_hat})
submission.to_csv('../data/resultados4.csv', index=False)
