In [93]:
import pandas as pd
import numpy as np

In [94]:
df=pd.read_csv(r"data\gemstone.csv")

In [95]:
df.sample(2)

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price
29036,29036,1.13,Premium,J,VS2,61.2,59.0,6.71,6.67,4.1,5049
66208,66208,0.31,Very Good,H,VVS2,62.2,58.0,4.29,4.36,2.69,717


In [96]:
df=df.drop(labels="id",axis=True)

In [97]:
x=df.drop(labels="price",axis=1)
y=df["price"]
categorical_columns=x.select_dtypes(include='object').columns
numerical_columns=x.select_dtypes(exclude='object').columns

In [98]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler,OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer


In [99]:
numerical_pipeline=Pipeline(
    steps=[
        ('imputer',SimpleImputer()),
        ('scaler',StandardScaler())
    ]

)

In [100]:
cut_categories = ['Fair', 'Good', 'Very Good','Premium','Ideal']
color_categories = ['D', 'E', 'F', 'G', 'H', 'I', 'J']
clarity_categories = ['I1','SI2','SI1','VS2','VS1','VVS2','VVS1','IF']

In [101]:
categorical_pipeline=Pipeline(
    
        steps=[
        ('imputer',SimpleImputer(strategy='most_frequent')),
        ('ordinalencoder',OrdinalEncoder(categories=[cut_categories,color_categories,clarity_categories]))
    ]
    
)
    


In [102]:
preprocessor=ColumnTransformer([
    ('numerical_pipeline',numerical_pipeline,numerical_columns),
    ('categorical_pipeline',categorical_pipeline,categorical_columns)]

)

In [103]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=.30)

In [104]:
preprocessor.fit_transform(x_train)


array([[-0.82208996, -0.38796581,  0.92005842, ...,  3.        ,
         3.        ,  3.        ],
       [-1.06006905,  0.07323712, -1.16229678, ...,  4.        ,
         1.        ,  2.        ],
       [-1.03843459, -1.1258905 ,  0.92005842, ...,  2.        ,
         1.        ,  6.        ],
       ...,
       [ 1.53606652,  0.9034024 ,  0.39946962, ...,  2.        ,
         1.        ,  1.        ],
       [ 0.8437637 , -0.01900346, -0.12111918, ...,  4.        ,
         5.        ,  3.        ],
       [ 0.49761229, -0.84916874,  0.39946962, ...,  3.        ,
         0.        ,  2.        ]])

In [105]:
x_train=pd.DataFrame(preprocessor.fit_transform(x_train),columns=preprocessor.get_feature_names_out())
x_test=pd.DataFrame(preprocessor.transform(x_test),columns=preprocessor.get_feature_names_out())

In [106]:
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [107]:
models={'LinearRegression':LinearRegression(),
        'Lasso':Lasso(),
        'Ridge':Ridge(),
        'ElasticNet':ElasticNet()}

In [108]:
models.values()

dict_values([LinearRegression(), Lasso(), Ridge(), ElasticNet()])

In [109]:
models.keys()

dict_keys(['LinearRegression', 'Lasso', 'Ridge', 'ElasticNet'])

In [110]:
trained_model_list=list(),
model_list=list(),
r2_list=list()

In [111]:
type(model_list)

tuple

In [None]:
for i in range(len(list(models))):
    model=(list(models.values())[i])
    model.fit(x_train,y_train)
    y_pred=model.predict(x_test)
    mae=mean_absolute_error(y_test,y_pred)
    mse=mean_squared_error(y_test,y_pred)
    rmse=np.sqrt(mse)
    r2_square=r2_score(y_test,y_pred)
    print(list(models.keys())[i])
    #model_list.append(list(models.keys())[i])

    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)

    r2_list.append(r2_square)
    
    print('='*35)
    print('\n')
    


LinearRegression
Model Training Performance
RMSE: 1018.3264149244563
MAE: 675.7259395104601
R2 score 93.61479340528336


Lasso
Model Training Performance
RMSE: 1018.3226876627255
MAE: 676.9046917868982
R2 score 93.61484014725585


Ridge
Model Training Performance
RMSE: 1018.3081465293509
MAE: 675.750091783326
R2 score 93.61502249966586


ElasticNet
Model Training Performance
RMSE: 1515.8190535160102
MAE: 1056.0391938476018
R2 score 85.8519802177274


