## Importing Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error,r2_score

## Data

In [2]:
data = pd.read_csv('diamonds.csv')
data.head(3)

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31


## Missing data?

In [3]:
data.isna().sum()

Unnamed: 0    0
carat         0
cut           0
color         0
clarity       0
depth         0
table         0
price         0
x             0
y             0
z             0
dtype: int64

## Categorical 2 Numerical

In [4]:
lb = LabelEncoder()

In [5]:
categorical_cols = data.select_dtypes(include=['object', 'category']).columns.tolist()
for col in categorical_cols:
    data[col] = lb.fit_transform(data[col])

In [6]:
data.head(3)

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,1,0.23,2,1,3,61.5,55.0,326,3.95,3.98,2.43
1,2,0.21,3,1,2,59.8,61.0,326,3.89,3.84,2.31
2,3,0.23,1,1,4,56.9,65.0,327,4.05,4.07,2.31


## Data Splitting

In [7]:
X = data.drop(['price'],axis=1)
y = data['price']

In [8]:
X_train,X_test, y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)

## Model & Prediction

In [9]:
reg = RandomForestRegressor().fit(X_train,y_train)

In [10]:
pred = reg.predict(X_test)

## Performance Metrics

In [11]:
print(f'train score:{reg.score(X_train,y_train):.5f}')
print(f'test score:{reg.score(X_train,y_train):.5f}')

train score:0.99999
test score:0.99999


In [12]:
print(f'Root mean Squarred error: {root_mean_squared_error(y_test,pred):.2f}')
print(f'R^2 Score: {r2_score(y_test,pred):.5f}')

Root mean Squarred error: 44.10
R^2 Score: 0.99988
