In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp

In [2]:
# Load the dataset from cardekho.csv (path is relative to the working directory).
df = pd.read_csv('cardekho.csv')
# NOTE: an earlier `df = df.iloc[:, :]` step (commented as "removing index
# column") selected every row and every column, so it removed nothing. It is
# dropped here; the loaded frame is used as-is.
# Preview the first 5 rows of the dataset.
df.head()
# This code is modified by Pratyush Poddar

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage(km/ltr/kg),engine,max_power,seats
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel,Individual,Manual,First Owner,23.4,1248.0,74.0,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel,Individual,Manual,Second Owner,21.14,1498.0,103.52,5.0
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol,Individual,Manual,Third Owner,17.7,1497.0,78.0,5.0
3,Hyundai i20 Sportz Diesel,2010,225000,127000,Diesel,Individual,Manual,First Owner,23.0,1396.0,90.0,5.0
4,Maruti Swift VXI BSIII,2007,130000,120000,Petrol,Individual,Manual,First Owner,16.1,1298.0,88.2,5.0


In [3]:
# Column labels of the raw dataset, in column order.
headers = [
    "name",
    "year",
    "selling_price",
    "km_driven",
    "fuel",
    "seller_type",
    "transmission",
    "owner",
    "mileage(km/ltr/kg)",
    "engine",
    "max_power",
    "seats",
]

In [4]:
# Boolean frame with the same shape as df: True where a value is missing.
missing_data = df.isnull()

# Print, for every column, how many entries are present (False) or missing (True).
# The loop walks the frame's own columns rather than a hard-coded label list, so
# the cell keeps working if the column labels are changed (e.g. by a rename).
for column in missing_data.columns:
    print(column)
    print(missing_data[column].value_counts())
    print("")

name
False    8128
Name: name, dtype: int64

year
False    8128
Name: year, dtype: int64

selling_price
False    8128
Name: selling_price, dtype: int64

km_driven
False    8128
Name: km_driven, dtype: int64

fuel
False    8128
Name: fuel, dtype: int64

seller_type
False    8128
Name: seller_type, dtype: int64

transmission
False    8128
Name: transmission, dtype: int64

owner
False    8128
Name: owner, dtype: int64

mileage(km/ltr/kg)
False    7907
True      221
Name: mileage(km/ltr/kg), dtype: int64

engine
False    7907
True      221
Name: engine, dtype: int64

max_power
False    7913
True      215
Name: max_power, dtype: int64

seats
False    7907
True      221
Name: seats, dtype: int64



In [5]:
def _standardize_label(label):
    """Return `label` trimmed, lower-cased, with spaces as '_' and parentheses removed."""
    cleaned = label.strip().lower()
    cleaned = cleaned.replace(' ', '_')
    return cleaned.replace('(', '').replace(')', '')


# Apply the same normalization to every column label for consistency.
df = df.rename(columns=_standardize_label)

In [6]:
# Show the frame to check the column labels after the rename step.
df

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileagekm/ltr/kg,engine,max_power,seats
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel,Individual,Manual,First Owner,23.40,1248.0,74,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel,Individual,Manual,Second Owner,21.14,1498.0,103.52,5.0
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol,Individual,Manual,Third Owner,17.70,1497.0,78,5.0
3,Hyundai i20 Sportz Diesel,2010,225000,127000,Diesel,Individual,Manual,First Owner,23.00,1396.0,90,5.0
4,Maruti Swift VXI BSIII,2007,130000,120000,Petrol,Individual,Manual,First Owner,16.10,1298.0,88.2,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...
8123,Hyundai i20 Magna,2013,320000,110000,Petrol,Individual,Manual,First Owner,18.50,1197.0,82.85,5.0
8124,Hyundai Verna CRDi SX,2007,135000,119000,Diesel,Individual,Manual,Fourth & Above Owner,16.80,1493.0,110,5.0
8125,Maruti Swift Dzire ZDi,2009,382000,120000,Diesel,Individual,Manual,First Owner,19.30,1248.0,73.9,5.0
8126,Tata Indigo CR4,2013,290000,25000,Diesel,Individual,Manual,First Owner,23.57,1396.0,70,5.0


In [7]:
# Summarize each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8128 entries, 0 to 8127
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              8128 non-null   object 
 1   year              8128 non-null   int64  
 2   selling_price     8128 non-null   int64  
 3   km_driven         8128 non-null   int64  
 4   fuel              8128 non-null   object 
 5   seller_type       8128 non-null   object 
 6   transmission      8128 non-null   object 
 7   owner             8128 non-null   object 
 8   mileagekm/ltr/kg  7907 non-null   float64
 9   engine            7907 non-null   float64
 10  max_power         7913 non-null   object 
 11  seats             7907 non-null   float64
dtypes: float64(3), int64(3), object(6)
memory usage: 762.1+ KB


In [8]:
# Drop every row that has a NaN in any of the four partially populated columns.
# `dropna(..., inplace=True)` returns None, so assigning its result would leave
# `cleaned_data` bound to None; dropping without `inplace` keeps the filtered frame.
cleaned_data = df.dropna(subset=['mileagekm/ltr/kg', 'engine', 'max_power', 'seats'], axis=0)
# Rows were removed, so rebuild a contiguous 0..n-1 index for the working frame.
df = cleaned_data.reset_index(drop=True)

In [9]:
# Re-check the non-null counts now that rows with missing values were dropped.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7907 entries, 0 to 7906
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              7907 non-null   object 
 1   year              7907 non-null   int64  
 2   selling_price     7907 non-null   int64  
 3   km_driven         7907 non-null   int64  
 4   fuel              7907 non-null   object 
 5   seller_type       7907 non-null   object 
 6   transmission      7907 non-null   object 
 7   owner             7907 non-null   object 
 8   mileagekm/ltr/kg  7907 non-null   float64
 9   engine            7907 non-null   float64
 10  max_power         7907 non-null   object 
 11  seats             7907 non-null   float64
dtypes: float64(3), int64(3), object(6)
memory usage: 741.4+ KB


In [10]:
# Recompute the missing-value mask on the current frame.
missing_data = df.isnull()

# Walk the mask column by column and print the present/missing tallies.
for column, is_missing in missing_data.items():
    print(column)
    print(is_missing.value_counts())
    print("")

name
False    7907
Name: name, dtype: int64

year
False    7907
Name: year, dtype: int64

selling_price
False    7907
Name: selling_price, dtype: int64

km_driven
False    7907
Name: km_driven, dtype: int64

fuel
False    7907
Name: fuel, dtype: int64

seller_type
False    7907
Name: seller_type, dtype: int64

transmission
False    7907
Name: transmission, dtype: int64

owner
False    7907
Name: owner, dtype: int64

mileagekm/ltr/kg
False    7907
Name: mileagekm/ltr/kg, dtype: int64

engine
False    7907
Name: engine, dtype: int64

max_power
False    7907
Name: max_power, dtype: int64

seats
False    7907
Name: seats, dtype: int64



In [11]:
# List the distinct raw values stored in max_power.
df['max_power'].unique()

array(['74', '103.52', '78', '90', '88.2', '81.86', '57.5', '37', '67.1',
       '68.1', '108.45', '60', '73.9', '67', '82', '88.5', '46.3',
       '88.73', '64.1', '98.6', '88.8', '83.81', '83.1', '47.3', '73.8',
       '34.2', '35', '81.83', '40.3', '121.3', '138.03', '160.77',
       '117.3', '116.3', '83.14', '67.05', '168.5', '100', '120.7',
       '98.63', '175.56', '103.25', '171.5', '100.6', '174.33', '187.74',
       '170', '78.9', '88.76', '86.8', '108.495', '108.62', '93.7',
       '103.6', '98.59', '189', '67.04', '68.05', '58.2', '82.85', '81.8',
       '73', '120', '94.68', '160', '65', '155', '69.01', '126.32',
       '138.1', '83.8', '126.2', '98.96', '62.1', '86.7', '188', '214.56',
       '177', '280', '148.31', '254.79', '190', '177.46', '204', '141',
       '117.6', '241.4', '282', '150', '147.5', '108.5', '103.5', '183',
       '181.04', '157.7', '164.7', '91.1', '400', '68', '75', '85.8',
       '87.2', '53', '118', '103.2', '83', '84', '58.16', '147.94',
       '

In [12]:
# max_power holds strings, so parse it to float. `errors='coerce'` turns any
# entry that is not a valid number (such as a blank ' ' placeholder) into NaN
# instead of raising, so other blank/garbage spellings are handled too.
# NOTE(review): rows whose max_power becomes NaN here are not dropped afterwards;
# confirm the downstream model is meant to receive them.
df['max_power'] = pd.to_numeric(df['max_power'], errors='coerce').astype(float)

In [13]:
# Confirm max_power's dtype and non-null count after the float conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7907 entries, 0 to 7906
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              7907 non-null   object 
 1   year              7907 non-null   int64  
 2   selling_price     7907 non-null   int64  
 3   km_driven         7907 non-null   int64  
 4   fuel              7907 non-null   object 
 5   seller_type       7907 non-null   object 
 6   transmission      7907 non-null   object 
 7   owner             7907 non-null   object 
 8   mileagekm/ltr/kg  7907 non-null   float64
 9   engine            7907 non-null   float64
 10  max_power         7906 non-null   float64
 11  seats             7907 non-null   float64
dtypes: float64(4), int64(3), object(5)
memory usage: 741.4+ KB


In [14]:
from sklearn.preprocessing import OneHotEncoder

# String-valued columns that are replaced by numeric indicator columns.
categorical_columns = ['name','fuel', 'seller_type', 'transmission', 'owner']
# drop='first' leaves out one indicator per column; sparse_output=False yields a dense array.
encoder = OneHotEncoder(drop='first', sparse_output=False)
encoded_features = encoder.fit_transform(df[categorical_columns])
# Give the encoded frame df's own index: the column-wise concat below aligns on
# index labels, so a default 0..n-1 index would misalign rows (and introduce
# NaNs) whenever df's index is not the default range.
encoded_df = pd.DataFrame(
    encoded_features,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=df.index,
)
# Swap the raw categorical columns for their encoded counterparts.
df = pd.concat([df.drop(categorical_columns, axis=1), encoded_df], axis=1)

# Display the first few rows of the encoded DataFrame
print(df.head())

   year  selling_price  km_driven  mileagekm/ltr/kg  engine  max_power  seats  \
0  2014         450000     145500             23.40  1248.0      74.00    5.0   
1  2014         370000     120000             21.14  1498.0     103.52    5.0   
2  2006         158000     140000             17.70  1497.0      78.00    5.0   
3  2010         225000     127000             23.00  1396.0      90.00    5.0   
4  2007         130000     120000             16.10  1298.0      88.20    5.0   

   name_Ambassador Classic 2000 DSZ AC PS  \
0                                     0.0   
1                                     0.0   
2                                     0.0   
3                                     0.0   
4                                     0.0   

   name_Ambassador Grand 1500 DSZ BSIII  name_Ambassador Grand 2000 DSZ PW CL  \
0                                   0.0                                   0.0   
1                                   0.0                                   0.0   

In [15]:
# Show the frame after the categorical columns were one-hot encoded.
df

Unnamed: 0,year,selling_price,km_driven,mileagekm/ltr/kg,engine,max_power,seats,name_Ambassador Classic 2000 DSZ AC PS,name_Ambassador Grand 1500 DSZ BSIII,name_Ambassador Grand 2000 DSZ PW CL,...,fuel_Diesel,fuel_LPG,fuel_Petrol,seller_type_Individual,seller_type_Trustmark Dealer,transmission_Manual,owner_Fourth & Above Owner,owner_Second Owner,owner_Test Drive Car,owner_Third Owner
0,2014,450000,145500,23.40,1248.0,74.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
1,2014,370000,120000,21.14,1498.0,103.52,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
2,2006,158000,140000,17.70,1497.0,78.00,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
3,2010,225000,127000,23.00,1396.0,90.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
4,2007,130000,120000,16.10,1298.0,88.20,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7902,2013,320000,110000,18.50,1197.0,82.85,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
7903,2007,135000,119000,16.80,1493.0,110.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
7904,2009,382000,120000,19.30,1248.0,73.90,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
7905,2013,290000,25000,23.57,1396.0,70.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0


In [16]:
# Summarize the encoded frame (column count, dtypes, memory usage).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7907 entries, 0 to 7906
Columns: 1999 entries, year to owner_Third Owner
dtypes: float64(1996), int64(3)
memory usage: 120.6 MB


In [17]:
# Target: the price column the model will predict.
y = df['selling_price']
# Features: every remaining column of the frame.
x = df.drop(columns=['selling_price'])

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Show the training feature matrix.
X_train

Unnamed: 0,year,km_driven,mileagekm/ltr/kg,engine,max_power,seats,name_Ambassador Classic 2000 DSZ AC PS,name_Ambassador Grand 1500 DSZ BSIII,name_Ambassador Grand 2000 DSZ PW CL,name_Ashok Leyland Stile LE,...,fuel_Diesel,fuel_LPG,fuel_Petrol,seller_type_Individual,seller_type_Trustmark Dealer,transmission_Manual,owner_Fourth & Above Owner,owner_Second Owner,owner_Test Drive Car,owner_Third Owner
2968,2006,62900,18.90,998.0,67.10,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2439,2013,95000,20.46,1461.0,83.80,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
1765,2017,38000,23.01,999.0,67.00,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
6417,2014,65000,21.14,1498.0,103.52,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
7692,2018,9500,21.50,1497.0,108.50,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5226,2015,70000,15.40,2179.0,120.00,7.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
5390,2018,20000,18.20,1199.0,88.70,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
860,2003,90000,13.20,1495.0,94.00,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
7603,2012,60000,15.10,2179.0,140.00,7.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0


In [20]:
# Show the held-out test feature matrix.
X_test

Unnamed: 0,year,km_driven,mileagekm/ltr/kg,engine,max_power,seats,name_Ambassador Classic 2000 DSZ AC PS,name_Ambassador Grand 1500 DSZ BSIII,name_Ambassador Grand 2000 DSZ PW CL,name_Ashok Leyland Stile LE,...,fuel_Diesel,fuel_LPG,fuel_Petrol,seller_type_Individual,seller_type_Trustmark Dealer,transmission_Manual,owner_Fourth & Above Owner,owner_Second Owner,owner_Test Drive Car,owner_Third Owner
3641,2014,68000,22.90,1248.0,74.00,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5531,2015,110000,20.45,1461.0,83.80,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3524,2012,185000,19.40,1405.0,70.00,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
7479,2012,90000,21.43,1364.0,87.20,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3455,2018,15000,20.89,1197.0,81.80,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1317,2013,100000,20.54,1598.0,103.60,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
622,2010,168000,14.00,2498.0,112.00,7.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
1419,2010,50000,20.00,1399.0,68.00,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
4287,2017,9000,13.60,1999.0,177.00,5.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Show the training target values (selling_price).
y_train

2968     135000
2439     490000
1765     325000
6417     640000
7692     850000
         ...   
5226    1100000
5390     600000
860      110000
7603     700000
7270     600000
Name: selling_price, Length: 6325, dtype: int64

In [22]:
# Show the test target values (selling_price).
y_test

3641     501000
5531    1000000
3524     140000
7479     476999
3455     620000
         ...   
1317     330000
622      229999
1419     144000
4287    2711000
7821     680000
Name: selling_price, Length: 1582, dtype: int64

In [23]:
# Show the full encoded frame (feature columns plus selling_price).
df

Unnamed: 0,year,selling_price,km_driven,mileagekm/ltr/kg,engine,max_power,seats,name_Ambassador Classic 2000 DSZ AC PS,name_Ambassador Grand 1500 DSZ BSIII,name_Ambassador Grand 2000 DSZ PW CL,...,fuel_Diesel,fuel_LPG,fuel_Petrol,seller_type_Individual,seller_type_Trustmark Dealer,transmission_Manual,owner_Fourth & Above Owner,owner_Second Owner,owner_Test Drive Car,owner_Third Owner
0,2014,450000,145500,23.40,1248.0,74.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
1,2014,370000,120000,21.14,1498.0,103.52,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
2,2006,158000,140000,17.70,1497.0,78.00,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
3,2010,225000,127000,23.00,1396.0,90.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
4,2007,130000,120000,16.10,1298.0,88.20,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7902,2013,320000,110000,18.50,1197.0,82.85,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
7903,2007,135000,119000,16.80,1493.0,110.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
7904,2009,382000,120000,19.30,1248.0,73.90,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
7905,2013,290000,25000,23.57,1396.0,70.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0


In [24]:
# NOTE(review): this repeats the display of df from the cell just before it; likely removable.
df

Unnamed: 0,year,selling_price,km_driven,mileagekm/ltr/kg,engine,max_power,seats,name_Ambassador Classic 2000 DSZ AC PS,name_Ambassador Grand 1500 DSZ BSIII,name_Ambassador Grand 2000 DSZ PW CL,...,fuel_Diesel,fuel_LPG,fuel_Petrol,seller_type_Individual,seller_type_Trustmark Dealer,transmission_Manual,owner_Fourth & Above Owner,owner_Second Owner,owner_Test Drive Car,owner_Third Owner
0,2014,450000,145500,23.40,1248.0,74.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
1,2014,370000,120000,21.14,1498.0,103.52,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
2,2006,158000,140000,17.70,1497.0,78.00,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
3,2010,225000,127000,23.00,1396.0,90.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
4,2007,130000,120000,16.10,1298.0,88.20,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7902,2013,320000,110000,18.50,1197.0,82.85,5.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
7903,2007,135000,119000,16.80,1493.0,110.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
7904,2009,382000,120000,19.30,1248.0,73.90,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
7905,2013,290000,25000,23.57,1396.0,70.00,5.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0


In [25]:
from sklearn.ensemble import RandomForestRegressor

# Initialize and train the Random Forest Regressor
# random_state=42 pins the estimator's random seed; all other settings are library defaults.
model = RandomForestRegressor(random_state=42)
# Fit on the training split only; X_test / y_test are kept for evaluation.
model.fit(X_train, y_train)


In [26]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the fitted model on the held-out features.
y_pred = model.predict(X_test)

# Evaluate each metric against the true test targets (same argument order for all three).
mae, mse, r2 = (
    score(y_test, y_pred)
    for score in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report the three metrics, one per line.
print(
    f"Mean Absolute Error: {mae}",
    f"Mean Squared Error: {mse}",
    f"R² Score: {r2}",
    sep="\n",
)


Mean Absolute Error: 59806.06723589174
Mean Squared Error: 13100259756.352495
R² Score: 0.9824759552473552


In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameter values for the Random Forest; every combination is tried.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
)

# 5-fold cross-validated search scored by R², fitted on the training split only.
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='r2',
)
grid_search.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated score.
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best R² Score: {grid_search.best_score_}")
