In [1]:
import numpy as np 
import pandas as pd 

In [2]:
df = pd.read_csv('laptop_price_processed.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   Ram               1303 non-null   int64  
 5   OpSys             1303 non-null   object 
 6   Weight            1303 non-null   float64
 7   Price_euros       1303 non-null   float64
 8   IPS_Panel         1303 non-null   int64  
 9   Retina_Display    1303 non-null   int64  
 10  Touchscreen       1303 non-null   int64  
 11  Resolution_X      1303 non-null   int64  
 12  Resolution_Y      1303 non-null   int64  
 13  Total_Pixels      1303 non-null   int64  
 14  High_Resolution   1303 non-null   int64  
 15  Product_Series    1303 non-null   object 
 16  Cpu_Brand         1303 non-null   object 


In [4]:
from sklearn.preprocessing import LabelEncoder
categorical_col = ['TypeName', 'OpSys', 'Cpu_Series', 'Gpu_Series', 'Memory_Type']

le = LabelEncoder()

for col in categorical_col:
  df[col] = le.fit_transform(df[col])

In [6]:
X = df[['TypeName', 'OpSys', 'Cpu_Series', 'Gpu_Series', 'Memory_Type', 'Price_euros', 'Total_Pixels']]
y = df['Memory_Size(GB)']

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21)

In [21]:
from sklearn.ensemble import RandomForestRegressor

rfreg = RandomForestRegressor(n_estimators=312, max_features='log2', max_depth=143, criterion='squared_error', random_state=21, min_samples_split=2, min_samples_leaf=6, max_leaf_nodes=38)
rfreg.fit(X_train, y_train)

In [22]:
y_pred = rfreg.predict(X_test)
from sklearn.metrics import r2_score
print(f'Accuracy {r2_score(y_test, y_pred)}')

Accuracy 0.753419457572685


In [None]:
{'max_depth': 143, 'max_features': 'log2', 'max_leaf_nodes': 38, 'min_samples_leaf': 6, 'min_samples_split': 2, 'n_estimators': 312}
{'max_depth': 123, 'max_features': 'log2', 'max_leaf_nodes': 39, 'min_samples_leaf': 6, 'min_samples_split': 18, 'n_estimators': 220}
{'max_depth': 143, 'max_features': 'log2', 'max_leaf_nodes': 38, 'min_samples_leaf': 6, 'min_samples_split': 2, 'n_estimators': 312}

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

param_distributions = {
    'n_estimators': randint(200, 400), 
    'max_depth': randint(70, 150), 
    'max_leaf_nodes': randint(30,45),         
    'min_samples_split': randint(2, 10),
    'min_samples_leaf': randint(2, 10), 
    'max_features': ['sqrt', 'log2']  
}

random_search = RandomizedSearchCV(
    estimator=RandomForestRegressor(criterion='poisson', random_state=21),
    param_distributions=param_distributions,
    n_iter=50, 
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    random_state=21 
)

random_search.fit(X_train, y_train)

print("Best Parameters:", random_search.best_params_)

 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


Best Parameters: {'max_depth': 143, 'max_features': 'log2', 'max_leaf_nodes': 38, 'min_samples_leaf': 6, 'min_samples_split': 2, 'n_estimators': 312}
Best Accuracy: nan


In [8]:
from xgboost import XGBRegressor
xgbr = XGBRegressor(n_estimators=15, max_depth=4, eta=0.1)
xgbr.fit(X_train, y_train)

In [9]:
y_pred = xgbr.predict(X_test)
from sklearn.metrics import r2_score
print(f'Accuracy {r2_score(y_test, y_pred)}')

Accuracy 0.7091402555981415


In [49]:
from sklearn.ensemble import RandomForestClassifier
rfclass = RandomForestClassifier(criterion='gini', n_estimators=234, max_depth=32, min_samples_split=3, min_samples_leaf=5, max_features='sqrt', bootstrap=False)
rfclass.fit(X_train, y_train)

In [50]:
y_pred = rfclass.predict(X_test)
from sklearn.metrics import r2_score
print(f'Accuracy {r2_score(y_test, y_pred)}')

Accuracy 0.7019907565254753


In [None]:
{'criterion': 'gini', 'max_depth': 67, 'min_samples_leaf': 4, 'min_samples_split': 3, 'n_estimators': 137}
{'criterion': 'log_loss', 'n_estimators': 373, 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': True}
{'bootstrap': False, 'criterion': 'gini', 'max_depth': 67, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'min_samples_split': 3, 'min_weight_fraction_leaf': 0.001, 'n_estimators': 234}
{'criterion': 'gini', 'n_estimators': 165, 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'log2', 'bootstrap': False}
{'bootstrap': False, 'criterion': 'gini', 'max_depth': 32, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'min_samples_split': 3, 'n_estimators': 234}

In [48]:
from sklearn.model_selection import GridSearchCV

params = {
  'n_estimators' : [134, 234, 389],
  'max_depth' : [32, 47],
  'min_samples_split' : [3, 6, 7],
  'min_samples_leaf' : [5, 4, 7],
  'criterion' : ['entropy', 'gini', 'log_loss'],
  'bootstrap' : [True, False],
  'max_features' : ['sqrt', 'log2']
}

grid = GridSearchCV(estimator=RandomForestClassifier(random_state=21), 
                    param_grid=params, 
                    cv=5, 
                    scoring='accuracy', 
                    n_jobs = -1)

grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)
rf_model = grid.best_estimator_



Best Parameters: {'bootstrap': False, 'criterion': 'gini', 'max_depth': 32, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'min_samples_split': 3, 'n_estimators': 234}


In [45]:
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Define objective function
def objective(trial):
    params = {
        'criterion': trial.suggest_categorical('criterion', ['entropy', 'gini', 'log_loss']),
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 30),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        'random_state': 21
    }
    
    model = RandomForestClassifier(**params)
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    return score

# Run Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Best parameters
print("Best Parameters:", study.best_params)


[I 2025-02-18 17:11:29,976] A new study created in memory with name: no-name-e7bbcd65-f09d-4689-8340-7ae9a10d4b23
[I 2025-02-18 17:11:40,356] Trial 0 finished with value: 0.6813397129186602 and parameters: {'criterion': 'entropy', 'n_estimators': 442, 'max_depth': 28, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 0.6813397129186602.
[I 2025-02-18 17:11:48,368] Trial 1 finished with value: 0.619976076555024 and parameters: {'criterion': 'gini', 'n_estimators': 240, 'max_depth': 16, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': None, 'bootstrap': False}. Best is trial 0 with value: 0.6813397129186602.
[I 2025-02-18 17:11:57,931] Trial 2 finished with value: 0.6823058520426942 and parameters: {'criterion': 'log_loss', 'n_estimators': 406, 'max_depth': 19, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 2 with value: 0.6823058520426942.
[I 2025

Best Parameters: {'criterion': 'gini', 'n_estimators': 165, 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'log2', 'bootstrap': False}
