In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score

In [4]:
# Read the cleaned cellphone CSV into a DataFrame and preview its first rows.
# NOTE(review): this is an absolute, machine-specific location; the cell only runs
# where this exact path exists. Consider a path relative to the repository instead.
csv_path = 'C:\\xampp\\htdocs\\git\\MobilePhonePricePrediction\\Data\\Clean\\cleaned_cellphone.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,price,resolution,ppi,cpu_core,cpu_freq,internal_mem,ram,rear_cam,front_cam,battery,price_range
0,2357,5.2,424,8,1.35,16.0,3.0,13.0,8.0,2610,Mid
1,1749,4.0,233,2,1.3,4.0,1.0,3.15,0.0,1700,Mid
2,1916,4.7,312,4,1.2,8.0,1.5,13.0,5.0,2000,Mid
3,1315,4.0,233,2,1.3,4.0,0.512,3.15,0.0,1400,Low
4,2137,5.5,401,4,2.3,16.0,2.0,16.0,8.0,2500,Mid


In [5]:
# Build the feature matrix (x) and the target (y).
# Besides the target itself, two columns must not be fed to the models:
#   * 'price'      - price_range looks like a binned version of price, so keeping
#                    it would leak the answer into the features.
#                    NOTE(review): confirm how price_range was derived.
#   * 'Unnamed: 0' - the row index that was written out with the CSV; it identifies
#                    a row, not a property of the phone.
# errors='ignore' keeps this cell working if the CSV is re-exported without the
# index column.
non_feature_cols = ['price_range', 'price', 'Unnamed: 0']
x = df.drop(columns=non_feature_cols, errors='ignore')  # x as input
y = df['price_range']  # y as output

In [None]:
# Split the dataset into training and testing sets (80% / 20%).
# stratify=y keeps the price_range class proportions the same in both splits;
# random_state pins the shuffle so the split is identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Candidate classifiers to compare, keyed by display name.
# Every model gets the same random_state so repeated runs give the same scores
# and the comparison between models is not affected by different seeds.
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    # NOTE(review): use_label_encoder is deprecated in recent xgboost releases and
    # may only produce a warning there; kept for compatibility with older versions.
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
}

In [None]:
# Fit every candidate model on the training split and record its accuracy and
# weighted F1 score on the test split in `results`, keyed by model name.
#
# The target holds text labels (e.g. 'Low', 'Mid'). XGBClassifier only accepts
# integer class ids 0..n_classes-1, so the labels are encoded once for fitting
# and the predictions are mapped back to the original labels before scoring.
# class_names[i] is the label for id i; y_train_codes are the ids of y_train.
class_names, y_train_codes = np.unique(y_train, return_inverse=True)

results = {}
for name, model in models.items():
    model.fit(x_train, y_train_codes)
    # Predictions come back as class ids; translate them to the original labels
    # so they are comparable with y_test.
    predicted_codes = np.asarray(model.predict(x_test)).astype(int)
    y_pred = class_names[predicted_codes]
    accuracy = accuracy_score(y_test, y_pred)
    # 'weighted' averages per-class F1 by class support.
    f1 = f1_score(y_test, y_pred, average='weighted')
    results[name] = {
        'accuracy': accuracy,
        'f1_score': f1
    }
    