In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the cleaned phone dataset. The CSV's first column is an unnamed, saved
# row index (0, 1, 2, ...); without index_col it is read back as a spurious
# "Unnamed: 0" data column, so use it as the DataFrame index instead.
data = pd.read_csv('phone_data_cleaned.csv', index_col=0)

In [9]:
# Preview the first five rows to sanity-check the loaded columns
data.head(n=5)

Unnamed: 0,model,price,rating,company,is_dual_sim,has_5g,supports_volte,supports_wifi,supports_nfc,ram_amount,...,battery_life,charging_speed,screen_size,resolution_width,resolution_height,memory_card_supported,primary_camera_resolution,num_rear_cameras,clock_speed_ghz,screen_size_category
0,OnePlus 11 5G,54999.0,89.0,OnePlus,1,1,1,1,1,12,...,5000.0,100.0,6.7,1440.0,3216.0,0,50.0,3,3.2,large
1,OnePlus Nord CE 2 Lite 5G,19989.0,81.0,OnePlus,1,1,1,1,0,6,...,5000.0,33.0,6.59,1080.0,2412.0,1,64.0,3,2.2,large
2,Samsung Galaxy A14 5G,16499.0,75.0,Samsung,1,1,1,1,0,4,...,5000.0,15.0,6.6,1080.0,2408.0,1,50.0,3,2.4,large
3,Motorola Moto G62 5G,14999.0,81.0,Motorola,1,1,1,1,0,6,...,5000.0,33.0,6.55,1080.0,2400.0,1,50.0,3,2.2,large
4,Realme 10 Pro Plus,24999.0,82.0,Realme,1,1,1,1,0,6,...,5000.0,67.0,6.7,1080.0,2412.0,0,108.0,3,2.6,large


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predictor columns fed to the model, and the column it learns to predict
feature_columns = ['price', 'ram_amount', 'primary_camera_resolution', 'battery_life', 'clock_speed_ghz']
features = data[feature_columns]
target = data['rating']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Random forest hyper-parameters: 100 trees, seeded so that repeated runs
# build the same forest
forest_params = {'n_estimators': 100, 'random_state': 42}

# Build the regressor and fit it on the training split
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)

In [5]:
# Predict ratings for the held-out test split
y_pred = model.predict(X_test)

# Error metrics comparing the predictions with the true test ratings
mae, mse = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error)
)

# Regressor.score reports the coefficient of determination (R^2) on the test split
r_squared = model.score(X_test, y_test)


In [6]:
# Pair each predictor column with the importance the fitted forest assigned
# to it, ordered from most to least important
importances = model.feature_importances_
feature_names = features.columns
feature_importance_df = (
    pd.DataFrame({'feature': feature_names, 'importance': importances})
    .sort_values(by='importance', ascending=False)
)


In [7]:
# Collect the evaluation metrics under their display labels
metric_values = {
    'Mean Absolute Error': mae,
    'Mean Squared Error': mse,
    'R-squared': r_squared,
}

# Two-column table: one row per metric, in the order listed above
results_df = pd.DataFrame({
    'Metric': list(metric_values.keys()),
    'Value': list(metric_values.values()),
})

# Print the metrics table under a heading
print("Model Evaluation Metrics:")
print(results_df)


Model Evaluation Metrics:
                Metric     Value
0  Mean Absolute Error  1.739825
1   Mean Squared Error  6.122269
2            R-squared  0.883169


In [8]:
# feature_importance_df was already built (sorted by descending importance)
# in the earlier feature-importance extraction cell; reuse it here instead of
# rebuilding an identical frame from the same inputs.

# Display the feature importance DataFrame
print("\nFeature Importances:")
print(feature_importance_df)



Feature Importances:
                     feature  importance
1                 ram_amount    0.615506
0                      price    0.197653
2  primary_camera_resolution    0.121017
4            clock_speed_ghz    0.043186
3               battery_life    0.022638
