In [26]:
import pandas as pd;
import re;
from sklearn.preprocessing import LabelEncoder;
from sklearn.model_selection import train_test_split;
from sklearn.ensemble import RandomForestRegressor;
from sklearn.multioutput import MultiOutputRegressor;
from sklearn.metrics import r2_score, mean_squared_error;

In [27]:
df = pd.read_csv("nanoreview.csv")
sf = pd.read_csv("Mobilesdataset.csv", encoding="latin1")

In [28]:
processor = df[["tablescraper-selected-row","tablescraper-selected-row 2"]]

In [29]:
Aplus = processor.loc[processor["tablescraper-selected-row 2"] == 'A+',"tablescraper-selected-row"].to_list()
#Aplus = Aplus.append('A17 Bionic')
A = processor.loc[processor["tablescraper-selected-row 2"] == 'A',"tablescraper-selected-row"].to_list()
B = processor.loc[processor["tablescraper-selected-row 2"] == 'B',"tablescraper-selected-row"].to_list()
C = processor.loc[processor["tablescraper-selected-row 2"] == 'C',"tablescraper-selected-row"].to_list()
D = processor.loc[processor["tablescraper-selected-row 2"] == 'D',"tablescraper-selected-row"].to_list()


In [30]:
def performance(pro_name):
    rate = 0
    if(pro_name in A):
        rate += 8
    elif(pro_name in Aplus or pro_name == 'A17 Bionic'):
        rate += 10
    elif(pro_name in B):
        rate += 6
    elif(pro_name in C):
        rate += 4
    elif(pro_name in D):
        rate += 3
    else:
        rate += 3
    return round(min(rate,10))
sf['Performance'] = sf['Processor'].apply(performance)


In [31]:
def camera(frontcamera,backcamera):
    frontcamera = int(re.findall(r'\d+',frontcamera)[0])
    backcamera = int(re.findall(r'\d+',backcamera)[0])
    raw_score = (0.4 * frontcamera + 0.6 * backcamera) 
    return round(min(raw_score, 10), 1)
sf['Camera'] = sf.apply(lambda row: camera(row['Front Camera'], row['Back Camera']), axis=1)



In [32]:
def rate_battery(capacity_mah1):
    capacity_mah = re.findall(r'\d+',capacity_mah1)
    capacity_mah = int(''.join(capacity_mah))
    score = 0
    if capacity_mah >= 5500:
        score += 10
    elif capacity_mah >= 5000:
        score += 8
    elif capacity_mah >= 4500:
        score += 7
    elif capacity_mah >= 3500:
        score += 5
    else:
        score += 2
    return round(min(score, 10), 1)
sf['Battery'] = sf['Battery Capacity'].apply(rate_battery)


In [33]:
def screensize(size_inch1):
    size_inch = re.findall(r'\d+',size_inch1)
    if size_inch:
        size_inch = int(size_inch[0])
        score = 0
        if size_inch <= 4.5:
            score += 2
        elif size_inch <= 5.0:
            score += 3
        elif size_inch <= 5.5:
            score += 5
        elif size_inch <= 6.0:
            score += 8
        elif size_inch <= 6.8:
            score += 9
        else:
            score += 0
        return score
    else:
        return 0
sf['Screen_Formfactor'] = sf['Screen Size'].apply(screensize)

In [34]:
def rate_ram(ram_gb1):
    ram_gb = re.findall(r'\d+', ram_gb1)
    if ram_gb:  # Check if the list is not empty
        ram_gb = int(ram_gb[0])  # Convert the first number to an integer
        if ram_gb <= 3:
            return 2  # Very low, outdated
        elif ram_gb <= 4:
            return 4  # Entry-level
        elif ram_gb <= 6:
            return 6  # Basic multitasking
        elif ram_gb <= 8:
            return 8 # Mid-range
        elif ram_gb < 12:
            return 9   # Excellent multitasking and gaming
        else:
            return 10  # Flagship-level, future-proof
    else:
        return 0  # Default value if no number is found

sf['Multitasking'] = sf['RAM'].apply(rate_ram)

In [35]:
sf['RAM'] = sf['RAM'].str.extract(r'(\d+)').astype(int)
sf['Battery Capacity'] = sf['Battery Capacity'].str.replace(',','')
sf['Battery Capacity'] = sf['Battery Capacity'].str.extract(r'(\d+)').astype(int)
sf['Front Camera'] = sf['Front Camera'].str.extract(r'(\d+)').astype(int)
sf['Back Camera'] = sf['Back Camera'].str.extract(r'(\d+)').astype(int)
sf['Screen Size'] = sf['Screen Size'].str.extract(r'(\d+\.?\d*)').astype(float)
le = LabelEncoder()
sf['Processor'] = le.fit_transform(sf['Processor'])


print(sf)

    Company Name            Model Name Mobile Weight  RAM  Front Camera  \
0          Apple       iPhone 16 128GB          174g    6            12   
1          Apple       iPhone 16 256GB          174g    6            12   
2          Apple       iPhone 16 512GB          174g    6            12   
3          Apple  iPhone 16 Plus 128GB          203g    6            12   
4          Apple  iPhone 16 Plus 256GB          203g    6            12   
..           ...                   ...           ...  ...           ...   
925         Poco          Pad 5G 128GB          571g    8             8   
926         Poco          Pad 5G 256GB          571g    8             8   
927      Samsung  Galaxy Z Fold6 256GB          239g   12            10   
928      Samsung  Galaxy Z Fold6 512GB          239g   12            10   
929      Samsung    Galaxy Z Fold6 1TB          239g   12            10   

     Back Camera  Processor  Battery Capacity  Screen Size  \
0             48          7          

In [48]:
feature_target_map = {
    "Performance": ["Processor"],
    "Camera": ["Front Camera", "Back Camera"],
    "Battery": ["Battery Capacity"],
    "Screen_Formfactor": ["Screen Size"],
    "Multitasking": ["RAM"]
}
models = {}
scores = {}
for target, features in feature_target_map.items():
    X = sf[features]
    y = sf[target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    models[target] = model

In [49]:
# New mobile input: [RAM, Processor, Battery Capacity, Front Camera, Back Camera, Screen Size]
new_mobile = [[12, le.transform(['A15 Bionic'])[0], 2000, 16, 64, 6.5]]

# Column order for unpacking
columns = ['RAM', 'Processor', 'Battery Capacity', 'Front Camera', 'Back Camera', 'Screen Size']

# Convert to dictionary for flexible feature selection
new_mobile_dict = dict(zip(columns, new_mobile[0]))

# Predict each target based on its respective features
predictions = {}

for target, features in feature_target_map.items():
    model = models[target]
    
    # Extract relevant features from new_mobile
    input_vector = [[new_mobile_dict[feat] for feat in features]]
    
    # Predict the target score
    pred = model.predict(input_vector)[0]
    predictions[target] = round(pred,2)

# Display final predictions
print("\n📱 New Mobile Rating Predictions:")
for target, score in predictions.items():
    print(f"   ➤ {target}: {score} / 10")



📱 New Mobile Rating Predictions:
   ➤ Performance: 8.0 / 10
   ➤ Camera: 10.0 / 10
   ➤ Battery: 2.0 / 10
   ➤ Screen_Formfactor: 8.0 / 10
   ➤ Multitasking: 10.0 / 10




In [50]:
import joblib

joblib.dump(models, 'model.pkl')                   # your dict of models
joblib.dump(le, 'processor_encoder.pkl')           # LabelEncoder used on 'Processor'


['processor_encoder.pkl']