In [1]:
import keras
import joblib
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
import json
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import random as rd
from itertools import product

In [3]:
cpu_path = "cpu_v1.csv"
mb_path = "mb_v1.csv"
gpu_path = "gpu_v1.csv"
ram_path = "ram_v2.csv"
ps_path = "ps_v1.csv"

gpu_df = pd.read_csv(gpu_path)
cpu_df = pd.read_csv(cpu_path)
mb_df = pd.read_csv(mb_path)
cpu_df = cpu_df[cpu_df['socket'].isin(mb_df['socket'].unique())]
ram_df = pd.read_csv(ram_path)
ps_df = pd.read_csv(ps_path)

In [42]:
cpu = cpu_df.sample()
mb = mb_df[(mb_df['socket'].isin(cpu['socket']))].sample()
print(mb)

                             name   price   socket form_factor  max_memory  \
604  Gigabyte Z490 AORUS ELITE AC  329.98  LGA1200         ATX         128   

     memory_slots  
604             4  


In [4]:
def calculate_new_values(row):
    modules = eval(row['modules'])  
    planks = len(modules) 
    size = modules[0]*modules[1] 
    row['planks'] = planks  
    row['size'] = size 
    return row

In [5]:
temp = ram_df.apply(calculate_new_values, axis=1)

In [None]:
temp.to_csv("ram_v2.csv", index=False)

In [None]:
with open("memory.json", "r", encoding='utf-8') as file:
    data = json.load(file)
    
ram_df = pd.DataFrame(data)

In [None]:
print(ram_df.head(10))

In [None]:
ram_df = ram_df.drop('color', axis='columns')
ram_df = ram_df.dropna()
ram_df.to_csv("ram_v1.csv", index=False)

In [None]:
with open("power-supply.json", "r", encoding='utf-8') as file:
    data = json.load(file)
    
ps_df = pd.DataFrame(data)

In [None]:
print(ps_df.head(10))

In [None]:
ps_df = ps_df.drop('color', axis='columns')
ps_df = ps_df.dropna()
ps_df.to_csv("ps_v1.csv", index=False)

In [145]:
print(mb_df.head(10))

                              name   price   socket form_factor  max_memory  \
0            MSI MAG B550 TOMAHAWK  169.99      AM4         ATX         128   
1                   MSI B550-A PRO  139.99      AM4         ATX         128   
2           MSI B550M PRO-VDH WIFI  119.99      AM4   Micro ATX         128   
3     Gigabyte Z790 AORUS ELITE AX  239.99  LGA1700         ATX         128   
4            Asus Prime B450M-A II   79.99      AM4   Micro ATX         128   
5  MSI MAG B660 TOMAHAWK WIFI DDR4  189.99  LGA1700         ATX         128   
6     Gigabyte B650 AORUS ELITE AX  202.30      AM5         ATX         128   
7     Asus ROG STRIX B550-A GAMING  169.99      AM4         ATX         128   
8       MSI MAG Z790 TOMAHAWK WIFI  276.79  LGA1700         ATX         192   
9       MSI MAG B650 TOMAHAWK WIFI  219.95      AM5         ATX         128   

   memory_slots  
0             4  
1             4  
2             4  
3             4  
4             4  
5             4  
6   

In [143]:
print(cpu_df.head(10))

                         cpuName    price  cpuMark  cpuValue  threadMark  \
1   AMD Ryzen Threadripper 3990X  8399.69    81568      9.71        2569   
2   AMD Ryzen Threadripper 3970X  2997.99    63835     21.29        2694   
4   AMD Ryzen Threadripper 3960X  1682.90    54757     32.54        2678   
5              AMD Ryzen 9 5950X   649.74    46195     71.10        3498   
6          Intel Core i9-12900KS   779.98    44243     56.72        4317   
7          Intel Core i9-12900KF   572.98    41163     71.84        4217   
8           Intel Core i9-12900K   599.95    41077     68.47        4209   
10             AMD Ryzen 9 5900X   393.03    39457    100.39        3494   
11             AMD Ryzen 9 3950X   650.00    39157     60.24        2738   
12           Intel Core i9-12900   509.00    38281     75.21        4162   

    threadValue    TDP  powerPerf  cores  testDate   socket category  
1          0.31  280.0     291.31     64      2020    sTRX4  Desktop  
2          0.90  280.

In [144]:
print(gpu_df.head(10))

               gpuName  G3Dmark  G2Dmark    price  gpuValue    TDP  \
0  GeForce RTX 3080 Ti    26887     1031  1199.99     22.41  350.0   
1     GeForce RTX 3090    26395      999  1749.99     15.08  350.0   
2    Radeon RX 6900 XT    25458     1102  1120.31     22.72  300.0   
3     GeForce RTX 3080    24853     1003   999.00     24.88  320.0   
4  GeForce RTX 3070 Ti    23367     1003   749.99     31.16  290.0   
5    Radeon RX 6800 XT    23364     1078   859.00     27.20  300.0   
6     GeForce RTX 3070    22093      969   719.99     30.69  220.0   
7  GeForce RTX 2080 Ti    21796      940   998.59     21.83  250.0   
8       Radeon RX 6800    20667     1030   758.99     27.23  250.0   
9  GeForce RTX 3060 Ti    20206      961   599.99     33.68  200.0   

   powerPerformance  testDate category  
0             76.82      2021  Desktop  
1             75.41      2020  Desktop  
2             84.86      2020  Desktop  
3             77.66      2020  Desktop  
4             80.58     

In [6]:
def generate_combinations(cpu_data, gpu_data, motherboard_data, ram_data, psu_data,max_combinations=1000):
    combinations = []
    counter = 0
    while counter <= max_combinations:
        combination = []
        combination.append(cpu_data.sample())
        combination.append(gpu_data.sample())
        mb = motherboard_data.sample()
        combination.append(mb)
        combination.append(mb['memory_slots'])
        combination.append(ram_data.sample())
        combination.append(psu_data.sample())
        if check_compatibility(combination[0], combination[2], combination[1], combination[5], combination[4]):
            combinations.append(combination)
            counter += 1
        
    return combinations

In [7]:
def check_compatibility(cpu, motherboard, gpu, psu, ram):
    if cpu['socket'].values.tolist()[0] != motherboard['socket'].values.tolist()[0]:
        return False
    if cpu['TDP'].values.tolist()[0] + gpu['TDP'].values.tolist()[0] > psu['wattage'].values.tolist()[0]:
        return False
    if ram['planks'].values.tolist()[0] > motherboard['memory_slots'].values.tolist()[0]:
        return False
    return True

In [8]:
def convert_range(x, min_x=1, max_x=40, min_y=1, max_y=100):
    return ((x - min_x) / (max_x - min_x)) * (max_y - min_y) + min_y

In [9]:
compatible_configs = generate_combinations(cpu_df, gpu_df, mb_df, ram_df, ps_df, 5000)

In [10]:
types = ['Gaming', 'Professional', 'Office', 'Multimedia', 'All-in-One']
ratings_weights = {
        'Gaming': {'cpuMark': 0.3, 'threadMark': 0.3, 'G3Dmark': 0.3, 'ram_size': 0.1},
        'Professional': {'G3Dmark': 0.5, 'G2Dmark': 0.2, 'ram_size': 0.3},
        'Office': {'threadMark': 0.6, 'ram_size': 0.4},
        'Multimedia': {'cpuMark': 0.5, 'G2Dmark': 0.4, 'ram_size': 0.1},
        'All-in-One': {'cpuMark': 0.2, 'threadMark': 0.2, 'G3Dmark': 0.2, 'G2Dmark': 0.2, 'ram_size': 0.2}
}    

In [11]:
configurations = []
for config in compatible_configs:
        pc_type = rd.choice(types)
        rating = 0
        price = config[0]['price'].values[0] + config[1]['price'].values[0] + config[2]['price'].values[0] + config[4]['price'].values[0] + config[5]['price'].values[0]
        
        if pc_type == 'Gaming':
            rating = config[0]['cpuMark'].values[0] * ratings_weights['Gaming']['cpuMark'] / 100 + config[0]['threadMark'].values[0] * ratings_weights['Gaming']['threadMark'] / 10 + config[1]['G3Dmark'].values[0] * ratings_weights['Gaming']['G3Dmark'] / 100 + config[4]['size'].values[0] * ratings_weights['Gaming']['ram_size'] * 10
        elif pc_type == 'Professional':
            rating = (config[1]['G3Dmark'].values[0] * ratings_weights['Professional']['G3Dmark'] / 100 + 
                config[1]['G2Dmark'].values[0] * ratings_weights['Professional']['G2Dmark'] / 1 + config[4]['size'].values[0] * ratings_weights['Professional']['ram_size'] * 10)
        elif pc_type == 'Office':
            rating = config[0]['threadMark'].values[0] * ratings_weights['Office']['threadMark'] / 10 + config[4]['size'].values[0] * ratings_weights['Office']['ram_size'] * 10
        elif pc_type == 'Multimedia':
            rating = (config[0]['cpuMark'].values[0] * ratings_weights['Multimedia']['cpuMark'] / 100 + 
                       config[1]['G2Dmark'].values[0] * ratings_weights['Multimedia']['G2Dmark'] / 1 + config[4]['size'].values[0] * ratings_weights['Multimedia']['ram_size'] * 10)
        else:
           rating = (config[0]['cpuMark'].values[0] * ratings_weights['All-in-One']['cpuMark'] / 100 + config[0]['threadMark'].values[0] * ratings_weights['All-in-One']['threadMark'] / 10 + config[1]['G3Dmark'].values[0] * ratings_weights['All-in-One']['G3Dmark'] / 100 + 
            config[1]['G2Dmark'].values[0] * ratings_weights['All-in-One']['G2Dmark'] / 1 + config[4]['size'].values[0] * ratings_weights['All-in-One']['ram_size'] * 10)
        
        rating = rating / price * 100
        configurations.append({'cpu': config[0]['cpuName'].values[0], 
                               'motherboard': config[2]['name'].values[0], 
                               'gpu': config[1]['gpuName'].values[0], 
                               'ram': config[4]['name'].values[0], 
                               'psu': config[5]['name'].values[0], 
                               'price' : price, 'type': pc_type,'rating': rating})

In [12]:
pc_df = pd.DataFrame(configurations)

In [13]:
result = pc_df.groupby('type')['rating'].agg(['max', 'min', 'mean']).reset_index()

print(result)

           type         max       min       mean
0    All-in-One   48.378779  3.917373  23.678741
1        Gaming   28.799526  3.099192  14.235477
2    Multimedia   57.837973  8.184230  30.650555
3        Office  105.660722  4.872517  20.515666
4  Professional   61.764482  3.647711  22.287426


In [134]:
pc_df.head(10)

Unnamed: 0,price,rating
count,5001.0,5001.0
mean,1580.917227,22.257847
std,614.521365,9.391289
min,449.01,3.707186
25%,1153.95,14.994484
50%,1443.19,20.991128
75%,1878.18,28.195875
max,11292.96,74.426149


In [14]:
pc_df.to_csv("pc_v2.csv", index=False)

In [15]:
features = ['cpu', 'motherboard', 'gpu', 'ram', 'psu', 'type', 'price']
target = 'rating'

In [16]:
X_train, X_test, y_train, y_test = train_test_split(pc_df[features], pc_df[target], test_size=0.2, random_state=42)

In [17]:
categorical_features = ['cpu', 'motherboard', 'gpu', 'ram', 'psu', 'type']

In [18]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ], remainder='passthrough')

In [19]:
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

In [20]:
model.fit(X_train, y_train)

In [21]:
y_pred = model.predict(X_test)

In [22]:
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 28.684227935525797


In [23]:
new_pc = pd.DataFrame([{
    'cpu': 'Intel Core i9-12900K',
    'motherboard': 'Asus PRIME Z690M-PLUS D4',
    'gpu': 'Radeon RX 6800 XT',
    'ram': 'Corsair Vengeance LPX 32GB',
    'psu': 'Corsair RM850x',
    'type': 'Professional',
    'price': 2500.00
}])

new_rating = model.predict(new_pc)
print("Predicted rating for new PC configuration:", new_rating[0])

Predicted rating for new PC configuration: 17.187184815517806


In [24]:
types = {'Gaming' : 15, 'Professional' : 20, 'Office' : 20, 'Multimedia' : 30, 'All-in-One' : 25}

In [25]:
def generate_random_combination(type, max_price):
    cpu, gpu, ram, psu, mb, combination = None,None,None,None,None,None
    price = 0
    while True:
        cpu = cpu_df.sample()
        gpu = gpu_df.sample()
        ram = ram_df.sample()
        psu = ps_df.sample()
        mb = mb_df.sample()
        price = cpu['price'].values[0] + mb['price'].values[0] + gpu['price'].values[0] + ram['price'].values[0] + psu['price'].values[0]
    
        combination = pd.DataFrame([{
                'cpu': cpu['cpuName'].values[0],
                'motherboard': mb['name'].values[0],
                'gpu': gpu['gpuName'].values[0],
                'ram': ram['name'].values[0],
                'psu': psu['name'].values[0],
                'type': type,
                'price': price
        }])
        
        rating = model.predict(combination)[0]
        rating_threshold = types.get(type)
        if check_compatibility(cpu, mb, gpu, psu, ram) and max_price >= price >= (max_price * 0.7) and rating >= rating_threshold:
            break
                
    return combination

In [28]:
config = generate_random_combination('Gaming', 1700)

In [27]:
joblib.dump(model, 'model.pkl')

['model.pkl']

In [34]:
config['cpu'].values[0]

'AMD Ryzen 7 5800X'

In [35]:
config

Unnamed: 0,cpu,motherboard,gpu,ram,psu,type,price
0,AMD Ryzen 7 5800X,ASRock A520M Phantom Gaming 4,GeForce RTX 3070,Kingston KVR13N9K3/24 24 GB,SeaSonic FOCUS GX,Gaming,1443.94
