In [2]:
import joblib
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import random as rd

In [6]:
file_path = "CPU.csv"

cpu_df = pd.read_csv(file_path)

cpu_df = cpu_df.dropna()
cpu_df = cpu_df[cpu_df['category'] == 'Desktop']
cpu_df = cpu_df[cpu_df['testDate'].astype(int) >= 2017]

In [14]:
print(cpu_df.head(20))

                              cpuName    price  cpuMark  cpuValue  threadMark  \
4   AMD Ryzen Threadripper PRO 3995WX  6807.98    83971     12.33        2626   
5        AMD Ryzen Threadripper 3990X  8399.69    81568      9.71        2569   
17       AMD Ryzen Threadripper 3970X  2997.99    63835     21.29        2694   
18  AMD Ryzen Threadripper PRO 3975WX  4499.99    63495     14.11        2666   
27       AMD Ryzen Threadripper 3960X  1682.90    54757     32.54        2678   
39                  AMD Ryzen 9 5950X   649.74    46195     71.10        3498   
44              Intel Core i9-12900KS   779.98    44243     56.72        4317   
53              Intel Core i9-12900KF   572.98    41163     71.84        4217   
54               Intel Core i9-12900K   599.95    41077     68.47        4209   
61  AMD Ryzen Threadripper PRO 3955WX  1995.00    40225     20.16        2696   
63                  AMD Ryzen 9 5900X   393.03    39457    100.39        3494   
64                  AMD Ryze

In [15]:
print(cpu_df.shape)

(270, 12)


In [16]:
print(cpu_df.columns)

Index(['cpuName', 'price', 'cpuMark', 'cpuValue', 'threadMark', 'threadValue',
       'TDP', 'powerPerf', 'cores', 'testDate', 'socket', 'category'],
      dtype='object')


In [17]:
cpu_df.to_csv("cpu_v1.csv", index=False)

In [155]:
file_path = "mb_v1.csv"

mb_df = pd.read_csv(file_path)
mb_df = mb_df.dropna()

In [156]:
print(mb_df.head(10))

                              name   price   socket form_factor  max_memory  \
0            MSI MAG B550 TOMAHAWK  169.99      AM4         ATX         128   
1                   MSI B550-A PRO  139.99      AM4         ATX         128   
2           MSI B550M PRO-VDH WIFI  119.99      AM4   Micro ATX         128   
3     Gigabyte Z790 AORUS ELITE AX  239.99  LGA1700         ATX         128   
4            Asus Prime B450M-A II   79.99      AM4   Micro ATX         128   
5  MSI MAG B660 TOMAHAWK WIFI DDR4  189.99  LGA1700         ATX         128   
6     Gigabyte B650 AORUS ELITE AX  202.30      AM5         ATX         128   
7     Asus ROG STRIX B550-A GAMING  169.99      AM4         ATX         128   
8       MSI MAG Z790 TOMAHAWK WIFI  276.79  LGA1700         ATX         192   
9       MSI MAG B650 TOMAHAWK WIFI  219.95      AM5         ATX         128   

   memory_slots  
0             4  
1             4  
2             4  
3             4  
4             4  
5             4  
6   

In [157]:
mb_df.to_csv("mb_v1.csv", index=False)

In [24]:
file_path = "GPU.csv"

gpu_df = pd.read_csv(file_path)
gpu_df = gpu_df.dropna()
gpu_df = gpu_df[gpu_df['category'] == 'Desktop']
gpu_df = gpu_df[gpu_df['testDate'].astype(int) >= 2017]

In [29]:
print(gpu_df.head(40))

                          gpuName  G3Dmark  G2Dmark    price  gpuValue    TDP  \
1             GeForce RTX 3080 Ti    26887     1031  1199.99     22.41  350.0   
2                GeForce RTX 3090    26395      999  1749.99     15.08  350.0   
3               Radeon RX 6900 XT    25458     1102  1120.31     22.72  300.0   
4                GeForce RTX 3080    24853     1003   999.00     24.88  320.0   
5             GeForce RTX 3070 Ti    23367     1003   749.99     31.16  290.0   
6               Radeon RX 6800 XT    23364     1078   859.00     27.20  300.0   
9                GeForce RTX 3070    22093      969   719.99     30.69  220.0   
10            GeForce RTX 2080 Ti    21796      940   998.59     21.83  250.0   
12                 Radeon RX 6800    20667     1030   758.99     27.23  250.0   
13            GeForce RTX 3060 Ti    20206      961   599.99     33.68  200.0   
16         GeForce RTX 2080 SUPER    19519      934   683.99     28.54  250.0   
20              Radeon RX 67

In [28]:
print(gpu_df.shape)

(48, 9)


In [30]:
gpu_df.to_csv("gpu_v1.csv", index=False)

In [183]:
def transform_number(x, old_min, old_max, new_min, new_max):
    # print(((x - old_min) * (new_max - new_min) / (old_max - old_min) + new_min))
    return ((x - old_min) * (new_max - new_min) / (old_max - old_min) + new_min) * 100

pc_types = ['Gaming', 'Professional', 'Office', 'Multimedia', 'All-in-One']

In [158]:
cpu_df = pd.read_csv("cpu_v1.csv")
mb_df = pd.read_csv("mb_v1.csv")
gpu_df = pd.read_csv("gpu_v1.csv")

In [159]:
cpu_df = cpu_df[cpu_df['socket'].isin(mb_df['socket'].unique())]

In [214]:
def generate_configurations(num_configurations):
    configurations = []
    for _ in range(num_configurations):
        cpu = cpu_df.sample(1).iloc[0]        
        motherboard = mb_df[mb_df['socket'] == cpu['socket']].sample(1).iloc[0]
        gpu = gpu_df.sample(1).iloc[0]
        
        random_index = rd.randrange(len(pc_types))
        pc_type = pc_types[random_index]
        rating = 0
        
        price = (gpu['price'] + gpu['price'] + motherboard['price'])
        
        if pc_type == 'Gaming':
            rating = ((transform_number(gpu['G3Dmark'],1,29000,1,100) + 
                      transform_number(cpu['cpuMark'],1,109000,1,100) +
                      transform_number(cpu['threadMark'],1,4500,1,100))) / 3 / price

        elif pc_type == 'Professional':
            rating = ((transform_number(gpu['G3Dmark'],1,29000,1,100) + 
                      transform_number(gpu['G2Dmark'],1,1200,1,100))) / 2 / price
        elif pc_type == 'Office':
            rating = (transform_number(gpu['G2Dmark'],1,1200,1,100) +
                      transform_number(cpu['threadMark'],1,4500,1,100)) / 2 / price
        elif pc_type == 'Multimedia':
            rating = (transform_number(gpu['G2Dmark'],1,1200,1,1000) +
                      transform_number(cpu['cpuMark'],1,109000,1,100)) / 2 /  price
        else:
            rating = ((transform_number(gpu['G3Dmark'],1,29000,1,100) + 
                      transform_number(gpu['G2Dmark'],1,1200,1,100) +
                      transform_number(cpu['cpuMark'],1,109000,1,100) +
                      transform_number(cpu['threadMark'],1,4500,1,100))) / 4 / price
        
        rating = rating * rd.uniform(0.5, 1.5)   
        configurations.append({'cpu': cpu['cpuName'], 'motherboard': motherboard['name'], 'gpu': gpu['gpuName'], 
                               # 'gpu_3d' : gpu['G3Dmark'],
                               # 'gpu_2d' : gpu['G2Dmark'],
                               # 'cpu_st' : cpu['cpuMark'],
                               # 'cpu_mt' : cpu['threadMark'],
                               'price' : price, 'type': pc_type,'rating': rating})
    return configurations

In [215]:
configurations = generate_configurations(1000)

In [216]:
configurations_df = pd.DataFrame(configurations)

In [217]:
configurations_df.describe()

Unnamed: 0,price,rating
count,1000.0,1000.0
mean,1856.3993,7.954298
std,1027.594987,10.534346
min,283.97,0.496626
25%,1114.7,2.279459
50%,1538.185,3.809487
75%,2325.48,7.562777
max,5835.55,63.130971


In [252]:
print(configurations_df.head())

                              cpu                     motherboard  \
0             Intel Core i9-12900        Asus PRIME Z690M-PLUS D4   
1   Intel Core i5-10600 @ 3.30GHz        ASRock Z590 Steel Legend   
2            Intel Core i5-12500T        Asus PRIME B660M-A AC D4   
3  Intel Core i5-11500T @ 1.50GHz       ASRock B460M Steel Legend   
4             Intel Core i5-12500  Asus TUF GAMING Z690-PLUS WIFI   

                   gpu    price          type     rating  
0        Radeon RX 550   537.63  Professional   7.045596  
1    Radeon RX 6800 XT  1832.50    Multimedia  22.892680  
2        Radeon RX 580   847.99  Professional   3.107432  
3     GeForce GTX 1650   620.24        Office  11.394660  
4  GeForce RTX 2080 Ti  2164.17  Professional   2.311534  


In [330]:
configurations_df.to_csv("pc_v1.csv", index=False)

In [353]:
data = pd.read_csv("pc_v1.csv")

In [315]:
label_encoder = LabelEncoder()
data['cpu'] = label_encoder.fit_transform(data['cpu'])
data['motherboard'] = label_encoder.fit_transform(data['motherboard'])
data['gpu'] = label_encoder.fit_transform(data['gpu'])
data['type'] = label_encoder.fit_transform(data['type'])

In [316]:
X = data.drop('rating', axis=1)
y = data['rating']

In [317]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [318]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [309]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 81.18989272161859


NameError: name 'joblib' is not defined