In [13]:
# Import
import pandas as pd
from scipy.io import arff
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

In [2]:
data, meta = arff.loadarff('../../Data/Regression/fps in video games/fps-in-video-games.arff')
df = pd.DataFrame(data)

In [3]:
df

Unnamed: 0,CpuName,CpuNumberOfCores,CpuNumberOfThreads,CpuBaseClock,CpuCacheL1,CpuCacheL2,CpuCacheL3,CpuDieSize,CpuFrequency,CpuMultiplier,...,GpuNumberOfShadingUnits,GpuNumberOfTMUs,GpuTextureRate,GpuNumberOfTransistors,GpuVulkan,GameName,GameResolution,GameSetting,Dataset,FPS
0,b'Intel Core i7-920',4.0,8.0,133.0,256.0,1024.0,8.0,0.000263,2666.0,20.0,...,2304.0,144.0,182300.0,5700.0,b'1.2.131',b'counterStrikeGlobalOffensive',1080.0,b'low',b'userbenchmark',70.0
1,b'Intel Core i7-920',4.0,8.0,133.0,256.0,1024.0,8.0,0.000263,2666.0,20.0,...,2304.0,144.0,182300.0,5700.0,b'1.2.131',b'counterStrikeGlobalOffensive',1080.0,b'low',b'userbenchmark',230.0
2,b'Intel Core i5-4690',4.0,4.0,100.0,256.0,1024.0,6.0,0.000177,3400.0,34.0,...,2560.0,160.0,277300.0,7200.0,b'1.2.131',b'counterStrikeGlobalOffensive',1080.0,b'low',b'userbenchmark',250.0
3,b'Intel Core i5-4690',4.0,4.0,100.0,256.0,1024.0,6.0,0.000177,3400.0,34.0,...,2560.0,160.0,277300.0,7200.0,b'1.2.131',b'counterStrikeGlobalOffensive',1080.0,b'low',b'userbenchmark',300.0
4,b'Intel Core i5-4690',4.0,4.0,100.0,256.0,1024.0,6.0,0.000177,3400.0,34.0,...,896.0,56.0,61600.0,2080.0,b'1.2.131',b'counterStrikeGlobalOffensive',1080.0,b'low',b'userbenchmark',200.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425828,b'AMD Ryzen 5 3600',6.0,12.0,100.0,576.0,3072.0,32.0,0.000074,3600.0,36.0,...,2048.0,128.0,159200.0,5700.0,b'1.2.131',b'rainbowSixSiege',1080.0,b'max',b'fpsbenchmark',51.5
425829,b'AMD Ryzen 5 2600X',6.0,12.0,100.0,576.0,3072.0,16.0,0.000192,3600.0,36.0,...,2048.0,128.0,159200.0,5700.0,b'1.2.131',b'rainbowSixSiege',1080.0,b'max',b'fpsbenchmark',49.9
425830,b'AMD Ryzen 5 1600X',6.0,12.0,100.0,576.0,3072.0,16.0,0.000192,3600.0,36.0,...,2048.0,128.0,159200.0,5700.0,b'1.2.131',b'rainbowSixSiege',1080.0,b'max',b'fpsbenchmark',46.9
425831,b'AMD Ryzen 5 2600',6.0,12.0,100.0,576.0,3072.0,16.0,0.000192,3400.0,34.0,...,2048.0,128.0,159200.0,5700.0,b'1.2.131',b'rainbowSixSiege',1080.0,b'max',b'fpsbenchmark',46.4


In [4]:
for col in df.columns:
    if df[col].dtype == 'object':  # Vérifie si le type de la colonne est object, souvent utilisé pour les strings
        df[col] = df[col].apply(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)

In [5]:
# Séparer les features et la target
X = df.drop('FPS', axis=1)  # Remplacez 'target' par le nom de votre colonne cible
y = df['FPS']

In [6]:
# Normalisation et OneHotEncoding
numeric_features = []
categorical_features = []

for col in X.columns:
    if X[col].dtype == 'float64':
        numeric_features.append(col)
    elif X[col].dtype == 'object':
        categorical_features.append(col)

numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(drop='first')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Appliquer le pipeline
X_transformed = preprocessor.fit_transform(X)

numeric_columns = numeric_features
categorical_columns = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
all_columns = list(numeric_columns) + list(categorical_columns)

# Convertir en DataFrame
X = pd.DataFrame(X_transformed.toarray(), columns=all_columns)
X

Unnamed: 0,CpuNumberOfCores,CpuNumberOfThreads,CpuBaseClock,CpuCacheL1,CpuCacheL2,CpuCacheL3,CpuDieSize,CpuFrequency,CpuMultiplier,CpuProcessSize,...,GameName_rust,GameName_seaOfThieves,GameName_starcraft2,GameName_totalWar3Kingdoms,GameName_warframe,GameName_worldOfTanks,GameSetting_low,GameSetting_max,GameSetting_med,Dataset_userbenchmark
0,-0.510392,0.027760,0.866200,-0.547289,-0.534639,-0.266482,1.118556,-1.860735,-2.079169,3.940991,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
1,-0.510392,0.027760,0.866200,-0.547289,-0.534639,-0.266482,1.118556,-1.860735,-2.079169,3.940991,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
2,-0.510392,-1.014235,-0.305095,-0.547289,-0.534639,-0.594163,-0.285375,-0.214926,0.089496,0.669158,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
3,-0.510392,-1.014235,-0.305095,-0.547289,-0.534639,-0.594163,-0.285375,-0.214926,0.089496,0.669158,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
4,-0.510392,-1.014235,-0.305095,-0.547289,-0.534639,-0.594163,-0.285375,-0.214926,0.089496,0.669158,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425828,0.650855,1.069755,-0.305095,1.353179,0.599462,3.665695,-1.966827,0.233523,0.399305,-1.464646,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
425829,0.650855,1.069755,-0.305095,1.353179,0.599462,1.044244,-0.040503,0.233523,0.399305,-0.753378,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
425830,0.650855,1.069755,-0.305095,1.353179,0.599462,1.044244,-0.040503,0.233523,0.399305,-0.468870,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
425831,0.650855,1.069755,-0.305095,1.353179,0.599462,1.044244,-0.040503,-0.214926,0.089496,-0.753378,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [7]:
# COMPLETUDE 
# Suppression des colonnes 
X = X.dropna(axis=1)

In [16]:
# Créer le modèle de régression linéaire
model = XGBRegressor()

In [17]:
# Validation croisée
scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_absolute_error')

# Calculer la moyenne et l'écart-type des scores
mean_score = -scores.mean()  # Négatif car l'erreur absolue moyenne est une perte
std_score = scores.std()

print(f"Mean MAE: {mean_score}")
print(f"Standard Deviation of MAE: {std_score}")

Mean MAE: 41.201828574780656
Standard Deviation of MAE: 9.024505114148033
