In [9]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

In [10]:
# Load the video-game sales table; the path is relative to the notebook's working directory.
csv_path = 'Data/video_games_sales_completo.csv'
df = pd.read_csv(csv_path)

In [11]:
# Label column the classifier will learn to predict.
target_col = 'Ventas_Clase'

# Columns kept out of the feature matrix: the label itself, the raw sales figures
# (presumably the label is derived from them, so they would leak it -- TODO confirm),
# and three further columns that are not used as predictors here.
excluded_cols = [
    target_col,
    'Global_Sales',
    'NA_Sales',
    'EU_Sales',
    'JP_Sales',
    'Other_Sales',
    'Name',
    'User Ratings Count',
    'Platforms Info',
]

X = df.drop(columns=excluded_cols)
y = df[target_col]

In [12]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Route columns to a preprocessing branch by dtype. 'number' matches every numeric
# width (int32, float32, nullable integers, ...) instead of only int64/float64, so a
# numeric column stored with a different width is not silently left out of both lists.
# For a frame holding only int64/float64 numerics the resulting list is unchanged.
numeric_features = X.select_dtypes(include='number').columns.tolist()
categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()

In [13]:
# Numeric branch: standardise the columns listed in numeric_features.
numeric_branch = ('num', StandardScaler(), numeric_features)

# Categorical branch: one-hot encode the columns listed in categorical_features.
# handle_unknown='ignore' lets transform() accept categories that were not seen
# during fit instead of raising.
categorical_branch = (
    'cat',
    OneHotEncoder(handle_unknown='ignore'),
    categorical_features,
)

# Columns that appear in neither list are not passed through (ColumnTransformer's
# default remainder behaviour is left untouched).
preprocessor = ColumnTransformer(transformers=[numeric_branch, categorical_branch])

In [14]:
# Random forest with a fixed seed so repeated runs build the same trees.
rf_classifier = RandomForestClassifier(random_state=42)

# Chain preprocessing and the classifier into one estimator, so a single
# fit()/predict() call applies both steps in order.
rf_steps = [
    ('preprocessor', preprocessor),
    ('classifier', rf_classifier),
]
rf_pipeline = Pipeline(steps=rf_steps)

In [None]:
# Fit the preprocessing steps and the forest on the training split only.
# Kept as the cell's last expression so the fitted pipeline is still displayed.
rf_pipeline.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out split, then summarise per-class precision/recall and
# the raw confusion counts.
y_pred = rf_pipeline.predict(X_test)

rf_report = classification_report(y_test, y_pred)
rf_confusion = confusion_matrix(y_test, y_pred)

# One print call with a newline separator emits the same four lines/blocks, in the
# same order, as four consecutive print statements would.
print(
    "Random Forest Classification Report:",
    rf_report,
    "Confusion Matrix:",
    rf_confusion,
    sep="\n",
)

Random Forest Classification Report:
              precision    recall  f1-score   support

        Alta       0.35      0.34      0.34       653
        Baja       0.63      0.67      0.65      1292
  Gran Éxito       0.41      0.36      0.38       326
       Media       0.40      0.39      0.39       981

    accuracy                           0.49      3252
   macro avg       0.45      0.44      0.44      3252
weighted avg       0.48      0.49      0.48      3252

Confusion Matrix:
[[221 123  86 223]
 [106 864  23 299]
 [105  43 117  61]
 [202 339  59 381]]
