In [20]:
# Tratamiento de datos
# ==============================================================================
import numpy as np
import pandas as pd
from math import ceil

# Preprocesado y modelado
# ==============================================================================
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
import seaborn as sns

# Configuración warnings
# ==============================================================================
# import warnings
# warnings.filterwarnings('ignore')

# Carga de datos
# ==============================================================================
steam_store = pd.read_csv('steam_store_data_2024.csv')
steam = pd.read_csv('steam-200k.csv')
video_games = pd.read_csv('Video_Games.csv')
cars = pd.read_csv('cars_clus.csv')
car_data = pd.read_csv('Car_Data.csv')

In [21]:
car_data

Unnamed: 0,ID,Brand,Model,Year,Color,Mileage,Price,Condition
0,1,Mazda,Generic Model 3,2003,Blue,22932,65732,New
1,2,Jaguar,Generic Model 2,2003,Silver,89937,58620,New
2,3,Land Rover,Generic Model 2,2022,Green,36616,50574,New
3,4,Porsche,Generic Model 2,1997,Black,82812,35436,Used
4,5,Land Rover,Generic Model 3,2000,Black,184616,63880,Used
...,...,...,...,...,...,...,...,...
99995,99996,Lexus,Generic Model 1,2018,Red,24034,31762,New
99996,99997,Nissan,Generic Model 1,2015,Green,30029,78376,New
99997,99998,Land Rover,Generic Model 1,2009,White,42313,45681,Used
99998,99999,Toyota,Tacoma,2010,White,120989,15085,Used


In [22]:
car_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 8 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   ID         100000 non-null  int64 
 1   Brand      100000 non-null  object
 2   Model      100000 non-null  object
 3   Year       100000 non-null  int64 
 4   Color      100000 non-null  object
 5   Mileage    100000 non-null  int64 
 6   Price      100000 non-null  int64 
 7   Condition  100000 non-null  object
dtypes: int64(4), object(4)
memory usage: 6.1+ MB


In [23]:
car_data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ID,100000.0,50000.5,28867.657797,1.0,25000.75,50000.5,75000.25,100000.0
Year,100000.0,2006.03409,9.52722,1990.0,1998.0,2006.0,2014.0,2022.0
Mileage,100000.0,99819.395,57710.087058,2.0,49996.25,99807.0,149841.25,199998.0
Price,100000.0,42534.51937,21632.296692,5000.0,23826.75,42563.5,61197.5,80000.0


In [10]:
cars

Unnamed: 0,manufact,model,sales,resale,type,price,engine_s,horsepow,wheelbas,width,length,curb_wgt,fuel_cap,mpg,lnsales,partition
0,Acura,Integra,16.919,16.360,0,21.50,1.8,140,101.2,67.3,172.4,2.639,13.2,28.0,2.828,0
1,Acura,TL,39.384,19.875,0,28.40,3.2,225,108.1,70.3,192.9,3.517,17.2,25.0,3.673,0
2,Acura,RL,8.588,29.725,0,42.00,3.5,210,114.6,71.4,196.6,3.850,18.0,22.0,2.150,0
3,Audi,A4,20.397,22.255,0,23.99,1.8,150,102.6,68.2,178.0,2.998,16.4,27.0,3.015,0
4,Audi,A6,18.780,23.555,0,33.95,2.8,200,108.7,76.1,192.0,3.561,18.5,22.0,2.933,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,Volkswagen,Golf,9.761,11.425,0,14.90,2.0,115,98.9,68.3,163.3,2.767,14.5,26.0,2.278,0
113,Volkswagen,Jetta,83.721,13.240,0,16.70,2.0,115,98.9,68.3,172.3,2.853,14.5,26.0,4.427,0
114,Volkswagen,Passat,51.102,16.725,0,21.20,1.8,150,106.4,68.5,184.1,3.043,16.4,27.0,3.934,0
115,Volkswagen,Cabrio,9.569,16.575,0,19.99,2.0,115,97.4,66.7,160.4,3.079,13.7,26.0,2.259,0


In [12]:
cars.columns

Index(['manufact', 'model', 'sales', 'resale', 'type', 'price', 'engine_s',
       'horsepow', 'wheelbas', 'width', 'length', 'curb_wgt', 'fuel_cap',
       'mpg', 'lnsales', 'partition'],
      dtype='object')

In [16]:
cars['type'].value_counts()

type
0    88
1    29
Name: count, dtype: int64

In [13]:
cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117 entries, 0 to 116
Data columns (total 16 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   manufact   117 non-null    object 
 1   model      117 non-null    object 
 2   sales      117 non-null    float64
 3   resale     117 non-null    float64
 4   type       117 non-null    int64  
 5   price      117 non-null    float64
 6   engine_s   117 non-null    float64
 7   horsepow   117 non-null    int64  
 8   wheelbas   117 non-null    float64
 9   width      117 non-null    float64
 10  length     117 non-null    float64
 11  curb_wgt   117 non-null    float64
 12  fuel_cap   117 non-null    float64
 13  mpg        117 non-null    float64
 14  lnsales    117 non-null    float64
 15  partition  117 non-null    int64  
dtypes: float64(11), int64(3), object(2)
memory usage: 14.8+ KB


In [14]:
cars.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
sales,117.0,59.112316,75.058933,0.11,16.767,32.299,76.029,540.561
resale,117.0,18.031538,11.605632,5.16,11.24,14.01,19.875,67.55
type,117.0,0.247863,0.433629,0.0,0.0,0.0,0.0,1.0
price,117.0,25.969487,14.149699,9.235,16.98,21.665,29.465,82.6
engine_s,117.0,3.048718,1.055169,1.0,2.2,3.0,3.8,8.0
horsepow,117.0,181.282051,58.591786,55.0,140.0,175.0,210.0,450.0
wheelbas,117.0,107.326496,8.050588,92.6,102.4,107.0,111.6,138.7
width,117.0,71.189744,3.530151,62.6,68.5,70.4,73.6,79.3
length,117.0,187.717949,13.849926,149.4,177.5,187.8,196.5,224.5
curb_wgt,117.0,3.324051,0.597177,1.895,2.911,3.34,3.823,5.115


In [17]:
# Crear una nueva columna 'car_category' basada en reglas
def assign_category(row):
    if row['type'] in ['Sedan', 'Hatchback']:
        return 'Turismo'
    elif row['type'] in ['SUV', 'Crossover']:
        return 'SUV'
    elif row['type'] in ['Pickup']:
        return 'Camion'
    elif row['type'] in ['Convertible', 'Coupe', 'Roadster']:
        return 'Deportivo'
    elif row['type'] in ['Minivan', 'Station Wagon']:
        return 'Familiar'
    else:
        return 'Otro'

cars['car_category'] = cars.apply(assign_category, axis=1)

In [18]:
cars

Unnamed: 0,manufact,model,sales,resale,type,price,engine_s,horsepow,wheelbas,width,length,curb_wgt,fuel_cap,mpg,lnsales,partition,car_category
0,Acura,Integra,16.919,16.360,0,21.50,1.8,140,101.2,67.3,172.4,2.639,13.2,28.0,2.828,0,Otro
1,Acura,TL,39.384,19.875,0,28.40,3.2,225,108.1,70.3,192.9,3.517,17.2,25.0,3.673,0,Otro
2,Acura,RL,8.588,29.725,0,42.00,3.5,210,114.6,71.4,196.6,3.850,18.0,22.0,2.150,0,Otro
3,Audi,A4,20.397,22.255,0,23.99,1.8,150,102.6,68.2,178.0,2.998,16.4,27.0,3.015,0,Otro
4,Audi,A6,18.780,23.555,0,33.95,2.8,200,108.7,76.1,192.0,3.561,18.5,22.0,2.933,0,Otro
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,Volkswagen,Golf,9.761,11.425,0,14.90,2.0,115,98.9,68.3,163.3,2.767,14.5,26.0,2.278,0,Otro
113,Volkswagen,Jetta,83.721,13.240,0,16.70,2.0,115,98.9,68.3,172.3,2.853,14.5,26.0,4.427,0,Otro
114,Volkswagen,Passat,51.102,16.725,0,21.20,1.8,150,106.4,68.5,184.1,3.043,16.4,27.0,3.934,0,Otro
115,Volkswagen,Cabrio,9.569,16.575,0,19.99,2.0,115,97.4,66.7,160.4,3.079,13.7,26.0,2.259,0,Otro


In [6]:
steam

Unnamed: 0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0
0,151603712,The Elder Scrolls V Skyrim,play,273.0,0
1,151603712,Fallout 4,purchase,1.0,0
2,151603712,Fallout 4,play,87.0,0
3,151603712,Spore,purchase,1.0,0
4,151603712,Spore,play,14.9,0
...,...,...,...,...,...
199994,128470551,Titan Souls,play,1.5,0
199995,128470551,Grand Theft Auto Vice City,purchase,1.0,0
199996,128470551,Grand Theft Auto Vice City,play,1.5,0
199997,128470551,RUSH,purchase,1.0,0


In [8]:
video_games

Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating
0,Wii Sports,Wii,2006.0,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8,322.0,Nintendo,E
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,,,,,
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.61,10.93,3.28,2.95,32.77,80.0,73.0,8,192.0,Nintendo,E
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16714,Samurai Warriors: Sanada Maru,PS3,2016.0,Action,Tecmo Koei,0.00,0.00,0.01,0.00,0.01,,,,,,
16715,LMA Manager 2007,X360,2006.0,Sports,Codemasters,0.00,0.01,0.00,0.00,0.01,,,,,,
16716,Haitaka no Psychedelica,PSV,2016.0,Adventure,Idea Factory,0.00,0.00,0.01,0.00,0.01,,,,,,
16717,Spirits & Spells,GBA,2003.0,Platform,Wanadoo,0.01,0.00,0.00,0.00,0.01,,,,,,


In [4]:
steam_store

Unnamed: 0,title,description,price,salePercentage,recentReviews,allReviews
0,Ori and the Will of the Wisps,Play the critically acclaimed masterpiece. Emb...,$9.89,-67%,Overwhelmingly Positive,Overwhelmingly Positive
1,"Flashing Lights - Police, Firefighting, Emerge...",Play solo or in up to 10-player multiplayer co...,$8.49,-66%,Very Positive,Very Positive
2,Thronefall,A minimalist game about building and defending...,$5.24,-25%,Overwhelmingly Positive,Overwhelmingly Positive
3,DRAGON QUEST® XI S: Echoes of an Elusive Age™ ...,The Definitive Edition includes the critically...,$23.99,-40%,Very Positive,Very Positive
4,UNDYING,"As Anling’s zombie infection sets in, her days...",$13.99,-30%,Mostly Positive,Mostly Positive
...,...,...,...,...,...,...
81,Bendy and the Dark Revival,Bendy and the Dark Revival® is a first-person ...,$5.99,-80%,Very Positive,Very Positive
82,STAR WARS™ - The Force Unleashed™ Ultimate Sit...,"A game that will show gamers the deepest, dark...",$6.99,-65%,Very Positive,Very Positive
83,Thymesia,Thymesia is a gruelling action-RPG with fast-p...,$14.99,-40%,Very Positive,Very Positive
84,Last Train Home,The Great War is over - the fight continues. C...,$26.39,-34%,Very Positive,Very Positive
