In [173]:
import numpy as np
import pandas as pd
import ast
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error

In [174]:

# Parse the dataset: every non-blank line of steam_games.json is evaluated as a
# Python literal with ast.literal_eval and collected into `rows`.
rows = []
# An explicit encoding keeps the read independent of the platform default, and
# iterating the handle streams the file instead of materialising every line
# with readlines().
with open("steam_games.json", encoding="utf-8") as f:
    for line in f:
        # A blank line (e.g. a trailing newline) would make literal_eval raise.
        if line.strip():
            rows.append(ast.literal_eval(line))

In [175]:
# Build the raw DataFrame from the parsed records and display it.
df = pd.DataFrame(rows)
df

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore
0,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",4.49,http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,False,761140,Kotoshiro,,
1,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",,http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,False,643980,Secret Level SRL,Mostly Positive,
2,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",,http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,False,670290,Poolians.com,Mostly Positive,
3,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,"[Action, Adventure, Casual]",0.83,http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,False,767400,彼岸领域,,
4,,,Log Challenge,,http://store.steampowered.com/app/773570/Log_C...,,"[Action, Indie, Casual, Sports]",1.79,http://steamcommunity.com/app/773570/reviews/?...,"[Single-player, Full controller support, HTC V...",2.99,False,773570,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32130,Ghost_RUS Games,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,Colony On Mars,http://store.steampowered.com/app/773640/Colon...,2018-01-04,"[Strategy, Indie, Casual, Simulation]",1.49,http://steamcommunity.com/app/773640/reviews/?...,"[Single-player, Steam Achievements]",1.99,False,773640,"Nikita ""Ghost_RUS""",,
32131,Sacada,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,LOGistICAL: South Africa,http://store.steampowered.com/app/733530/LOGis...,2018-01-04,"[Strategy, Indie, Casual]",4.24,http://steamcommunity.com/app/733530/reviews/?...,"[Single-player, Steam Achievements, Steam Clou...",4.99,False,733530,Sacada,,
32132,Laush Studio,"[Indie, Racing, Simulation]",Russian Roads,Russian Roads,http://store.steampowered.com/app/610660/Russi...,2018-01-04,"[Indie, Simulation, Racing]",1.39,http://steamcommunity.com/app/610660/reviews/?...,"[Single-player, Steam Achievements, Steam Trad...",1.99,False,610660,Laush Dmitriy Sergeevich,,
32133,SIXNAILS,"[Casual, Indie]",EXIT 2 - Directions,EXIT 2 - Directions,http://store.steampowered.com/app/658870/EXIT_...,2017-09-02,"[Indie, Casual, Puzzle, Singleplayer, Atmosphe...",,http://steamcommunity.com/app/658870/reviews/?...,"[Single-player, Steam Achievements, Steam Cloud]",4.99,False,658870,"xropi,stev3ns",1 user reviews,


In [176]:
# Summarise the column dtypes and non-null counts of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32135 entries, 0 to 32134
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   publisher       24083 non-null  object 
 1   genres          28852 non-null  object 
 2   app_name        32133 non-null  object 
 3   title           30085 non-null  object 
 4   url             32135 non-null  object 
 5   release_date    30068 non-null  object 
 6   tags            31972 non-null  object 
 7   discount_price  225 non-null    float64
 8   reviews_url     32133 non-null  object 
 9   specs           31465 non-null  object 
 10  price           30758 non-null  object 
 11  early_access    32135 non-null  bool   
 12  id              32133 non-null  object 
 13  developer       28836 non-null  object 
 14  sentiment       24953 non-null  object 
 15  metascore       2677 non-null   object 
dtypes: bool(1), float64(1), object(14)
memory usage: 3.7+ MB


In [177]:
# Keep only the columns considered as candidate inputs for the price model.
model_columns = ["genres", "release_date", "tags", "specs", "price", "sentiment", "metascore"]
df_filtrado = df[model_columns]

In [178]:

# Keep only rows whose release_date is exactly a YYYY-MM-DD string; missing
# values and free-form text are discarded. fullmatch anchors the pattern to the
# whole value (str.contains would also accept a date embedded in longer text),
# and .copy() makes the result an independent frame so that later column
# assignments do not trigger SettingWithCopyWarning.
has_iso_date = df_filtrado['release_date'].str.fullmatch(r'\d{4}-\d{2}-\d{2}', na=False)
df_filtrado = df_filtrado[has_iso_date].copy()

# Share of the original rows whose release_date is null or malformed
# (7.32% on this dataset, small enough to simply drop them).
fecha_basura = 1-(len(df_filtrado)/len(df))
print(f'{fecha_basura:.2%}')


7.32%


In [179]:

def set_price_to_zero(row):
    """Return the price to store for one row.

    Args:
        row: Mapping-like record exposing 'genres' and 'price'.

    Returns:
        0 when 'genres' is a list that contains 'Free to Play';
        otherwise the row's original 'price' value, untouched.
    """
    listed_genres, listed_price = row['genres'], row['price']
    # Anything that is not a list (e.g. a missing value) cannot be checked.
    if not isinstance(listed_genres, list):
        return listed_price
    return 0 if 'Free to Play' in listed_genres else listed_price

# Apply the function to every row of the DataFrame (axis=1) and overwrite the price column
df_filtrado['price'] = df_filtrado.apply(set_price_to_zero, axis=1)


In [180]:
# Inspect the frame after the release-date filter and the Free to Play price override.
df_filtrado

Unnamed: 0,genres,release_date,tags,specs,price,sentiment,metascore
0,"[Action, Casual, Indie, Simulation, Strategy]",2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",[Single-player],4.99,,
1,"[Free to Play, Indie, RPG, Strategy]",2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...","[Single-player, Multi-player, Online Multi-Pla...",0,Mostly Positive,
2,"[Casual, Free to Play, Indie, Simulation, Sports]",2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...","[Single-player, Multi-player, Online Multi-Pla...",0,Mostly Positive,
3,"[Action, Adventure, Casual]",2017-12-07,"[Action, Adventure, Casual]",[Single-player],0.99,,
5,"[Action, Adventure, Simulation]",2018-01-04,"[Action, Adventure, Simulation, FPS, Shooter, ...","[Single-player, Steam Achievements]",3.99,Mixed,
...,...,...,...,...,...,...,...
32129,"[Action, Adventure, Casual, Indie]",2018-01-04,"[Action, Indie, Casual, Violent, Adventure]","[Single-player, Steam Achievements, Steam Cloud]",1.99,2 user reviews,
32130,"[Casual, Indie, Simulation, Strategy]",2018-01-04,"[Strategy, Indie, Casual, Simulation]","[Single-player, Steam Achievements]",1.99,,
32131,"[Casual, Indie, Strategy]",2018-01-04,"[Strategy, Indie, Casual]","[Single-player, Steam Achievements, Steam Clou...",4.99,,
32132,"[Indie, Racing, Simulation]",2018-01-04,"[Indie, Simulation, Racing]","[Single-player, Steam Achievements, Steam Trad...",1.99,,


In [181]:
# List the distinct price values; the output shows floats mixed with text
# labels such as 'Free' and 'Install Now'.
df_filtrado["price"].unique()

array([4.99, 0, 0.99, 3.99, nan, 10.99, 2.99, 1.59, 1.99, 9.99, 8.99,
       6.99, 7.99, 39.99, 'Free', 19.99, 7.49, 14.99, 12.99, 5.99, 2.49,
       15.99, 1.25, 29.99, 'Free to Play', 24.99, 17.99, 61.99, 3.49,
       18.99, 11.99, 13.99, 'Free Demo', 34.99, 1.49, 32.99, 99.99, 14.95,
       59.99, 69.99, 5.0, 49.99, 13.98, 29.96, 119.99, 79.99, 109.99,
       16.99, 771.71, 'Install Now', 21.99,
       'Play WARMACHINE: Tactics Demo', 0.98, 'Free To Play', 4.29, 64.99,
       54.99, 74.99, 'Install Theme', 0.89, 'Third-party', 0.5,
       'Play Now', 89.99, 299.99, 44.99, 3.0, 15.0, 5.49, 23.99, 49.0,
       20.99, 10.93, 1.39, 'Free HITMAN™ Holiday Pack', 36.99, 4.49, 2.0,
       4.0, 149.99, 234.99, 1.95, 1.5, 199.0, 189.0, 6.66, 27.99, 10.49,
       129.99, 179.0, 26.99, 399.99, 31.99, 399.0, 20.0, 40.0, 3.33,
       22.99, 320.0, 995.0, 27.49, 6.0, 1.29, 499.99, 199.99, 16.06, 4.68,
       202.76, 1.0, 2.3, 0.95, 172.24, 2.97, 10.96, 19.95, 10.0, 30.0,
       2.66, 6.48, 19.29, 

In [182]:

# Convert price to a numeric column. Values that cannot be parsed as numbers
# (text labels) become NaN through errors='coerce', and rows left without a
# numeric price are then dropped.
df_filtrado = (
    df_filtrado
    .assign(price=lambda frame: pd.to_numeric(frame['price'], errors='coerce'))
    .dropna(subset=['price'])
)




In [183]:
# Check dtypes and non-null counts now that price is numeric.
df_filtrado.info()

<class 'pandas.core.frame.DataFrame'>
Index: 28187 entries, 0 to 32133
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   genres        27022 non-null  object 
 1   release_date  28187 non-null  object 
 2   tags          28062 non-null  object 
 3   specs         27574 non-null  object 
 4   price         28187 non-null  float64
 5   sentiment     21903 non-null  object 
 6   metascore     2580 non-null   object 
dtypes: float64(1), object(6)
memory usage: 1.7+ MB


In [184]:
# Distinct sentiment labels. NOTE(review): this inspects the raw `df`, not `df_filtrado`.
df['sentiment'].unique()

array([nan, 'Mostly Positive', 'Mixed', '1 user reviews',
       '3 user reviews', '8 user reviews', 'Very Positive',
       'Overwhelmingly Positive', '6 user reviews', '5 user reviews',
       '2 user reviews', 'Very Negative', 'Positive', 'Mostly Negative',
       '9 user reviews', 'Negative', '4 user reviews', '7 user reviews',
       'Overwhelmingly Negative'], dtype=object)

In [185]:


# Flag rows whose "sentiment" is a raw review count (e.g. "3 user reviews").
# With na=True a missing sentiment is flagged as well.
lacks_rating_label = df_filtrado['sentiment'].str.contains('user reviews', na=True, regex=False)

# Keep only the unflagged rows: review-count rows and rows with no sentiment are removed.
df_filtrado = df_filtrado.loc[~lacks_rating_label]



In [186]:
# Inspect the frame after removing review-count and missing sentiments.
df_filtrado

Unnamed: 0,genres,release_date,tags,specs,price,sentiment,metascore
1,"[Free to Play, Indie, RPG, Strategy]",2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...","[Single-player, Multi-player, Online Multi-Pla...",0.00,Mostly Positive,
2,"[Casual, Free to Play, Indie, Simulation, Sports]",2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...","[Single-player, Multi-player, Online Multi-Pla...",0.00,Mostly Positive,
5,"[Action, Adventure, Simulation]",2018-01-04,"[Action, Adventure, Simulation, FPS, Shooter, ...","[Single-player, Steam Achievements]",3.99,Mixed,
27,"[Action, Indie, Racing]",1997-06-30,"[Racing, Action, Classic, Indie, Gore, 1990's,...","[Single-player, Multi-player, Steam Trading Ca...",9.99,Very Positive,
28,[Action],1998-11-08,"[FPS, Classic, Action, Sci-fi, Singleplayer, S...","[Single-player, Multi-player, Valve Anti-Cheat...",9.99,Overwhelmingly Positive,96
...,...,...,...,...,...,...,...
32113,"[Action, Casual, Indie]",2003-11-01,"[Action, Casual, Indie, Shooter]","[Single-player, Multi-player, Steam Trading Ca...",6.99,Mixed,
32114,[Action],2003-05-01,"[FPS, World War II, Multiplayer, Action, Shoot...","[Multi-player, Valve Anti-Cheat enabled]",4.99,Very Positive,79
32115,"[Strategy, RPG, Indie]",2003-07-01,"[RPG, Indie, Strategy, Turn-Based Combat, Isom...",[Single-player],19.99,Positive,
32116,[Action],2004-03-16,"[FPS, Arena Shooter, Action, Classic, Multipla...","[Single-player, Multi-player, Steam Trading Ca...",14.99,Overwhelmingly Positive,93


In [187]:
# Row count and non-null counts after the sentiment filter.
df_filtrado.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13551 entries, 1 to 32117
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   genres        13396 non-null  object 
 1   release_date  13551 non-null  object 
 2   tags          13534 non-null  object 
 3   specs         13364 non-null  object 
 4   price         13551 non-null  float64
 5   sentiment     13551 non-null  object 
 6   metascore     2482 non-null   object 
dtypes: float64(1), object(6)
memory usage: 846.9+ KB


In [188]:

# Parse release_date as a datetime, keep only its year as a new `year`
# column (appended last), and discard the original release_date column.
df_filtrado = (
    df_filtrado
    .assign(year=lambda frame: pd.to_datetime(frame['release_date']).dt.year)
    .drop(columns='release_date')
)


In [189]:
# Inspect the frame now that release_date has been replaced by year.
df_filtrado

Unnamed: 0,genres,tags,specs,price,sentiment,metascore,year
1,"[Free to Play, Indie, RPG, Strategy]","[Free to Play, Strategy, Indie, RPG, Card Game...","[Single-player, Multi-player, Online Multi-Pla...",0.00,Mostly Positive,,2018
2,"[Casual, Free to Play, Indie, Simulation, Sports]","[Free to Play, Simulation, Sports, Casual, Ind...","[Single-player, Multi-player, Online Multi-Pla...",0.00,Mostly Positive,,2017
5,"[Action, Adventure, Simulation]","[Action, Adventure, Simulation, FPS, Shooter, ...","[Single-player, Steam Achievements]",3.99,Mixed,,2018
27,"[Action, Indie, Racing]","[Racing, Action, Classic, Indie, Gore, 1990's,...","[Single-player, Multi-player, Steam Trading Ca...",9.99,Very Positive,,1997
28,[Action],"[FPS, Classic, Action, Sci-fi, Singleplayer, S...","[Single-player, Multi-player, Valve Anti-Cheat...",9.99,Overwhelmingly Positive,96,1998
...,...,...,...,...,...,...,...
32113,"[Action, Casual, Indie]","[Action, Casual, Indie, Shooter]","[Single-player, Multi-player, Steam Trading Ca...",6.99,Mixed,,2003
32114,[Action],"[FPS, World War II, Multiplayer, Action, Shoot...","[Multi-player, Valve Anti-Cheat enabled]",4.99,Very Positive,79,2003
32115,"[Strategy, RPG, Indie]","[RPG, Indie, Strategy, Turn-Based Combat, Isom...",[Single-player],19.99,Positive,,2003
32116,[Action],"[FPS, Arena Shooter, Action, Classic, Multipla...","[Single-player, Multi-player, Steam Trading Ca...",14.99,Overwhelmingly Positive,93,2004


In [190]:
# Keep only rows with a usable metascore: drop missing values and the literal
# 'NA' placeholder. Filtering the rows directly avoids
# Series.replace('NA', None), whose meaning depends on the pandas version
# (older releases treat value=None as "not given" and forward-fill the
# previous value instead of inserting a null).
has_metascore = df_filtrado['metascore'].notna() & (df_filtrado['metascore'] != 'NA')
df_filtrado = df_filtrado[has_metascore].copy()


In [191]:
# Inspect the rows that still have a metascore.
df_filtrado

Unnamed: 0,genres,tags,specs,price,sentiment,metascore,year
28,[Action],"[FPS, Classic, Action, Sci-fi, Singleplayer, S...","[Single-player, Multi-player, Valve Anti-Cheat...",9.99,Overwhelmingly Positive,96,1998
39,[Strategy],"[Turn-Based Strategy, Strategy, Classic, Atmos...","[Single-player, Multi-player, Co-op]",6.99,Mostly Positive,84,2006
40,[Strategy],"[Strategy, Turn-Based Strategy, Fantasy, Turn-...","[Single-player, Multi-player, Co-op]",6.99,Very Positive,80,2006
41,"[Action, Indie, RPG]","[Multiplayer, Indie, Action, First-Person, Ste...","[Single-player, Multi-player, Steam Trading Ca...",9.99,Very Positive,76,2006
55,[Action],"[Action, Sci-fi, Story Rich, Singleplayer, Thi...","[Single-player, Steam Trading Cards]",9.99,Mostly Positive,70,2005
...,...,...,...,...,...,...,...
32109,[Action],"[FPS, Action, Sci-fi, Singleplayer, Shooter, A...",[Single-player],4.99,Very Positive,71,2001
32112,"[Action, Adventure]","[Action, Adventure, Violent, Story Rich, Class...",[Single-player],14.99,Overwhelmingly Positive,88,2002
32114,[Action],"[FPS, World War II, Multiplayer, Action, Shoot...","[Multi-player, Valve Anti-Cheat enabled]",4.99,Very Positive,79,2003
32116,[Action],"[FPS, Arena Shooter, Action, Classic, Multipla...","[Single-player, Multi-player, Steam Trading Ca...",14.99,Overwhelmingly Positive,93,2004


In [192]:
# Ordinal encoding of the sentiment labels: the position in this list, from
# most negative (0) to most positive (8), is the numeric score.
rating_mapping = {
    label: score
    for score, label in enumerate([
        'Overwhelmingly Negative',
        'Very Negative',
        'Negative',
        'Mostly Negative',
        'Mixed',
        'Mostly Positive',
        'Positive',
        'Very Positive',
        'Overwhelmingly Positive',
    ])
}
# Replace the labels by their scores and make both sentiment and metascore integer columns.
df_filtrado = df_filtrado.assign(
    sentiment=pd.to_numeric(df_filtrado['sentiment'].replace(rating_mapping)),
    metascore=df_filtrado['metascore'].astype(int),
)

In [193]:
# Preview the first ten rows with sentiment and metascore encoded as integers.
df_filtrado.head(10)

Unnamed: 0,genres,tags,specs,price,sentiment,metascore,year
28,[Action],"[FPS, Classic, Action, Sci-fi, Singleplayer, S...","[Single-player, Multi-player, Valve Anti-Cheat...",9.99,8,96,1998
39,[Strategy],"[Turn-Based Strategy, Strategy, Classic, Atmos...","[Single-player, Multi-player, Co-op]",6.99,5,84,2006
40,[Strategy],"[Strategy, Turn-Based Strategy, Fantasy, Turn-...","[Single-player, Multi-player, Co-op]",6.99,7,80,2006
41,"[Action, Indie, RPG]","[Multiplayer, Indie, Action, First-Person, Ste...","[Single-player, Multi-player, Steam Trading Ca...",9.99,7,76,2006
55,[Action],"[Action, Sci-fi, Story Rich, Singleplayer, Thi...","[Single-player, Steam Trading Cards]",9.99,5,70,2005
56,"[Indie, Strategy]","[Strategy, Indie, Multiplayer, Wargame, Cold W...","[Single-player, Multi-player]",9.99,7,84,2006
61,"[Casual, Indie]","[Casual, Indie, Singleplayer]",[Single-player],4.99,4,69,2006
62,[Strategy],"[Strategy, Comedy, Puzzle, Simulation, Classic...",[Single-player],4.99,7,81,2003
64,"[Adventure, RPG]","[Adventure, Female Protagonist, Story Rich, Fa...",[Single-player],19.99,7,75,2006
65,[Racing],"[Racing, Destruction, Multiplayer, Great Sound...",[Single-player],9.99,7,76,2006


In [194]:
# Confirm the dtypes and remaining null counts after the encoding step.
df_filtrado.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2425 entries, 28 to 32117
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   genres     2419 non-null   object 
 1   tags       2424 non-null   object 
 2   specs      2422 non-null   object 
 3   price      2425 non-null   float64
 4   sentiment  2425 non-null   int64  
 5   metascore  2425 non-null   int32  
 6   year       2425 non-null   int32  
dtypes: float64(1), int32(2), int64(1), object(3)
memory usage: 132.6+ KB


In [195]:
# Rows without a genres value cannot be encoded, so they are removed first.
df_filtrado = df_filtrado.dropna(subset=['genres'])

# Collect every genre that appears in the DataFrame.
all_genres = set()
for genres_list in df_filtrado['genres']:
    if isinstance(genres_list, list):  # only lists can be iterated for genre names
        all_genres.update(genres_list)

# Create one binary (0/1) column per genre. Iterating in sorted order keeps the
# column order identical on every run; iterating the set directly yields an
# order that can change between kernel sessions.
for genre in sorted(all_genres):
    df_filtrado[genre] = df_filtrado['genres'].apply(
        lambda genres_list, genre=genre: int(isinstance(genres_list, list) and genre in genres_list)
    )

# The original list-valued column is no longer needed once it has been encoded.
df_filtrado = df_filtrado.drop(columns=['genres'])

# df_filtrado now holds one binary indicator column per genre, ready for the regression.



In [196]:
# Discard the columns that are not used as model features
# (this includes the sentiment score encoded earlier).
df_filtrado = df_filtrado.drop(columns=['tags', 'specs', 'sentiment'])

In [197]:
# Pairwise correlations between price, metascore, year and the genre indicator columns.
df_filtrado.corr()

Unnamed: 0,price,metascore,year,Simulation,Indie,Adventure,Early Access,Massively Multiplayer,Free to Play,Action,RPG,Sports,Casual,Racing,Strategy
price,1.0,0.152787,0.336765,0.091034,-0.163809,-0.011906,-0.013221,-0.123763,-0.242812,0.01401,0.040737,0.166124,-0.140219,0.068541,0.037309
metascore,0.152787,1.0,-0.113739,-0.056534,-0.081107,-0.044948,0.019156,0.024484,0.012839,-0.023768,0.047507,0.014336,-0.022153,-0.011068,0.01526
year,0.336765,-0.113739,1.0,0.049112,0.42966,0.203963,-0.026657,0.014145,0.034424,0.011399,0.089064,0.10094,0.038457,0.008283,-0.073477
Simulation,0.091034,-0.056534,0.049112,1.0,-0.057825,-0.18695,0.031451,0.008717,-0.006753,-0.198726,-0.056484,0.15951,0.018452,0.064372,0.237757
Indie,-0.163809,-0.081107,0.42966,-0.057825,1.0,0.230704,0.030243,-0.065759,-0.066607,0.002008,0.015059,-0.045676,0.166834,-0.084267,-0.094479
Adventure,-0.011906,-0.044948,0.203963,-0.18695,0.230704,1.0,0.008359,-0.041492,-0.059969,-0.032806,-0.029108,-0.129605,0.063699,-0.106949,-0.321168
Early Access,-0.013221,0.019156,-0.026657,0.031451,0.030243,0.008359,1.0,0.101301,0.082124,0.02977,-0.01407,-0.004966,-0.009234,-0.005523,0.015249
Massively Multiplayer,-0.123763,0.024484,0.014145,0.008717,-0.065759,-0.041492,0.101301,1.0,0.617229,0.016894,0.161954,-0.024035,-0.044696,-0.010386,-0.001558
Free to Play,-0.242812,0.012839,0.034424,-0.006753,-0.066607,-0.059969,0.082124,0.617229,1.0,0.040874,0.100576,-0.014439,-0.037409,-0.032653,0.02222
Action,0.01401,-0.023768,0.011399,-0.198726,0.002008,-0.032806,0.02977,0.016894,0.040874,1.0,-0.042943,-0.0681,-0.116895,-0.060423,-0.322975


In [201]:
def _fit_price_model(features, target):
    """Train the degree-2 polynomial price regression.

    Splits the data 80/20 with a fixed seed, fits PolynomialFeatures followed
    by LinearRegression on the training part and scores it on the held-out part.

    Returns:
        (poly, model, feature_columns, rmse): the fitted transformer, the
        fitted regressor, the training column order and the test-set RMSE.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    poly = PolynomialFeatures(degree=2)
    X_train_poly = poly.fit_transform(X_train)
    X_test_poly = poly.transform(X_test)

    model = LinearRegression()
    model.fit(X_train_poly, y_train)

    mse = mean_squared_error(y_test, model.predict(X_test_poly))
    return poly, model, list(X_train.columns), mse ** 0.5


def predict_price(year, genres, metascore):
    """Predict a game's price from its release year, genres and metascore.

    The model is refitted from the module-level ``df_filtrado`` on every call
    that reaches the prediction step.

    Args:
        year: Release year of the game.
        genres: Genre names, either as an iterable of strings or as a single
            string holding one genre or several separated by commas.
            ``None`` means "no genres".
        metascore: Metascore of the game.

    Returns:
        ``0`` when 'Free to Play' is among the genres (no model is trained in
        that case); otherwise a tuple ``(predicted_price, rmse)`` where
        ``rmse`` is the model's error on the held-out test split.
    """
    import warnings

    # Normalise genres to a list of names. A bare string must not be iterated
    # directly: that would walk over its characters instead of genre names.
    if genres is None:
        genres = []
    elif isinstance(genres, str):
        genres = [name.strip() for name in genres.split(',') if name.strip()]
    else:
        genres = list(genres)

    # Free-to-play games cost nothing by definition, so skip training entirely.
    if 'Free to Play' in genres:
        return 0

    # Predictors are every column except the target (price).
    y = df_filtrado['price']
    X = df_filtrado.drop(columns=['price'])
    poly, model, feature_columns, rmse_poly = _fit_price_model(X, y)

    # Build the single-row input from the training columns themselves, so the
    # function keeps working whatever set of genre columns df_filtrado holds.
    genre_columns = [column for column in feature_columns if column not in ('metascore', 'year')]
    row = dict.fromkeys(feature_columns, 0)
    row['metascore'] = metascore
    row['year'] = year

    unknown_genres = []
    for genre in genres:
        if genre in genre_columns:
            row[genre] = 1
        else:
            unknown_genres.append(genre)
    if unknown_genres:
        # These have no column in the training data and cannot influence the result.
        warnings.warn(f"Ignoring genres not present in the training data: {unknown_genres}")

    # columns= fixes the same column order that was used to fit `poly`.
    X_new = pd.DataFrame([row], columns=feature_columns)
    y_pred_new = model.predict(poly.transform(X_new))[0]

    return y_pred_new, rmse_poly



In [203]:
# Pass genres as a list of genre names, which is what predict_price iterates over.
predict_price(2020, ["Free to Play"], 90)

0

In [200]:
# Persist the model-ready frame to CSV; index=False leaves the row index out of the file.
df_filtrado.to_csv('steam_games_model.csv', index=False)