In [37]:
import pandas as pd 
import json

# Ruta de tu nuevo archivo JSON
json_file_path = 'output_steam_games.json'

# Lista para almacenar los diccionarios convertidos
data_list = []

# Leer el archivo JSON línea por línea y convertir cada línea en un diccionario
with open(json_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        # Convertir la línea en un diccionario usando json.loads
        data_dict = json.loads(line)
        # Añadir el diccionario a la lista
        data_list.append(data_dict)

# Convertir la lista de diccionarios en un dataframe
df = pd.DataFrame(data_list)

# Mostrar el dataframe resultante
print(df)

              publisher                                 genres  \
0                   NaN                                    NaN   
1                   NaN                                    NaN   
2                   NaN                                    NaN   
3                   NaN                                    NaN   
4                   NaN                                    NaN   
...                 ...                                    ...   
120440  Ghost_RUS Games  [Casual, Indie, Simulation, Strategy]   
120441           Sacada              [Casual, Indie, Strategy]   
120442     Laush Studio            [Indie, Racing, Simulation]   
120443         SIXNAILS                        [Casual, Indie]   
120444              NaN                                    NaN   

                        app_name                     title  \
0                            NaN                       NaN   
1                            NaN                       NaN   
2                    

In [38]:
# Reemplazar los valores NaN por None
df = df.where(pd.notna(df), None)

print(df)

              publisher                                 genres  \
0                  None                                   None   
1                  None                                   None   
2                  None                                   None   
3                  None                                   None   
4                  None                                   None   
...                 ...                                    ...   
120440  Ghost_RUS Games  [Casual, Indie, Simulation, Strategy]   
120441           Sacada              [Casual, Indie, Strategy]   
120442     Laush Studio            [Indie, Racing, Simulation]   
120443         SIXNAILS                        [Casual, Indie]   
120444             None                                   None   

                        app_name                     title  \
0                           None                      None   
1                           None                      None   
2                    

In [39]:
df.columns

Index(['publisher', 'genres', 'app_name', 'title', 'url', 'release_date',
       'tags', 'reviews_url', 'specs', 'price', 'early_access', 'id',
       'developer'],
      dtype='object')

In [40]:
df_user_gamer = df[['id', 'price']]

# Eliminar filas con muchos datos faltantes
df_user_gamer = df_user_gamer.dropna(thresh=1)

df_user_gamer

Unnamed: 0,id,price
88310,761140,4.99
88311,643980,Free To Play
88312,670290,Free to Play
88313,767400,0.99
88314,773570,2.99
...,...,...
120440,773640,1.99
120441,733530,4.99
120442,610660,1.99
120443,658870,4.99


In [48]:
df_user_gamer = df_user_gamer.rename(columns={'id': 'item_id'})

In [49]:
df_user_gamer.to_csv('id-gamer-price.csv', index=False)

In [42]:
df_desarrolladores = df[['publisher', 'release_date', 'price', 'early_access']]

# Eliminar filas con muchos datos faltantes
df_desarrolladores_limpio = df_desarrolladores.dropna(thresh=3)

df_desarrolladores_limpio

Unnamed: 0,publisher,release_date,price,early_access
88310,Kotoshiro,2018-01-04,4.99,False
88311,"Making Fun, Inc.",2018-01-04,Free To Play,False
88312,Poolians.com,2017-07-24,Free to Play,False
88313,彼岸领域,2017-12-07,0.99,False
88315,Trickjump Games Ltd,2018-01-04,3.99,False
...,...,...,...,...
120439,Bidoniera Games,2018-01-04,1.99,False
120440,Ghost_RUS Games,2018-01-04,1.99,False
120441,Sacada,2018-01-04,4.99,False
120442,Laush Studio,2018-01-04,1.99,False


In [43]:
# Convertir la columna 'release_date' a formato de fecha con manejo de errores
# Utilizamos .loc[] para evitar SettingWithCopyWarning al convertir 'release_date' a formato de fecha

df_desarrolladores_limpio.loc[:, 'release_date'] = pd.to_datetime(df_desarrolladores_limpio['release_date'], errors='coerce')

df_desarrolladores_limpio

Unnamed: 0,publisher,release_date,price,early_access
88310,Kotoshiro,2018-01-04 00:00:00,4.99,False
88311,"Making Fun, Inc.",2018-01-04 00:00:00,Free To Play,False
88312,Poolians.com,2017-07-24 00:00:00,Free to Play,False
88313,彼岸领域,2017-12-07 00:00:00,0.99,False
88315,Trickjump Games Ltd,2018-01-04 00:00:00,3.99,False
...,...,...,...,...
120439,Bidoniera Games,2018-01-04 00:00:00,1.99,False
120440,Ghost_RUS Games,2018-01-04 00:00:00,1.99,False
120441,Sacada,2018-01-04 00:00:00,4.99,False
120442,Laush Studio,2018-01-04 00:00:00,1.99,False


In [44]:
df_desarrolladores_limpio.to_csv('desarrolladores.csv', index=False)

In [45]:
df_desarrolladores_limpio.columns

Index(['publisher', 'release_date', 'price', 'early_access'], dtype='object')

In [46]:
# guardar el DataFrame modificado en un nuevo archivo CSV
df.to_csv('output_steam_games.csv', index=False)

# guardar el DataFrame modificado en un nuevo archivo Parquet
#df.to_parquet('output_steam_games.parquet', index=False)