<h2>Descomprimiendo archivos</h2>

In [1]:
import gzip
import shutil
import os

archivo_comprimido = 'PI MLOps - STEAM\\users_items.json.gz'

ruta_descomprimido = os.path.splitext(archivo_comprimido)[0]

with gzip.open(archivo_comprimido, 'rb') as f_in:
    with open(ruta_descomprimido, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

print(f'Archivo descomprimido guardado en: {ruta_descomprimido}')

Archivo descomprimido guardado en: PI MLOps - STEAM\users_items.json


<h2>Función para leer archivos JSON</h2>

In [2]:
import pandas as pd
import os
import ast

def leerJSON(archivo_json):

    datos = []

    i = 0
    try: 
        
        with open(archivo_json, 'r',encoding='utf-8') as file:
            try:
                for line in file:
                    i = i+1
                    try:
                        dato = ast.literal_eval(line.strip()) 
                        if dato:
                            datos.append(dato)
                    except:
                        print(f"Error al cargar JSON en la línea:")

                    if(i==1000):
                        break
            except UnicodeDecodeError as e :
                print(e)
            
    except FileNotFoundError:
        print(f"El archivo {archivo_json} no fue encontrado.")

    df = pd.DataFrame(datos)

    return df

<h1>Leyendo archivos JSON users_reviews</h1>

In [3]:
archivo_json = "PI MLOps - STEAM/user_reviews.json"

df_user_reviews = leerJSON(archivo_json)

df_user_reviews.head(5)

Unnamed: 0,user_id,user_url,reviews
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"[{'funny': '', 'posted': 'Posted November 5, 2..."
1,js41637,http://steamcommunity.com/id/js41637,"[{'funny': '', 'posted': 'Posted June 24, 2014..."
2,evcentric,http://steamcommunity.com/id/evcentric,"[{'funny': '', 'posted': 'Posted February 3.',..."
3,doctr,http://steamcommunity.com/id/doctr,"[{'funny': '', 'posted': 'Posted October 14, 2..."
4,maplemage,http://steamcommunity.com/id/maplemage,"[{'funny': '3 people found this review funny',..."


In [4]:
df_user_reviews.isna().sum()

user_id     0
user_url    0
reviews     0
dtype: int64

In [5]:
df_user_reviews.isna().sum(axis=1)

0      0
1      0
2      0
3      0
4      0
      ..
995    0
996    0
997    0
998    0
999    0
Length: 1000, dtype: int64

<h3>Explode User Reviews</h3>

In [6]:
# Usar la función explode para descomponer la lista de revisiones en filas separadas
df_user_reviews_explode = df_user_reviews.explode('reviews')

# Resetear el índice del DataFrame
df_user_reviews_explode.reset_index(drop=True, inplace=True)

# Utilizar pd.json_normalize para crear columnas para cada clave en los diccionarios
df_user_reviews_explode = pd.concat([df_user_reviews_explode, pd.json_normalize(df_user_reviews_explode['reviews'])], axis=1)

# Eliminar la columna 'reviews' original
df_user_reviews_explode.drop(columns=['reviews'], inplace=True)

# Imprimir el DataFrame resultante
df_user_reviews_explode.head(2)

Unnamed: 0,user_id,user_url,funny,posted,last_edited,item_id,helpful,recommend,review
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,,"Posted November 5, 2011.",,1250,No ratings yet,True,Simple yet with great replayability. In my opi...
1,76561197970982479,http://steamcommunity.com/profiles/76561197970...,,"Posted July 15, 2011.",,22200,No ratings yet,True,It's unique and worth a playthrough.


<h3>Seleccionar fecha de "posted" y crear columna fecha</h3>

In [7]:
df_user_reviews_explode['posted'] = df_user_reviews_explode['posted'].astype(str)
df_user_reviews_explode['fecha'] = df_user_reviews_explode['posted'].apply(lambda x: x[7:])

In [8]:
df_user_reviews_explode.head(2)

Unnamed: 0,user_id,user_url,funny,posted,last_edited,item_id,helpful,recommend,review,fecha
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,,"Posted November 5, 2011.",,1250,No ratings yet,True,Simple yet with great replayability. In my opi...,"November 5, 2011."
1,76561197970982479,http://steamcommunity.com/profiles/76561197970...,,"Posted July 15, 2011.",,22200,No ratings yet,True,It's unique and worth a playthrough.,"July 15, 2011."


<h3>Cambiar el formato de "Fecha" y eliminar columna "posted"</h3>

In [9]:
df_user_reviews_explode['fecha'] = df_user_reviews_explode['fecha'].str.rstrip('.')
df_user_reviews_explode['fecha'] = pd.to_datetime(df_user_reviews_explode['fecha'], errors='coerce', format='%B %d, %Y')
df_user_reviews_explode = df_user_reviews_explode.dropna(subset=['fecha'])
df_user_reviews_final = df_user_reviews_explode.drop(columns=['posted', 'funny'])
df_user_reviews_final.head(2)



Unnamed: 0,user_id,user_url,last_edited,item_id,helpful,recommend,review,fecha
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,,1250,No ratings yet,True,Simple yet with great replayability. In my opi...,2011-11-05
1,76561197970982479,http://steamcommunity.com/profiles/76561197970...,,22200,No ratings yet,True,It's unique and worth a playthrough.,2011-07-15


In [10]:
df_user_reviews_explode.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1928 entries, 0 to 2453
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   user_id      1928 non-null   object        
 1   user_url     1928 non-null   object        
 2   funny        1928 non-null   object        
 3   posted       1928 non-null   object        
 4   last_edited  1928 non-null   object        
 5   item_id      1928 non-null   object        
 6   helpful      1928 non-null   object        
 7   recommend    1928 non-null   object        
 8   review       1928 non-null   object        
 9   fecha        1928 non-null   datetime64[ns]
dtypes: datetime64[ns](1), object(9)
memory usage: 165.7+ KB


<h3>Guardando dataframe en archivo csv</h3>

In [11]:
df_user_reviews_final.to_csv("df_user_reviews_final.csv")

<h1>Leyendo archivo JSON users_items</h1>

In [12]:
archivo_json = "PI MLOps - STEAM/users_items.json"

df_user_items = leerJSON(archivo_json)

df_user_items.head(2)

Unnamed: 0,user_id,items_count,steam_id,user_url,items
0,76561197970982479,277,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"[{'item_id': '10', 'item_name': 'Counter-Strik..."
1,js41637,888,76561198035864385,http://steamcommunity.com/id/js41637,"[{'item_id': '10', 'item_name': 'Counter-Strik..."


In [13]:
df_user_items.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   user_id      1000 non-null   object
 1   items_count  1000 non-null   int64 
 2   steam_id     1000 non-null   object
 3   user_url     1000 non-null   object
 4   items        1000 non-null   object
dtypes: int64(1), object(4)
memory usage: 39.2+ KB


<h3>Explode User items</h3>

In [14]:
#Descompone la lista en filas separadas
df_user_items_explode = df_user_items.explode('items')

#Resetear índece del dataframe
df_user_items_explode.reset_index(drop=True, inplace=True)

#Creamos las nuevas columnas para cada clave
df_user_items_explode = pd.concat([df_user_items_explode, pd.json_normalize(df_user_items_explode['items'])], axis=1)

#Eliminamos 'reviews' original
df_user_items_explode.drop(columns=['items'], inplace=True)

df_user_items_explode.head(2)


Unnamed: 0,user_id,items_count,steam_id,user_url,item_id,item_name,playtime_forever,playtime_2weeks
0,76561197970982479,277,76561197970982479,http://steamcommunity.com/profiles/76561197970...,10,Counter-Strike,6.0,0.0
1,76561197970982479,277,76561197970982479,http://steamcommunity.com/profiles/76561197970...,20,Team Fortress Classic,0.0,0.0


In [15]:
df_user_items_explode.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89415 entries, 0 to 89414
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   user_id           89415 non-null  object 
 1   items_count       89415 non-null  int64  
 2   steam_id          89415 non-null  object 
 3   user_url          89415 non-null  object 
 4   item_id           89280 non-null  object 
 5   item_name         89280 non-null  object 
 6   playtime_forever  89280 non-null  float64
 7   playtime_2weeks   89280 non-null  float64
dtypes: float64(2), int64(1), object(5)
memory usage: 5.5+ MB


<h3>Eliminamos columnas con valores nulos</h3>

In [16]:
df_user_items_final = df_user_items_explode.dropna(subset=['item_id','playtime_forever'])

In [17]:
df_user_items_final.to_csv('df_user_items_final.csv')

<h1>Leyendo archivo JSON Steam Games</h1>

In [18]:
import pandas as pd
import json

data = []
with open('PI MLOps - STEAM/steam_games.json', 'r') as file:
    for line in file:
        data.append(json.loads(line))

df_steam_games = pd.DataFrame(data)


In [19]:
df_steam_games.head(3)

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,discount_price,specs,price,early_access,id,metascore,developer,user_id,steam_id,items,items_count
0,,,,,,,,,,,,,,,,76561197970982479,76561197970982479,"[{'item_id': '10', 'item_name': 'Counter-Strik...",277.0
1,,,,,,,,,,,,,,,,js41637,76561198035864385,"[{'item_id': '10', 'item_name': 'Counter-Strik...",888.0
2,,,,,,,,,,,,,,,,evcentric,76561198007712555,"[{'item_id': '1200', 'item_name': 'Red Orchest...",137.0


In [20]:
df_steam_games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120445 entries, 0 to 120444
Data columns (total 19 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   publisher       24083 non-null  object 
 1   genres          28852 non-null  object 
 2   app_name        32133 non-null  object 
 3   title           30085 non-null  object 
 4   url             32135 non-null  object 
 5   release_date    30068 non-null  object 
 6   tags            31972 non-null  object 
 7   reviews_url     32133 non-null  object 
 8   discount_price  225 non-null    float64
 9   specs           31465 non-null  object 
 10  price           30758 non-null  object 
 11  early_access    32135 non-null  object 
 12  id              32133 non-null  object 
 13  metascore       2677 non-null   object 
 14  developer       28836 non-null  object 
 15  user_id         88310 non-null  object 
 16  steam_id        88310 non-null  object 
 17  items           88310 non-nul

<h4>Eliminando columnas</h4>

In [21]:
drop_columnas = ['user_id', 'steam_id', 'items', 'items_count']
df_steam_games.drop(drop_columnas, axis=1, inplace=True)
df_steam_games.head(2)

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,discount_price,specs,price,early_access,id,metascore,developer
0,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,


<h4>Verificar cuántas filas tienen toda las columnas nulas</h4>

In [22]:
nulos_por_fila = df_steam_games.isna().sum(axis=1)

# Contar las filas que tienen exactamente 10 valores nulos
cantidad_filas = (nulos_por_fila > 14).sum()

cantidad_filas

88310

<h4>Eliminando filas con todos las columnas con valores nulos</h4>

In [23]:
df_steam_games.dropna(how='all', inplace=True)

In [24]:
df_steam_games.info()

<class 'pandas.core.frame.DataFrame'>
Index: 32135 entries, 88310 to 120444
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   publisher       24083 non-null  object 
 1   genres          28852 non-null  object 
 2   app_name        32133 non-null  object 
 3   title           30085 non-null  object 
 4   url             32135 non-null  object 
 5   release_date    30068 non-null  object 
 6   tags            31972 non-null  object 
 7   reviews_url     32133 non-null  object 
 8   discount_price  225 non-null    float64
 9   specs           31465 non-null  object 
 10  price           30758 non-null  object 
 11  early_access    32135 non-null  object 
 12  id              32133 non-null  object 
 13  metascore       2677 non-null   object 
 14  developer       28836 non-null  object 
dtypes: float64(1), object(14)
memory usage: 3.9+ MB


<h4>Verificando duplicados</h4>

In [25]:
df_steam_games['title'].unique().size

30055

<h4>Items con "title" duplicados</h4>

In [26]:

conteo_title = df_steam_games['title'].value_counts()


ids_repitentes = conteo_title[conteo_title > 1].index.tolist()


for title in ids_repitentes:
    print(f"'title': {title}, Cantidad de repeticiones: {conteo_title[title]}")


'title': Soundtrack, Cantidad de repeticiones: 3
'title': Black Rose, Cantidad de repeticiones: 2
'title': MORE SWEATER? OK!, Cantidad de repeticiones: 2
'title': Total Extreme Wrestling, Cantidad de repeticiones: 2
'title': The Dream Machine: Chapter 4, Cantidad de repeticiones: 2
'title': New York Bus Simulator, Cantidad de repeticiones: 2
'title': Prism, Cantidad de repeticiones: 2
'title': Invasion, Cantidad de repeticiones: 2
'title': Batman: Arkham City - Game of the Year Edition, Cantidad de repeticiones: 2
'title': Warhammer Quest, Cantidad de repeticiones: 2
'title': Parasite, Cantidad de repeticiones: 2
'title': Castles, Cantidad de repeticiones: 2
'title': Goro, Cantidad de repeticiones: 2
'title': Borealis, Cantidad de repeticiones: 2
'title': Alter Ego, Cantidad de repeticiones: 2
'title': Altitude, Cantidad de repeticiones: 2
'title': Invisible Apartment Zero, Cantidad de repeticiones: 2
'title': Hero, Cantidad de repeticiones: 2
'title': Aliens: Colonial Marines - Reconn

<h4>Verificando filas con "id" duplicados</h4>

In [27]:
df_steam_games['id'].unique().size

32133

In [28]:
conteo_ids = df_steam_games['id'].value_counts()

ids_repitentes = conteo_ids[conteo_ids > 1].index.tolist()

for id in ids_repitentes:
    print(f"'id': {id}, Cantidad de repeticiones: {conteo_ids[id]}")


'id': 612880, Cantidad de repeticiones: 2


In [29]:
df_steam_games[df_steam_games['id']=='612880']

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,discount_price,specs,price,early_access,id,metascore,developer
102204,Bethesda Softworks,[Action],Wolfenstein II: The New Colossus,Wolfenstein II: The New Colossus,http://store.steampowered.com/app/612880/,2017-10-26,"[Action, FPS, Gore, Violent, Alternate History...",http://steamcommunity.com/app/612880/reviews/?...,,"[Single-player, Steam Achievements, Full contr...",59.99,False,612880,86,Machine Games
102883,Bethesda Softworks,[Action],Wolfenstein II: The New Colossus,Wolfenstein II: The New Colossus,http://store.steampowered.com/app/612880/Wolfe...,2017-10-26,"[Action, FPS, Gore, Violent, Alternate History...",http://steamcommunity.com/app/612880/reviews/?...,,"[Single-player, Steam Achievements, Full contr...",59.99,False,612880,86,Machine Games


<h4>Eliminando fila duplicada</h4>

In [30]:
indice_a_eliminar = df_steam_games[df_steam_games['id'] == '612880'].index[1]

# Eliminar la fila especificando su índice
df_steam_games.drop(indice_a_eliminar, inplace=True)

<h4>Agregar columna de precios</h4>

In [31]:
df_steam_games.dropna(subset=['price'], inplace=True)

unique_values = df_steam_games['price'].unique()

unique_strings = [value for value in unique_values if isinstance(value, str)]

print(unique_strings)


['Free To Play', 'Free to Play', 'Free', 'Free Demo', 'Play for Free!', 'Install Now', 'Play WARMACHINE: Tactics Demo', 'Free Mod', 'Install Theme', 'Third-party', 'Play Now', 'Free HITMAN™ Holiday Pack', 'Play the Demo', 'Starting at $499.00', 'Starting at $449.00', 'Free to Try', 'Free Movie', 'Free to Use']


In [32]:
def convertir_precio(valor):
    try:
        return float(valor)
    except ValueError:
        return 0.0 

df_steam_games['price2'] = df_steam_games['price'].apply(convertir_precio)



<h4>Eliminar nulos de columna "genres"</h4>

In [33]:
#Eliminar nan y cambiar de tipo a la columma
df_steam_games.dropna(subset=['genres'], inplace=True)
df_steam_games['genres'] = df_steam_games['genres'].astype('str')


In [34]:
import numpy as np

<h4>Eliminando nulos de columna "developer"</h4>

In [35]:
df_steam_games.dropna(subset=['developer'], inplace=True)

<h4>Obtener año de columna "release_date"</h4>

In [36]:
df_steam_games['releasedate2'] = df_steam_games['release_date'].apply(pd.to_datetime, errors='coerce')
df_steam_games.dropna(subset=['releasedate2'], inplace=True)
df_steam_games['Año'] = df_steam_games['releasedate2'].dt.year

<h4>Eliminando columnas que no utilizamos</h4>

In [37]:
df_steam_games.drop(columns=['tags', 'reviews_url', 'discount_price', 'early_access', 'metascore', 'price', 'release_date'], inplace=True)
df_steam_games.head(2)

Unnamed: 0,publisher,genres,app_name,title,url,specs,id,developer,price2,releasedate2,Año
88310,Kotoshiro,"['Action', 'Casual', 'Indie', 'Simulation', 'S...",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,[Single-player],761140,Kotoshiro,4.99,2018-01-04,2018
88311,"Making Fun, Inc.","['Free to Play', 'Indie', 'RPG', 'Strategy']",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,"[Single-player, Multi-player, Online Multi-Pla...",643980,Secret Level SRL,0.0,2018-01-04,2018


In [38]:
df_steam_games.to_csv('df_steam_games_final.csv')

<h3>def userdata(User_id)</h3>

In [39]:
def userdata(user_id):
    df_user_items_id = df_user_items_final[df_user_items_final['user_id']==user_id]['item_id']
    respuesta = ''
    
    items = df_user_items_id.values
    precios =  []
    for i in items:
        precio = df_steam_games[df_steam_games['id']== i]['price2']
        if not precio.empty:
            precios.append(precio.values[0])

    df_user_reviews_id = df_user_reviews_final[df_user_reviews_final['user_id']==user_id]['recommend']

    j = 0
    for i in df_user_reviews_id.values:
        if(i == True):
            j = j + 1

    recomendacion = j/len(df_user_reviews_id.values)*100

    suma = 0

    for precio in precios:
        suma += precio

    item_count = df_user_items_final[df_user_items_final['user_id']=='js41637']['items_count'].iloc[0]

    respuesta = {'recomendación:': recomendacion, 
                'Cantidad de dinero:' : suma, 
                'Cantidad de items': item_count}
    
    return respuesta
    

In [40]:
userdata('js41637')

{'recomendación:': 100.0,
 'Cantidad de dinero:': 8356.449999999882,
 'Cantidad de items': 888}

<h3>def countreviews(`YYYY-MM-DD`,`YYYY-MM-DD`)</h3>

In [41]:
def countreviews(fecha1, fecha2):
    df_user_fechas=df_user_reviews_final[['user_id']][df_user_reviews_final['fecha']
                                                  .between(fecha1, fecha2)].value_counts()
                                                  
    primeros_valores = df_user_fechas.index.get_level_values('user_id').tolist()

    recomendaciones = []
    for i in primeros_valores:
        recomendacion = (userdata(i)[list(userdata(i).keys())[0]])
        recomendaciones.append(recomendacion)
    
    respuesta = {'usuario': primeros_valores, 'recomendación':recomendaciones}

    df = pd.DataFrame(respuesta)

    respuesta = df.to_dict(orient='records')

    return respuesta

In [42]:
countreviews('2011-01-01','2012-01-01')

[{'usuario': 'GamerFag', 'recomendación': 100.0},
 {'usuario': '76561197970982479', 'recomendación': 100.0},
 {'usuario': '76561197974500703', 'recomendación': 90.0},
 {'usuario': 'FunkMeister1', 'recomendación': 88.88888888888889},
 {'usuario': 'chidvd', 'recomendación': 100.0},
 {'usuario': '2xDelorean', 'recomendación': 100.0},
 {'usuario': '76561197994969639', 'recomendación': 100.0},
 {'usuario': '76561198040530605', 'recomendación': 100.0}]

<h3>def genre(género:):</h3>
<h4>Devuelve el `puesto` en el que se encuentra un género sobre el ranking de los mismos analizado bajo la columna PlayTimeForever.</h4>

In [43]:
# Convertir la columna 'Generos' en una lista de listas
generos_lista = df_steam_games['genres'].tolist()
# Crear una lista plana de todos los géneros
generos = []
for lista in generos_lista:
    lista = eval(lista)
    for gen in lista:
        generos.append(gen)

generos_unicos = list(set(generos))

print(generos_unicos)


['Racing', 'Design &amp; Illustration', 'Animation &amp; Modeling', 'RPG', 'Strategy', 'Free to Play', 'Sports', 'Indie', 'Education', 'Action', 'Casual', 'Adventure', 'Audio Production', 'Massively Multiplayer', 'Utilities', 'Accounting', 'Video Production', 'Simulation', 'Web Publishing', 'Software Training', 'Photo Editing', 'Early Access']


In [44]:
playtimes = []
for genero in generos_unicos:
    df_steam_games_generos = df_steam_games[df_steam_games['genres']
                                                    .apply(lambda generos: genero in generos)]
    lista_items = df_steam_games_generos['id'].tolist()

    df_user_items_final_items = df_user_items_final[df_user_items_final['item_id'].isin(lista_items)]
    df_user_items_final_items

    playtime = df_user_items_final_items['playtime_forever'].sum()

    playtimes.append(playtime)

In [45]:
dicc_genres = {'genero': generos_unicos, 'Time': playtimes}

In [46]:
df_generos_times = pd.DataFrame(dicc_genres)

df_generos_times = df_generos_times.sort_values('Time', ascending=False)

df_generos_times = df_generos_times.reset_index(drop=True)

In [47]:
df_generos_times.head(2)

Unnamed: 0,genero,Time
0,Action,72525881.0
1,Indie,35979167.0


In [48]:
df_generos_times.index = df_generos_times.index + 1
df_generos_times.reset_index(0, inplace=True)
df_generos_times.rename(columns={'index': 'Rank'}, inplace=True)
df_generos_times.to_csv('rankgenres.csv', index=False)

In [49]:
def genre(genero):
    df = pd.read_csv('rankgenres.csv')
    df = df[df['genero'] == genero]
    if(len(df)==0):
        rank = 'Genero no existe'
    else:
        rank = df['Rank'].values[0]

    return rank

In [50]:
genre('Action')

1

<h3>def userforgenre(género):
</h3>
<h4>`Top 5` de usuarios con más horas de juego en el género dado, con su URL (del user) y user_id.</h4>

In [51]:
def userforgenre(genero):
    df_steam_games_generos = df_steam_games[df_steam_games['genres']
                                                      .apply(lambda generos: genero in generos)]

    juegos = df_steam_games_generos['id'].values
    df_user_items_final_genres = df_user_items_final[df_user_items_final['item_id'].isin(juegos)]

    df_top_users = df_user_items_final_genres[['user_id','user_url', 'playtime_forever']].groupby(['user_id', 'user_url']).sum('playtime_forever')

    df_top_users_reset = df_top_users.reset_index().head(5)

    respuesta = df_top_users_reset.to_dict(orient='records')
    
    return respuesta

In [52]:
userforgenre('Action')

[{'user_id': '-AnimeIsMyThing-',
  'user_url': 'http://steamcommunity.com/id/-AnimeIsMyThing-',
  'playtime_forever': 166425.0},
 {'user_id': '-Azsael-',
  'user_url': 'http://steamcommunity.com/id/-Azsael-',
  'playtime_forever': 154523.0},
 {'user_id': '-GM-Dragon',
  'user_url': 'http://steamcommunity.com/id/-GM-Dragon',
  'playtime_forever': 24000.0},
 {'user_id': '1122305938',
  'user_url': 'http://steamcommunity.com/id/1122305938',
  'playtime_forever': 53144.0},
 {'user_id': '1234865654',
  'user_url': 'http://steamcommunity.com/id/1234865654',
  'playtime_forever': 30316.0}]

<h3>def developer(desarrollador:)</h3>
<h4>Cantidad` de items y `porcentaje` de contenido Free por año según empresa desarrolladora.
Ejemplo de salida</h4>

In [53]:
# Supongamos que tienes un DataFrame llamado df con las columnas 'developer', 'price2', y 'año'

# Filtrar los juegos con precio igual a 0.0 y agrupar por 'developer' y 'año'
juegos_gratis = df_steam_games[df_steam_games['price2'] == 0.0].groupby(['developer', 'Año']).size().reset_index(name='total gratis')

# Agrupar todos los juegos por 'developer' y 'año' y contar el total
todos_los_juegos = df_steam_games.groupby(['developer', 'Año']).size().reset_index(name='total')

# Combinar ambos DataFrames en uno solo
developer_año = pd.merge(todos_los_juegos, juegos_gratis, on=['developer', 'Año'], how='left').fillna(0)

developer_año.head(20)


Unnamed: 0,developer,Año,total,total gratis
0,+7 Software,2016,1,0.0
1,"+Mpact Games, LLC.",2017,1,0.0
2,.M.Y.W.,2016,1,0.0
3,.ez Games,2017,1,0.0
4,07th Expansion,2015,2,0.0
5,07th Expansion,2016,3,0.0
6,07th Expansion,2017,3,0.0
7,08 Games,2016,1,0.0
8,100 Plus Games llc,2017,1,0.0
9,100 Stones Interactive,2016,1,0.0


In [54]:
def contenidoFree(fila):
    if(fila['total gratis']==0.0):
        return 0.0
    else:
        return round((fila['total gratis']/fila['total'])*100,2)

In [55]:
developer_año['Contenido Free'] = developer_año.apply(contenidoFree, axis=1)

In [56]:
developer_año.head(5)

Unnamed: 0,developer,Año,total,total gratis,Contenido Free
0,+7 Software,2016,1,0.0,0.0
1,"+Mpact Games, LLC.",2017,1,0.0,0.0
2,.M.Y.W.,2016,1,0.0,0.0
3,.ez Games,2017,1,0.0,0.0
4,07th Expansion,2015,2,0.0,0.0


In [57]:
developer_año.to_csv('developerByAño.csv', index=False)

In [58]:
def developer(developer):
    df_developer = pd.read_csv('developerByAño.csv')
    df_developer = df_developer[df_developer['developer']==developer]

    if(len(df_developer)==0):
        respuesta = 'No hay developer con ese nombre'
    else:
        respuesta = df_developer.to_dict(orient='records')
    return respuesta

In [59]:
developer('100 Stones Interactive')

[{'developer': '100 Stones Interactive',
  'Año': 2016,
  'total': 1,
  'total gratis': 0.0,
  'Contenido Free': 0.0},
 {'developer': '100 Stones Interactive',
  'Año': 2017,
  'total': 3,
  'total gratis': 0.0,
  'Contenido Free': 0.0}]