# ETL

Primero importamos todas las librerias necesarias

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
import re
import pandas as pd
import numpy as np
import ast
from dateutil import parser

  from .autonotebook import tqdm as notebook_tqdm


Posteriormete cargamos nuestro modelo preentrenado para realizar el analisis de sentimientos llamado "bertweet-base-sentimen-analysis"

In [2]:
# Load model directly
tokenizer = AutoTokenizer.from_pretrained("finiteautomata/bertweet-base-sentiment-analysis")
model = AutoModelForSequenceClassification.from_pretrained("finiteautomata/bertweet-base-sentiment-analysis")

Cargamos el archivo de user_review a nuestro dataframe llamado df_aplanado

In [3]:
# Cargar los datos desde el archivo JSON
rows = []
#with open('datasets/user_reviews.json') as  f:
with open('datasets/user_reviews.json') as  f:
    for line in f.readlines():
        rows.append(ast.literal_eval(line))

# Normalizar el JSON y crear un DataFrame
df_aplanado = pd.json_normalize(rows, 'reviews', ['user_id', 'user_url'])

Verificamos que el dataframe se haya cargado correctamente

In [4]:
df_aplanado

Unnamed: 0,funny,posted,last_edited,item_id,helpful,recommend,review,user_id,user_url
0,,"Posted November 5, 2011.",,1250,No ratings yet,True,Simple yet with great replayability. In my opi...,76561197970982479,http://steamcommunity.com/profiles/76561197970...
1,,"Posted July 15, 2011.",,22200,No ratings yet,True,It's unique and worth a playthrough.,76561197970982479,http://steamcommunity.com/profiles/76561197970...
2,,"Posted April 21, 2011.",,43110,No ratings yet,True,Great atmosphere. The gunplay can be a bit chu...,76561197970982479,http://steamcommunity.com/profiles/76561197970...
3,,"Posted June 24, 2014.",,251610,15 of 20 people (75%) found this review helpful,True,I know what you think when you see this title ...,js41637,http://steamcommunity.com/id/js41637
4,,"Posted September 8, 2013.",,227300,0 of 1 people (0%) found this review helpful,True,For a simple (it's actually not all that simpl...,js41637,http://steamcommunity.com/id/js41637
...,...,...,...,...,...,...,...,...,...
59300,,Posted July 10.,,70,No ratings yet,True,a must have classic from steam definitely wort...,76561198312638244,http://steamcommunity.com/profiles/76561198312...
59301,,Posted July 8.,,362890,No ratings yet,True,this game is a perfect remake of the original ...,76561198312638244,http://steamcommunity.com/profiles/76561198312...
59302,1 person found this review funny,Posted July 3.,,273110,1 of 2 people (50%) found this review helpful,True,had so much fun plaing this and collecting res...,LydiaMorley,http://steamcommunity.com/id/LydiaMorley
59303,,Posted July 20.,,730,No ratings yet,True,:D,LydiaMorley,http://steamcommunity.com/id/LydiaMorley


Ahora creamos la funcion con la que realizaremos el analisis de sentimientos

In [5]:
def sentiment_score(review):
    tokens = tokenizer.encode(review,return_tensors='pt')
    result = model(tokens)
    return int(torch.argmax(result.logits))


Creamos una nueva columna llamada sentiment_analysis a la cual le aplicaremos la funcion creada anterioremente para que nos de el analisis de sentimientos

In [None]:
df_aplanado['sentiment_analysis'] = df_aplanado['review'].apply(lambda x: sentiment_score(x[:110]))

In [216]:
#df_aplanado.to_csv('datasets_clean/df_respaldo_sentiment.csv', index=False)
df_aplanado.to_json('datasets_clean/df_respaldo_sentiment.json', orient='records', lines=True)

Verificamos que se haya creado correctamente la nueva columna

In [10]:
df_aplanado

Unnamed: 0,funny,posted,last_edited,item_id,helpful,recommend,review,user_id,user_url,sentiment_analysis
0,,"Posted November 5, 2011.",,1250,No ratings yet,True,Simple yet with great replayability. In my opi...,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
1,,"Posted July 15, 2011.",,22200,No ratings yet,True,It's unique and worth a playthrough.,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
2,,"Posted April 21, 2011.",,43110,No ratings yet,True,Great atmosphere. The gunplay can be a bit chu...,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
3,,"Posted June 24, 2014.",,251610,15 of 20 people (75%) found this review helpful,True,I know what you think when you see this title ...,js41637,http://steamcommunity.com/id/js41637,1
4,,"Posted September 8, 2013.",,227300,0 of 1 people (0%) found this review helpful,True,For a simple (it's actually not all that simpl...,js41637,http://steamcommunity.com/id/js41637,2
...,...,...,...,...,...,...,...,...,...,...
59300,,Posted July 10.,,70,No ratings yet,True,a must have classic from steam definitely wort...,76561198312638244,http://steamcommunity.com/profiles/76561198312...,2
59301,,Posted July 8.,,362890,No ratings yet,True,this game is a perfect remake of the original ...,76561198312638244,http://steamcommunity.com/profiles/76561198312...,2
59302,1 person found this review funny,Posted July 3.,,273110,1 of 2 people (50%) found this review helpful,True,had so much fun plaing this and collecting res...,LydiaMorley,http://steamcommunity.com/id/LydiaMorley,2
59303,,Posted July 20.,,730,No ratings yet,True,:D,LydiaMorley,http://steamcommunity.com/id/LydiaMorley,2


In [9]:
df_aplanado = pd.read_json('datasets_clean/df_respaldo_sentiment.json', lines=True)

Borramos columnas que no nos interesan para trabajar

In [11]:
df_aplanado = df_aplanado.drop(['funny','last_edited','helpful','review'], axis=1)

verificamos que se hayan eliminado de forma correcta

In [12]:
df_aplanado

Unnamed: 0,posted,item_id,recommend,user_id,user_url,sentiment_analysis
0,"Posted November 5, 2011.",1250,True,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
1,"Posted July 15, 2011.",22200,True,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
2,"Posted April 21, 2011.",43110,True,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
3,"Posted June 24, 2014.",251610,True,js41637,http://steamcommunity.com/id/js41637,1
4,"Posted September 8, 2013.",227300,True,js41637,http://steamcommunity.com/id/js41637,2
...,...,...,...,...,...,...
59300,Posted July 10.,70,True,76561198312638244,http://steamcommunity.com/profiles/76561198312...,2
59301,Posted July 8.,362890,True,76561198312638244,http://steamcommunity.com/profiles/76561198312...,2
59302,Posted July 3.,273110,True,LydiaMorley,http://steamcommunity.com/id/LydiaMorley,2
59303,Posted July 20.,730,True,LydiaMorley,http://steamcommunity.com/id/LydiaMorley,2


Revisamos cuantas fechas tienen la palabra posted y nos damos cuenta que todas las fechas lo contienen 

In [13]:
df_aplanado['posted'].str.contains('Posted', case=False).sum()


59305

Eliminamos la palabra Posted de todos las fechas

In [14]:
# Utilizar una expresión regular para eliminar "Posted" y extraer la fecha
df_aplanado['posted'] = df_aplanado['posted'].str.replace(r'Posted\s+', '', regex=True)

Les quitamos el punto que se encunetra al final de las fechas

In [15]:
df_aplanado['posted'] = df_aplanado['posted'].str.rstrip('.')

Creamos una funcion para poner en formato correcto las fecha y si no existe una fecha pone por defecto el año en curso ya que se toma en cuenta que como no muestra el año las reseñas fueron realizadas en el presente año

In [16]:
def format_date(date_str):
    try:
        # Intentar analizar la fecha
        date_obj = parser.parse(date_str)
        # Si no se especifica el año, agregar "2016" como año por defecto
        if date_obj.year == 1900:
            date_obj = date_obj.replace(year=2016)
        return date_obj.strftime('%Y-%m-%d')
    except ValueError:
        # Manejar fechas no válidas
        return None

Aplicamos la funcion a posted para poner en formato correcto las fechas

In [17]:
# Aplicar la función a la columna 'date'
df_aplanado['posted'] = df_aplanado['posted'].apply(format_date)

Verificamos que las fechas se hayan cambiado correctamente

In [18]:
df_aplanado

Unnamed: 0,posted,item_id,recommend,user_id,user_url,sentiment_analysis
0,2011-11-05,1250,True,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
1,2011-07-15,22200,True,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
2,2011-04-21,43110,True,76561197970982479,http://steamcommunity.com/profiles/76561197970...,2
3,2014-06-24,251610,True,js41637,http://steamcommunity.com/id/js41637,1
4,2013-09-08,227300,True,js41637,http://steamcommunity.com/id/js41637,2
...,...,...,...,...,...,...
59300,2023-07-10,70,True,76561198312638244,http://steamcommunity.com/profiles/76561198312...,2
59301,2023-07-08,362890,True,76561198312638244,http://steamcommunity.com/profiles/76561198312...,2
59302,2023-07-03,273110,True,LydiaMorley,http://steamcommunity.com/id/LydiaMorley,2
59303,2023-07-20,730,True,LydiaMorley,http://steamcommunity.com/id/LydiaMorley,2


exportamos nuestro nuevo archivo

In [19]:
df_aplanado.to_csv('datasets_clean/df_reviews.csv', index=False)
#df_aplanado.to_json('datasets_clean/df_review.json', orient='records', lines=True)

Cargamos el dataframe df_games desde el archivo steam_games asegurandonos de que el id este en formato string

In [58]:
df_games = pd.read_json('datasets/steam_games.json',dtype={'id': str}, lines=True)

Verificamos que se haya cargado correctamente el archivo

In [59]:
df_games

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
120440,Ghost_RUS Games,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,Colony On Mars,http://store.steampowered.com/app/773640/Colon...,2018-01-04,"[Strategy, Indie, Casual, Simulation]",http://steamcommunity.com/app/773640/reviews/?...,"[Single-player, Steam Achievements]",1.99,0.0,773640,"Nikita ""Ghost_RUS"""
120441,Sacada,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,LOGistICAL: South Africa,http://store.steampowered.com/app/733530/LOGis...,2018-01-04,"[Strategy, Indie, Casual]",http://steamcommunity.com/app/733530/reviews/?...,"[Single-player, Steam Achievements, Steam Clou...",4.99,0.0,733530,Sacada
120442,Laush Studio,"[Indie, Racing, Simulation]",Russian Roads,Russian Roads,http://store.steampowered.com/app/610660/Russi...,2018-01-04,"[Indie, Simulation, Racing]",http://steamcommunity.com/app/610660/reviews/?...,"[Single-player, Steam Achievements, Steam Trad...",1.99,0.0,610660,Laush Dmitriy Sergeevich
120443,SIXNAILS,"[Casual, Indie]",EXIT 2 - Directions,EXIT 2 - Directions,http://store.steampowered.com/app/658870/EXIT_...,2017-09-02,"[Indie, Casual, Puzzle, Singleplayer, Atmosphe...",http://steamcommunity.com/app/658870/reviews/?...,"[Single-player, Steam Achievements, Steam Cloud]",4.99,0.0,658870,"xropi,stev3ns"


Eliminamos columnas que no usaremos

In [57]:
df_games = df_games.drop(['publisher','title','url','tags','reviews_url', 'specs', 'early_access'], axis=1)

Verificamos que se hayan eliminado de forma correcta las fechas

In [60]:
df_games

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
120440,Ghost_RUS Games,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,Colony On Mars,http://store.steampowered.com/app/773640/Colon...,2018-01-04,"[Strategy, Indie, Casual, Simulation]",http://steamcommunity.com/app/773640/reviews/?...,"[Single-player, Steam Achievements]",1.99,0.0,773640,"Nikita ""Ghost_RUS"""
120441,Sacada,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,LOGistICAL: South Africa,http://store.steampowered.com/app/733530/LOGis...,2018-01-04,"[Strategy, Indie, Casual]",http://steamcommunity.com/app/733530/reviews/?...,"[Single-player, Steam Achievements, Steam Clou...",4.99,0.0,733530,Sacada
120442,Laush Studio,"[Indie, Racing, Simulation]",Russian Roads,Russian Roads,http://store.steampowered.com/app/610660/Russi...,2018-01-04,"[Indie, Simulation, Racing]",http://steamcommunity.com/app/610660/reviews/?...,"[Single-player, Steam Achievements, Steam Trad...",1.99,0.0,610660,Laush Dmitriy Sergeevich
120443,SIXNAILS,"[Casual, Indie]",EXIT 2 - Directions,EXIT 2 - Directions,http://store.steampowered.com/app/658870/EXIT_...,2017-09-02,"[Indie, Casual, Puzzle, Singleplayer, Atmosphe...",http://steamcommunity.com/app/658870/reviews/?...,"[Single-player, Steam Achievements, Steam Cloud]",4.99,0.0,658870,"xropi,stev3ns"


Eliminamos los valores que sean nulos en todas las columnas

In [61]:
df_games = df_games.dropna(subset=['genres', 'app_name', 'release_date', 'price','id','developer'])

verificamos que se hayan eliminado correctamente

In [70]:
df_games

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
88310,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,0.0,761140,Kotoshiro
88311,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",0,0.0,643980,Secret Level SRL
88312,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",0,0.0,670290,Poolians.com
88313,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,"[Action, Adventure, Casual]",http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,0.0,767400,彼岸领域
88315,Trickjump Games Ltd,"[Action, Adventure, Simulation]",Battle Royale Trainer,Battle Royale Trainer,http://store.steampowered.com/app/772540/Battl...,2018-01-04,"[Action, Adventure, Simulation, FPS, Shooter, ...",http://steamcommunity.com/app/772540/reviews/?...,"[Single-player, Steam Achievements]",3.99,0.0,772540,Trickjump Games Ltd
...,...,...,...,...,...,...,...,...,...,...,...,...,...
120439,Bidoniera Games,"[Action, Adventure, Casual, Indie]",Kebab it Up!,Kebab it Up!,http://store.steampowered.com/app/745400/Kebab...,2018-01-04,"[Action, Indie, Casual, Violent, Adventure]",http://steamcommunity.com/app/745400/reviews/?...,"[Single-player, Steam Achievements, Steam Cloud]",1.99,0.0,745400,Bidoniera Games
120440,Ghost_RUS Games,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,Colony On Mars,http://store.steampowered.com/app/773640/Colon...,2018-01-04,"[Strategy, Indie, Casual, Simulation]",http://steamcommunity.com/app/773640/reviews/?...,"[Single-player, Steam Achievements]",1.99,0.0,773640,"Nikita ""Ghost_RUS"""
120441,Sacada,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,LOGistICAL: South Africa,http://store.steampowered.com/app/733530/LOGis...,2018-01-04,"[Strategy, Indie, Casual]",http://steamcommunity.com/app/733530/reviews/?...,"[Single-player, Steam Achievements, Steam Clou...",4.99,0.0,733530,Sacada
120442,Laush Studio,"[Indie, Racing, Simulation]",Russian Roads,Russian Roads,http://store.steampowered.com/app/610660/Russi...,2018-01-04,"[Indie, Simulation, Racing]",http://steamcommunity.com/app/610660/reviews/?...,"[Single-player, Steam Achievements, Steam Trad...",1.99,0.0,610660,Laush Dmitriy Sergeevich


Creamos el archivo en esta parte que usaremos para el EDA

In [63]:
df_games.to_json('datasets_clean/df_games_EDA.json', orient='records', lines=True)

Revisamos cuantos juegos existen con la palabra free dentro de la columna precio

In [64]:
df_games['price'].str.contains('free', case=False).sum()

1531

Revisamos cuantos juegos existen con la palabra party dentro de la columna precio

In [65]:
df_games['price'].str.contains('party', case=False).sum()

2

Si existen valores nulos los cambiamos vacios

In [66]:
df_games.loc[df_games['price'].isnull(), 'price'] = ''

Hacemos un parseo a string en la columna price

In [67]:
df_games.loc[:, 'price'] = df_games['price'].astype(str)

Los juegos que contengan la palabra free en lugar del precio los cambiamos a 0 ya que se toma en cuenta que es contenido gratuito

In [68]:
#df_games.loc[df_games['price'].str.contains('free', case=False), 'price'] = '0'
#df_games['price'] = df_games['price'].str.replace(r'(?i)free', '0', regex=True)
#df_games.loc[df_games['price'].str.contains('free', case=False), 'price'] = "0"
#df_games['price'] = df_games['price'].fillna("") # Reemplaza los valores NA o NaN con una cadena vacía
#df_games.iloc[df_games['price'].str.contains('free', case=False), 'price'] = "0"
valor_buscar = 'free'
df_games.loc[df_games['price'].str.contains(valor_buscar, case=False), 'price'] = "0"


Los juegos que contengan la palabra party igual los cambiamos a 0 ya que se toma en cuenta que es contenido gratuito

In [69]:
df_games.loc[df_games['price'].str.contains('party', case=False), 'price'] = "0"

Creamos un nuevo dataframe con las columnas dummies en base a los generos

In [36]:
# Utilizar get_dummies() para crear variables dummy para cada género
dummy_genres = df_games['genres'].str.join('|').str.get_dummies()

Unimos el dataframe original junto con los dummies

In [37]:
# Concatenar las variables dummy al DataFrame original
df_games = pd.concat([df_games, dummy_genres], axis=1)

Eliminamos la columna genres ya que ya no la necesitamos

In [38]:
df_games = df_games.drop(['genres'], axis=1)

Verificamos que se hayan anidado correctamente los dos dataframes

In [39]:
df_games

Unnamed: 0,app_name,release_date,price,id,developer,Accounting,Action,Adventure,Animation &amp; Modeling,Audio Production,...,Photo Editing,RPG,Racing,Simulation,Software Training,Sports,Strategy,Utilities,Video Production,Web Publishing
88310,Lost Summoner Kitty,2018-01-04,4.99,761140,Kotoshiro,0,1,0,0,0,...,0,0,0,1,0,0,1,0,0,0
88311,Ironbound,2018-01-04,0,643980,Secret Level SRL,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0
88312,Real Pool 3D - Poolians,2017-07-24,0,670290,Poolians.com,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0
88313,弹炸人2222,2017-12-07,0.99,767400,彼岸领域,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
88315,Battle Royale Trainer,2018-01-04,3.99,772540,Trickjump Games Ltd,0,1,1,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120439,Kebab it Up!,2018-01-04,1.99,745400,Bidoniera Games,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
120440,Colony On Mars,2018-01-04,1.99,773640,"Nikita ""Ghost_RUS""",0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
120441,LOGistICAL: South Africa,2018-01-04,4.99,733530,Sacada,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
120442,Russian Roads,2018-01-04,1.99,610660,Laush Dmitriy Sergeevich,0,0,0,0,0,...,0,0,1,1,0,0,0,0,0,0


Exportamos el df a un nuevo archivo

In [40]:
df_games.to_csv('datasets_clean/df_games.csv', index=False)
#df_games.to_json('datasets_clean/df_games.json', orient='records', lines=True)

Cargamos el dataset df_items desde el archivo users_items

In [41]:
# Cargar los datos desde el archivo JSON
rows = []
with open('datasets/users_items.json') as  f:
    for line in f.readlines():
        rows.append(ast.literal_eval(line))

# Normalizar el JSON y crear un DataFrame
df_items = pd.json_normalize(rows, 'items', ['user_id', 'user_url'])

verificamos que se haya cargado correctamente

In [42]:
df_items

Unnamed: 0,item_id,item_name,playtime_forever,playtime_2weeks,user_id,user_url
0,10,Counter-Strike,6,0,76561197970982479,http://steamcommunity.com/profiles/76561197970...
1,20,Team Fortress Classic,0,0,76561197970982479,http://steamcommunity.com/profiles/76561197970...
2,30,Day of Defeat,7,0,76561197970982479,http://steamcommunity.com/profiles/76561197970...
3,40,Deathmatch Classic,0,0,76561197970982479,http://steamcommunity.com/profiles/76561197970...
4,50,Half-Life: Opposing Force,0,0,76561197970982479,http://steamcommunity.com/profiles/76561197970...
...,...,...,...,...,...,...
5153204,346330,BrainBread 2,0,0,76561198329548331,http://steamcommunity.com/profiles/76561198329...
5153205,373330,All Is Dust,0,0,76561198329548331,http://steamcommunity.com/profiles/76561198329...
5153206,388490,One Way To Die: Steam Edition,3,3,76561198329548331,http://steamcommunity.com/profiles/76561198329...
5153207,521570,You Have 10 Seconds 2,4,4,76561198329548331,http://steamcommunity.com/profiles/76561198329...


Eliminamos columnas que no ocuparemos

In [43]:
df_items = df_items.drop(['playtime_2weeks','user_url','item_name'], axis=1)

verificamos si existen valores nulos

In [44]:
df_items.isnull().sum()

item_id             0
playtime_forever    0
user_id             0
dtype: int64

Borramos valores nulos si es que existen

In [45]:
df_items = df_items.dropna()

Contamos los valores de playtime_forever que sean igual a 0 osea que no tengan tiempo jugado

In [51]:
(df_items['playtime_forever'] == 0).sum()

0

Eliminamos las filas que su playtime forever sea igual a 0 ya que no las necesitaremos

In [50]:
df_items = df_items.drop(df_items[df_items['playtime_forever'] == 0].index)

Exportamos el dataframe al archivo df_items

In [52]:
df_items.to_csv('datasets_clean/df_items.csv', index=False)
#df_items.to_json('datasets_clean/df_items.json', orient='records', lines=True)