## 1° PROYECTO INDIVIDUAL 

### 1) Importación de librerías

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import scipy
from scipy import stats
from scipy.stats import skew
from math import sqrt
from numpy import mean, var
import copy 
from sklearn import preprocessing
import json

### 2) Lectura de archivo csv como dataframe

In [2]:
df=pd.read_csv('steam_games.csv') 
df.head(2) 

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore
0,Kotoshiro,"['Action', 'Casual', 'Indie', 'Simulation', 'S...",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"['Strategy', 'Action', 'Indie', 'Casual', 'Sim...",4.49,http://steamcommunity.com/app/761140/reviews/?...,['Single-player'],4.99,False,761140.0,Kotoshiro,,
1,"Making Fun, Inc.","['Free to Play', 'Indie', 'RPG', 'Strategy']",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"['Free to Play', 'Strategy', 'Indie', 'RPG', '...",,http://steamcommunity.com/app/643980/reviews/?...,"['Single-player', 'Multi-player', 'Online Mult...",Free To Play,False,643980.0,Secret Level SRL,Mostly Positive,


In [3]:
# Analizamos estructura del dataset
df.shape

(32135, 16)

### 3) Transformaciones

In [4]:
df.columns

Index(['publisher', 'genres', 'app_name', 'title', 'url', 'release_date',
       'tags', 'discount_price', 'reviews_url', 'specs', 'price',
       'early_access', 'id', 'developer', 'sentiment', 'metascore'],
      dtype='object')

#### Modificaciones en datos de tipo fecha

In [5]:
# Borramos valores nulos
df = df.dropna(subset = ['release_date'])
df.release_date.isna().sum()

0

In [6]:
# Transformamos el formato de fecha de 'release_date' a 'aaaa-mm-dd' si el formato es incorrecto
df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce').dt.strftime('%Y-%m-%d')

  df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce').dt.strftime('%Y-%m-%d')


In [7]:
# Creamos la columna 'release_year' extrayendo el año de 'release_date'
df['release_year'] = pd.to_datetime(df['release_date'], errors='coerce').dt.year
df['release_year'] = df['release_year'].astype('Int64')

In [8]:
df[['release_date','release_year']]

Unnamed: 0,release_date,release_year
0,2018-01-04,2018
1,2018-01-04,2018
2,2017-07-24,2017
3,2017-12-07,2017
5,2018-01-04,2018
...,...,...
32129,2018-01-04,2018
32130,2018-01-04,2018
32131,2018-01-04,2018
32132,2018-01-04,2018


In [9]:
df.release_year.unique()

<IntegerArray>
[2018, 2017, <NA>, 1997, 1998, 2016, 2006, 2005, 2003, 2007, 2002, 2000, 1995,
 1996, 1994, 2001, 1993, 2004, 1999, 2008, 2009, 1992, 1989, 2010, 2011, 2013,
 2012, 2014, 1983, 1984, 2015, 1990, 1988, 1991, 1985, 1982, 1987, 1981, 1986,
 2021, 2019, 1975, 1970, 1980]
Length: 44, dtype: Int64

In [10]:
# Borramos valores nulos nuevamente
df = df.dropna(subset = ['release_date'])
df.release_date.isna().sum()

0

In [11]:
# Borramos valores nulos nuevamente
df = df.dropna(subset = ['release_year'])
df.release_date.isna().sum()

0

In [12]:
df.isna().sum()

publisher          6006
genres             1234
app_name              1
title                 1
url                   0
release_date          0
tags                161
discount_price    29690
reviews_url           1
specs               669
price              1076
early_access          0
id                    1
developer          1250
sentiment          6738
metascore         27341
release_year          0
dtype: int64

In [13]:
df.shape

(29894, 17)

#### 4) Exportación a csv

In [14]:
df.to_csv('PI1.csv', index=False)

#### 5) Desarrollo API

In [15]:
def find_year(agno):
##Función inicial para las demás funciones, recibe un año (int)
##y devuelve un dataframe solo con los valores de ese año
 df_agno = df_search[df_search["release_year"].dt.year == agno]
 return df_agno

####  a) Género

In [16]:
def genre(agno):
##Recibe un año (int) y devuelve una lista con los 5 géneros
##más vendidos en el orden correspondiente.
 df_genres = find_year(agno)
 df_genres = df_genre.explode("genres")
 list_genres = df_genres["genres"].value_counts().head().index.to_list()
 return list_genres

#### b) Juegos

In [17]:
def games(agno):
##Recibe un año (int) y devuelve una lista con los juegos lanzados en el año.
 df_games = find_year(agno)
 list_games = df_games.title.to_list()
 return list_games

#### c) Specs

In [18]:
def specs(agno):
##Recibe un año (int) y devuelve una lista con los 5 specs que 
##más se repiten en el mismo en el orden correspondiente.
 df_specs = find_year(agno)
 df_specs = df_specs.explode("specs")
 list_specs = df_specs['specs'].value_counts().head().index.to_list()
 return list_specs

#### d) Earlyacces

In [19]:
def earlyacces(agno):
##Recibe un año (int) y devuelve la cantidad de juegos lanzados en ese año con early access.
 df_early = find_year(agno)
 early = df_early['early_access'].sum()
 return early

#### e) Sentiment

In [20]:
def sentiment(agno):
##Recibe un año (int) y se devuelve una lista con la cantidad de registros que
##se encuentren categorizados con un análisis de sentimiento ese año.
 df_sentiment = find_year(agno)
 sent_on = (df_sentiment["sentiment"] == 'Overwhelmingly Negative').sum()
 sent_vn = (df_sentiment["sentiment"] == 'Very Negative').sum()
 sent_n  = (df_sentiment["sentiment"] == 'Negative').sum()
 sent_mn = (df_sentiment["sentiment"] == 'Mostly Negative').sum()
 sent_m  = (df_sentiment["sentiment"] == 'Mixed').sum()
 sent_mp = (df_sentiment["sentiment"] == 'Mostly Positive').sum()
 sent_p  = (df_sentiment["sentiment"] == 'Positive').sum()
 sent_vp = (df_sentiment["sentiment"] == 'Very Positive').sum()
 sent_op = (df_sentiment["sentiment"] == 'Overwhelmingly Positive').sum()

 sent_on_str = f"Overwhelmingly Negative: {sent_on}"
 sent_vn_str = f"Very Negative: {sent_vn}"
 sent_n_str  = f"Negative: {sent_n}"
 sent_mn_str = f"Mostly Negative: {sent_mn}"
 sent_m_str  = f"Mixed: {sent_m}"
 sent_mp_str = f"Mostly Positive: {sent_mp}"   
 sent_p_str  = f"Positive: {sent_p}"
 sent_vp_str = f"Very Positive: {sent_vp}"
 sent_op_str = f"Overwhelmingly Positive: {sent_op}"
 
 lista = [[sent_on, sent_on_str], [sent_vn, sent_vn_str], [sent_n, sent_n_str], [sent_mn, sent_mn_str], [sent_m, sent_m_str],
            [sent_mp, sent_mp_str], [sent_p, sent_p_str], [sent_vp, sent_vp_str], [sent_op, sent_op_str]]

 lista_final = []

 for sent in lista:
        if sent[0] > 0:
            lista_final.append(sent[1])

 return lista_final

#### f) Metascore

In [21]:
def metascore(agno):
##Recibe un año (int) y retorna el top 5 juegos con mayor metascore.
 df_meta = find_year(agno)
 df_meta = df_meta[['title', 'metascore']].sort_values('metascore', axis=0, ascending=False).head()

 list_name_score = []

 for i in range(df_meta.shape[0]):
        name = df_meta.iloc[i:i+1, 0:1].values[0][0]
        score = df_meta.iloc[i:i+1, 1:2].values[0][0]
        name_score = f"{name}: {score}"
        list_name_score.append(name_score)
 return list_name_score