In [2]:
from fastapi import FastAPI

import numpy as np
import pandas as pd
import uvicorn

In [3]:
app = FastAPI()
genreFunction = pd.read_parquet('./data/genre_functions.parquet')

In [4]:
genreFunction.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9761461 entries, 0 to 9761460
Data columns (total 4 columns):
 #   Column            Dtype   
---  ------            -----   
 0   genres            category
 1   year              int16   
 2   user_id           category
 3   playtime_forever  int32   
dtypes: category(2), int16(1), int32(1)
memory usage: 104.9 MB


In [5]:
userRecommend = pd.read_parquet('./data/recommendations.parquet')
userRecommend.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9761461 entries, 0 to 9761460
Data columns (total 7 columns):
 #   Column              Dtype   
---  ------              -----   
 0   genres              category
 1   year                int16   
 2   user_id             category
 3   playtime_forever    int32   
 4   title               category
 5   sentiment_analysis  int8    
 6   recommend           bool    
dtypes: bool(1), category(3), int16(1), int32(1), int8(1)
memory usage: 143.4 MB


In [6]:
userRecommend.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9761461 entries, 0 to 9761460
Data columns (total 7 columns):
 #   Column              Dtype   
---  ------              -----   
 0   genres              category
 1   year                int16   
 2   user_id             category
 3   playtime_forever    int32   
 4   title               category
 5   sentiment_analysis  int8    
 6   recommend           bool    
dtypes: bool(1), category(3), int16(1), int32(1), int8(1)
memory usage: 143.4 MB


In [7]:
items = pd.read_parquet('./data/items.parquet')
items

Unnamed: 0,user_id,item_id,item_name,playtime_forever
0,76561197970982479,10,Counter-Strike,6
1,76561197970982479,20,Team Fortress Classic,0
2,76561197970982479,30,Day of Defeat,7
3,76561197970982479,40,Deathmatch Classic,0
4,76561197970982479,50,Half-Life: Opposing Force,0
...,...,...,...,...
5094077,76561198329548331,346330,BrainBread 2,0
5094078,76561198329548331,373330,All Is Dust,0
5094079,76561198329548331,388490,One Way To Die: Steam Edition,3
5094080,76561198329548331,521570,You Have 10 Seconds 2,4


In [8]:

# We apply the str.lower() method so that it does not matter how the word is written.
# f2['genres'] = f2['genres'].str.lower()
# genreFunction['genres'].unique()

In [9]:
genreFunction.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9761461 entries, 0 to 9761460
Data columns (total 4 columns):
 #   Column            Dtype   
---  ------            -----   
 0   genres            category
 1   year              int16   
 2   user_id           category
 3   playtime_forever  int32   
dtypes: category(2), int16(1), int32(1)
memory usage: 104.9 MB


In [10]:
genreFunction['genres'] = genreFunction['genres'].astype('category')
genreFunction['user_id'] = genreFunction['user_id'].astype('category')

In [11]:
genreFunction.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9761461 entries, 0 to 9761460
Data columns (total 4 columns):
 #   Column            Dtype   
---  ------            -----   
 0   genres            category
 1   year              int16   
 2   user_id           category
 3   playtime_forever  int32   
dtypes: category(2), int16(1), int32(1)
memory usage: 104.9 MB


In [12]:
# mostPlayed = genreFunction.groupby('user_id')['playtime_forever'].sum()
# mostPlayed.head()

In [13]:
# The app.get decorator is used to associate the function with an HTTP GET request.
# '/PlayTimeGenre/{genre}/' is the path for the endpoint, the {genre} is the parameter.
@app.get('/PlayTimeGenre/{genre}/')
def PlayTimeGenre( genre : str ):
    '''
    Find the genre with the most playtime hours.

    Parameters
    ----------
    genre : str
        Desired genre to look for.
    
    Returns
    -------
    int
        Year when the highest number of hours played was recorded for that genre.
    
    Examples
    -------
    >>> PlayTimeGenre('Action')
    2012
    >>> PlayTimeGenre('Indie')
    2006

    '''
    f1 = pd.read_parquet('./data/f1.parquet')
    # We look in our DataSet if there is any match with the genre entered.
    f1['genres'] = f1['genres'].str.lower()
    genres = f1[f1['genres'] == genre.lower()]
    if genres.empty:
        return f'The genre {genre} does not exist.'

    # We calculate the total playtime for each year in the dataset.
    results = genres.groupby('year')['playtime_forever'].sum()
    
    # Locate the index where the maximum value is. In this case the most played genre.
    idResut = results.idxmax()
    
    return f'Year with the most playtime hours for {genre}: {idResut}'

In [14]:
test = PlayTimeGenre('RPG')
print(test)

Year with the most playtime hours for RPG: 2011


# Segunda Función

In [15]:
@app.get('/UserForGenre/{genre}/')
def UserForGenre( genre : str ):
    '''
    Find the user with the most hours played by genre, as well as hours played for each year.

    Parameters
    ----------
    genre : str
        Desired genre to look for.
    
    Returns
    -------
    list
        The user name and the amount of hours played by year for that particular genre.
    
    Examples
    -------
    >>> UserForGenre('Simulation')
    UserName is the user with the most playtime for the genre "Action" with 23721 hours played.

    Year            2003    2006    2009    2010    2011    2012    2013    2014    2015    2016    
    Hours Played      0      0      2037    4102    1968     223     323    342     1224     112   
    '''
    
    f2 = pd.read_parquet('./data/f2.parquet')

    f2['genres'] = f2['genres'].str.lower()
    # Look in the DataSet if there is any match with the genre entered.
    genres = f2[f2['genres'] == genre.lower()]

    if genres.empty:
        return f'The genre {genre} does not exist.'

    # Group the DataSet by user ID, then sum the amount of hours played.
    mostPlayed = genres.groupby('user_id')['playtime_forever'].sum().reset_index()
    
    # Locate the index for the player.
    player = genres.loc[genres['playtime_forever'].idxmax()]['user_id']

    # Filter the DataSet with only the player id
    filteredDFWithPlayerID = (genres[genres['user_id'] == player])

    # Create a new DF with just the year and playtime_forever columns
    hoursPlayedByYear = filteredDFWithPlayerID.groupby('year')['playtime_forever'].sum()

    hoursList = [ {'Year': year, 'hours': hours} for year, hours in hoursPlayedByYear.items()]
    result = {f'{player} is the user with the most playtime for the genre "{genre.capitalize()}"\nHours Played: {hoursList}'
              }
    return result

In [16]:
UserForGenre('action')

{'Evilutional is the user with the most playtime for the genre "Action"\nHours Played: [{\'Year\': 2003, \'hours\': 0}, {\'Year\': 2006, \'hours\': 0}, {\'Year\': 2009, \'hours\': 2037}, {\'Year\': 2010, \'hours\': 4102}, {\'Year\': 2011, \'hours\': 1968}, {\'Year\': 2012, \'hours\': 680991}, {\'Year\': 2013, \'hours\': 69726}, {\'Year\': 2014, \'hours\': 24226}, {\'Year\': 2015, \'hours\': 112}, {\'Year\': 2016, \'hours\': 1291}, {\'Year\': 2017, \'hours\': 10894}]'}

# 3rd Function

In [17]:
userRecommend.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9761461 entries, 0 to 9761460
Data columns (total 7 columns):
 #   Column              Dtype   
---  ------              -----   
 0   genres              category
 1   year                int16   
 2   user_id             category
 3   playtime_forever    int32   
 4   title               category
 5   sentiment_analysis  int8    
 6   recommend           bool    
dtypes: bool(1), category(3), int16(1), int32(1), int8(1)
memory usage: 143.4 MB


In [18]:
@app.get('/UsersRecommend/{year}/')
def UsersRecommend( year : int ):
    '''
    Get the three most recommended games  
    
    Parameters
    ----------
    year : str
        Year in which the top three recommended games are.
    
    Returns
    -------
    list
        Name of the three recommended games.
    
    Examples
    -------
    >>> UsersRecommend(2018):
    Counter-Strike: Global Offense, Garry's Mode, Fall Guys
    >>> UsersRecommend(2021)
    Empire: Total War, Left 4 Dead 2, The Stanley Parable
    '''
    f3 = pd.read_parquet('./data/f3.parquet')
    # Create a DataSet with rows that match the year.
    givenYear = f3[f3['year'] == year]

    if givenYear.empty:
        return f'There are no records for the year {year}.'

    # Group the top three games
    topThree = (givenYear['title'].value_counts().head(3).reset_index()
        .rename(columns={'index': 'title', 'title': 'count'}))
    
    topThreeList = [{f"Top {i+1}: {game}": count} for i, (game, count) in topThree.iterrows()]
    return topThreeList


In [19]:
UsersRecommend(2016)

[{'Top 1: ! That Bastard Is Trying To Steal Our Gold !': 0},
 {'Top 2: Rocksmith - Rick Springfield - Jessie’s Girl': 0},
 {'Top 3: Rocksmith - Santana - Oye Como Va': 0}]

In [20]:
f3 = pd.read_parquet('./data/f3.parquet')
f3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9761461 entries, 0 to 9761460
Data columns (total 5 columns):
 #   Column              Dtype   
---  ------              -----   
 0   year                int16   
 1   user_id             category
 2   title               category
 3   sentiment_analysis  int8    
 4   recommend           bool    
dtypes: bool(1), category(2), int16(1), int8(1)
memory usage: 96.9 MB


# 4th Function

In [21]:
@app.get('/UsersNotRecommend/{year}/')
def UsersNotRecommend( year : int ):
    '''
    Get the three least recommended games.
    
    Parameters
    ----------
    year : str
        Year in which the three least recommended games are.
    
    Returns
    -------
    list
        Name of the three recommended games.
    
    Examples
    -------
    >>> UsersNotRecommend(2008):
    Portal 2, Garry's Mode, Fall Guys
    >>> UsersNotRecommend(2011)
    Carmageddon Max Pack, Left 4 Dead 2, The Stanley Parable
    '''
    f4 = pd.read_parquet('./data/f4.parquet')
    givenYear = f4[f4['year'] == year]

    if givenYear.empty:
        return f'There are no records for the year {year}.'

    # Group the games that are from the desired year and had negative reviews.
    leastRecommendedGames = f4[(f4['year'] == year) & (f4['recommend'] == False)]

    # Create a list that has the 3 least recommended games for that year.
    leastThree = (
        leastRecommendedGames['title']
        .value_counts()
        .head(3)
        .reset_index()
        .rename(columns={'count': 'Negative Reviews', 'title': 'Game'})
    )

    leastThree = [{f'Top {i+1}: "{game}" with {reviews} negative reviews'} for i, (game, reviews) in leastThree.iterrows()]

    return leastThree

In [22]:
f4 = pd.read_parquet('./data/f4.parquet')

In [23]:
f4['recommend'].value_counts(False)

recommend
True     9754815
False       6646
Name: count, dtype: int64

In [24]:
UsersNotRecommend(2012)

[{'Top 1: "! That Bastard Is Trying To Steal Our Gold !" with 0 negative reviews'},
 {'Top 2: "Rocksmith - Rick Springfield - Jessie’s Girl" with 0 negative reviews'},
 {'Top 3: "Rocksmith - Santana - Oye Como Va" with 0 negative reviews'}]

In [25]:
f4 = pd.read_parquet('./data/f4.parquet')
leastRecommendedGames = f4[(f4['year'] == '10') & (f4['recommend'] == False)]
leastThree = (
        leastRecommendedGames['title']
        .value_counts()
        .head(3)
    )
leastThree

title
! That Bastard Is Trying To Steal Our Gold !    0
Rocksmith - Rick Springfield - Jessie’s Girl    0
Rocksmith - Santana - Oye Como Va               0
Name: count, dtype: int64

In [26]:
userRecommend.head(1)

Unnamed: 0,genres,year,user_id,playtime_forever,title,sentiment_analysis,recommend
0,Action,1997,UTNerd24,5,Lost Summoner Kitty,2,True


# 5th Function

In [27]:
@app.get('/sentiment_analysis/{year}/')
def sentiment_analysis( year : int ):
    '''
    Get the category reviews from all users in a year.
    
    Parameters
    ----------
    year : str
        Desired year to see how the reviews were.
    
    Returns
    -------
    list
        Amount of all the different review categories for that year.
    
    Examples
    -------
    >>> sentiment_analysis(2018)
    {Negative = 101, Neutral = 142, Positive = 221}
    >>>sentiment_analysis(2019)
    {Negative = 140, Neutral = 47, Positive = 115}
    '''
    f5 = pd.read_parquet('./data/f5.parquet')
    givenYear = f5[f5['year'] == year]

    if givenYear.empty:
        return f'There are no records for the year {year}.'
    
    
    sentiment = f5.groupby('year')['sentiment_analysis'].value_counts()


    sentiment = sentiment.loc[year].to_dict()
     
    return {"Negative": sentiment.get(0, 0),
            "Neutral": sentiment.get(1, 0),
            "Positive": sentiment.get(2, 0)}
            


In [28]:
sentiment_analysis(2009)

{'Negative': 333664, 'Neutral': 0, 'Positive': 0}

In [29]:
sentiment_analysis(2018)

{'Negative': 442, 'Neutral': 0, 'Positive': 0}