## 04_API

Using the FastAPI frameworkv this [API](https://pi-ml-ops-iviw.onrender.com/) provides various endpoints to access and retrieve information related to Steam games and user reviews and the recommendation system through the following endpoints:

In [2]:
import ast
import fastapi
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Instanciate objects
vectorizer = TfidfVectorizer()
app = fastapi.FastAPI()

# Import and process data
steam_games = pd.read_csv("data/steam_games.csv", index_col=0, parse_dates=["date"])
steam_games["genres"] = steam_games["genres"].fillna("[]")
steam_games['genres'] = steam_games['genres'].apply(eval)
users_items = pq.read_table('data/users_items.parquet')
users_items = users_items.to_pandas()
users_reviews = pq.read_table('data/users_reviews.parquet')
users_reviews = users_reviews.to_pandas()
model_data = pd.read_csv("data/model_data.csv", index_col=0, parse_dates=["date"])
model_data['popular_genres'].fillna('', inplace=True)
model_data['common_genres'].fillna('', inplace=True)
model_data['unpopular_genres'].fillna('', inplace=True)


@app.get("/") # Returns a confirmation message and the available endpoints
def test():
    return {"API STEAM GAMES ACTIVE"
            "endpoints": {
                "/developer/{developer}": "Valve",
                "/user_data/{user_id}": "76561197970982479",
                "/user_for_genre/{genre}": "Action",
                "/best_developer_year/{year}": 2010,
                "/developer_reviews_analysis/{developer}": "Ubisoft",
                "/recommend/{title}": "Counter-Strike"}}

[GET /developer/{developer}](https://pi-ml-ops-iviw.onrender.com/developer/Valve)

Returns the amount of items and free contet porcentage for a given developer:

In [15]:
@app.get("/developer/{developer}")
def developer(developer: str):
    # Filter data
    developer_data = steam_games[steam_games["developer"] == developer]
    if developer_data.empty:
        return {"error": "Developer not found"}
    # Find amount of items per year
    game_count_by_year = developer_data["date"].dt.year.value_counts()
    game_count_by_year_df = pd.DataFrame(game_count_by_year).reset_index()
    game_count_by_year_df.columns = ["year", "item_count"]
    # Find amount of free items per year
    free_game_count_by_year = developer_data["date"][developer_data["price"] == 0].dt.year.value_counts()
    free_game_count_by_year_df = pd.DataFrame(free_game_count_by_year).reset_index()
    free_game_count_by_year_df.columns = ["year", "free_item_count"]
    # Create dataframe and calculate free content percentage
    game_count_by_year_df = pd.merge(game_count_by_year_df, free_game_count_by_year_df, on="year", how="left")    
    game_count_by_year_df["free_content"] = game_count_by_year_df["free_item_count"] * 100 / game_count_by_year_df["item_count"]
    game_count_by_year_df.drop(columns="free_item_count", inplace=True)
    game_count_by_year_df["free_content"].fillna(0, inplace=True)
    # Create JSON and return
    list = []
    for i, row in game_count_by_year_df.iterrows():
        list.append({"Year": row["year"], "Total items": row["item_count"], "Free content": row["free_content"]})
    return {"Developer": developer, "Items by year:": list}

developer("Ubisoft")

{'Developer': 'Ubisoft',
 'Items by year:': [{'Year': 2016.0,
   'Total items': 20.0,
   'Free content': 10.0},
  {'Year': 2017.0, 'Total items': 11.0, 'Free content': 0.0},
  {'Year': 2014.0, 'Total items': 7.0, 'Free content': 0.0},
  {'Year': 2003.0, 'Total items': 2.0, 'Free content': 0.0},
  {'Year': 2007.0, 'Total items': 2.0, 'Free content': 0.0},
  {'Year': 2004.0, 'Total items': 1.0, 'Free content': 0.0},
  {'Year': 2005.0, 'Total items': 1.0, 'Free content': 0.0},
  {'Year': 2010.0, 'Total items': 1.0, 'Free content': 0.0},
  {'Year': 2015.0, 'Total items': 1.0, 'Free content': 0.0}]}

[GET /userdata/{user_id}](https://pi-ml-ops-iviw.onrender.com/userdata/76561197970982479)

Returns the total spent money, the recomendation percentage and the total amount of items for a given user:

In [9]:
@app.get("/userdata/{user_id}") 
def userdata(user_id: str):
    # Filter data
    user_items = users_items[users_items["user_id"] == user_id ]
    user_reviews = users_reviews[users_reviews["user_id"] == user_id]
    # Calculate spented money
    user_items = user_items.merge(steam_games[["game_id", "price"]], on="game_id", how="left")    
    spent_money = user_items["price"].sum()
    # Calculate item amount and recommendatio rate
    item_count = user_reviews.shape[0]
    recommend_rate = user_reviews["recommend"].value_counts()[True] * 100 / item_count

    return {"user_id": user_id, "spent_money": spent_money, "recommend_rate": recommend_rate, "item_count": item_count}

userdata("76561197970982479")

{'user_id': '76561197970982479',
 'spent_money': 3424.31,
 'recommend_rate': 100.0,
 'item_count': 3}

[GET /user_for_genre/{genre}](https://pi-ml-ops-iviw.onrender.com/user_for_genre/Action)

Returns the user with the highest playtime and a list of accumulated playtime by year for a given genre:

In [8]:
@app.get("/userforgenre/{genre}")
def UserForGenre(genre: str):
    # Filter data
    genre_items = users_items.merge(steam_games[["genres", "game_id", "date"]], on="game_id", how="left")
    genre_items["genres"] = genre_items["genres"].fillna("[]")
    genre_items = genre_items[genre_items["genres"].apply(lambda x: genre in x)]
    if genre_items.empty:
        return {"error": f"No records found for genre: {genre}"}
    # Calculate player with most hours played
    data = genre_items.groupby("user_id")["playtime_forever"].sum()
    df = pd.DataFrame(data).reset_index().sort_values(by="playtime_forever", ascending=False)
    most_hours_player = df.iloc[0][["user_id"]][0]
    # Calculate playtime for each year
    hours_by_year = genre_items.groupby(genre_items["date"].dt.year)["playtime_forever"].sum()
    hours_by_year = pd.DataFrame(hours_by_year).reset_index()
    hours_by_year.columns = ["year", "playtime"]
    # Creat JSON and return
    list = []
    for i, row in hours_by_year.iterrows():
        list.append({"Year": row["year"], "playtime": row["playtime"]})
    return {"Top player": most_hours_player,
        "Hours played by year": list}

UserForGenre("Action")

{'Top player': 'Sp3ctre',
 'Hours played by year': [{'Year': 1983.0, 'playtime': 3582.0},
  {'Year': 1984.0, 'playtime': 384.0},
  {'Year': 1988.0, 'playtime': 30241.0},
  {'Year': 1989.0, 'playtime': 607.0},
  {'Year': 1990.0, 'playtime': 18787.0},
  {'Year': 1991.0, 'playtime': 2502.0},
  {'Year': 1992.0, 'playtime': 1925.0},
  {'Year': 1993.0, 'playtime': 211807.0},
  {'Year': 1994.0, 'playtime': 121057.0},
  {'Year': 1995.0, 'playtime': 222132.0},
  {'Year': 1996.0, 'playtime': 70061.0},
  {'Year': 1997.0, 'playtime': 687668.0},
  {'Year': 1998.0, 'playtime': 3089111.0},
  {'Year': 1999.0, 'playtime': 3040470.0},
  {'Year': 2000.0, 'playtime': 18663625.0},
  {'Year': 2001.0, 'playtime': 1480065.0},
  {'Year': 2002.0, 'playtime': 2680111.0},
  {'Year': 2003.0, 'playtime': 15075311.0},
  {'Year': 2004.0, 'playtime': 134255188.0},
  {'Year': 2005.0, 'playtime': 15670732.0},
  {'Year': 2006.0, 'playtime': 471120762.0},
  {'Year': 2007.0, 'playtime': 24553464.0},
  {'Year': 2008.0, 'pla

[GET /best_developer_year/{year}](https://pi-ml-ops-iviw.onrender.com/best_developer_year/2013)

Returns the top 3 developer most recommended by users for a given year:

In [10]:
@app.get("/best_developer_year/{year}")
def best_developer_year(year: int):
    # Filter data
    developer_reviews = users_reviews.merge(steam_games[["game_id", "developer"]], on="game_id", how="left")
    developer_reviews = developer_reviews[developer_reviews["date"].dt.year == year ]
    # Count recommendations
    positive_sentiment_items = developer_reviews[developer_reviews["sentiment"] == 2 ]
    positive_sentiment_items = positive_sentiment_items.groupby("developer")["sentiment"].count()
    positive_sentiment_items = pd.DataFrame(positive_sentiment_items).reset_index().sort_values(by="sentiment", ascending=False)
    # Count positive analysis
    recommended_items = developer_reviews[developer_reviews["recommend"] == True ]
    recommended_items = recommended_items.groupby("developer")["recommend"].count()
    recommended_items = pd.DataFrame(recommended_items).reset_index().sort_values(by="recommend", ascending=False)
    # Sort and return
    top_developers = pd.merge(positive_sentiment_items, recommended_items, on="developer")
    top_developers["recomendations"] = top_developers["sentiment"] + top_developers["recommend"]
    return [{"Position 1:": top_developers.iloc[0]["developer"]},
            {"Position 2:": top_developers.iloc[1]["developer"]},
            {"Position 3:": top_developers.iloc[2]["developer"]}]
    
best_developer_year(2010)

[{'Position 1:': 'Valve'},
 {'Position 2:': 'Tripwire Interactive'},
 {'Position 3:': 'DONTNOD Entertainment,Feral Interactive (Mac),Feral Interactive (Linux)'}]

[GET /developer_reviews_analysis/{developer}](https://pi-ml-ops-iviw.onrender.com/developer_reviews_analysis/Ubisoft)

Returns the positive and negative sentiment count for a given developer:

In [12]:
@app.get("/developer_reviews_analysis/{developer}")
def developer_reviews_analysis(developer: str):
    # Filter data
    developer_reviews = users_reviews.merge(steam_games[["game_id", "developer"]], on="game_id", how="left")
    developer_reviews = developer_reviews[developer_reviews["developer"] == developer ]
    if developer_reviews.empty:
        return {"error": f"No records found for developer: {developer}"}
    # Calculate positive and negative sentiment and return
    sentiment_count = developer_reviews["sentiment"].value_counts()
    return {"Negative": sentiment_count[0],
            "Positive": sentiment_count[2]}

developer_reviews_analysis("Valve")

{'Negative': 802, 'Positive': 3471}

[GET /recommend_game/{game_id}](https://pi-ml-ops-iviw.onrender.com/recommend_game/10)

Returns a list of 5 games recommendations for a given game id:

In [5]:
@app.get("/recommend_game/{game_id}")
def recommend_similar_games(game_id):
    # Find game index
    game_index = model_data[model_data['game_id'] == game_id].index[0]
    # Filter games bases on date
    year = model_data["date"].dt.year[game_index]
    five_years_ago = year - 5
    five_years_later = year + 5
    filtered_data = model_data[(model_data["date"].dt.year >= five_years_ago) & (model_data["date"].dt.year <= five_years_later)]
    # Filter games based on price
    price = model_data["price_discr"][game_index]
    upper_price = price + 1
    lower_price = price - 1
    filtered_data = filtered_data[(filtered_data["price_discr"] >= lower_price) & (filtered_data["price_discr"] <= upper_price)]
    # Filter games based on score
    score = model_data["score"][game_index]
    upper_score = score + 1
    lower_score = score - 1
    filtered_data = filtered_data[(filtered_data["score"] >= lower_score) & (filtered_data["score"] <= upper_score)]
    # Find new game index
    filtered_data = filtered_data.reset_index(drop=True)
    new_game_index = filtered_data[filtered_data['game_id'] == game_id].index[0]
    # Create similarity matrix based on popular genres for filtered_data
    popular_genres_matrix = vectorizer.fit_transform(filtered_data['popular_genres'])
    popular_genres_similarity_matrix = cosine_similarity(popular_genres_matrix, popular_genres_matrix)
    # Create similarity matrix based on common genres for filtered_data
    common_genres_matrix = vectorizer.fit_transform(filtered_data['common_genres'])
    common_genres_similarity_matrix = cosine_similarity(common_genres_matrix, common_genres_matrix)
    # Create similarity matrix based on unpopular genres for filtered_data
    unpopular_genres_matrix = vectorizer.fit_transform(filtered_data['unpopular_genres'])
    unpopular_genres_similarity_matrix = cosine_similarity(unpopular_genres_matrix, unpopular_genres_matrix)
    # Combine the similarity matrices for the three genre categories with choosen importance
    similarity_matrix = popular_genres_similarity_matrix + common_genres_similarity_matrix * 1.5 + unpopular_genres_similarity_matrix * 2
    # Adjust the game indices for the filtered_data
    similar_game_indices = similarity_matrix[new_game_index].argsort()[::-1][1:6]
    # Obtain the names of similar games and their similarity scores
    similar_game_scores = similarity_matrix[new_game_index][similar_game_indices]
    similar_game_titles = filtered_data.iloc[similar_game_indices]['title'].tolist()
    # Create a DataFrame with titles and similarity scores
    similar_games_dict = {'recommendations': similar_game_titles}
    return similar_games_dict

recommend_similar_games(10)

{'recommendations': ['Counter-Strike: Source',
  'Quake III Arena',
  'Counter-Strike: Condition Zero',
  'Commandos: Behind Enemy Lines',
  'STAR WARS™ Republic Commando™']}