# API Test

In [26]:
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import FeatureUnion
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.callbacks import EarlyStopping

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import History
from keras import backend
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.callbacks import EarlyStopping

In [27]:
# Prepared dataset, addressed relative to the notebook's working directory.
FINAL_DATASET_PATH = os.path.join(
    "final_dataset",
    "final_dataset.csv",
)

# Full table; later cells slice it and split off the "user_reviews" column.
whole_steam_data = pd.read_csv(filepath_or_buffer=FINAL_DATASET_PATH)

In [28]:
class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Pipeline step that keeps only the named columns of a DataFrame.

    Parameters
    ----------
    attribute_names : list of str
        Column labels used to index ``X`` in ``transform``.
    """

    def __init__(self, attribute_names):
        self.attribute_names = attribute_names

    def fit(self, X, y=None):
        """Learn nothing; return ``self`` so the step can sit in a Pipeline."""
        return self

    def transform(self, X):
        """Return ``X`` indexed by ``self.attribute_names``."""
        selected = X[self.attribute_names]
        return selected

class MostFrequentImputer(BaseEstimator, TransformerMixin):
    """Fill missing values in each DataFrame column with that column's most frequent value.

    After ``fit`` the learned fill values are available as ``most_frequent_``,
    a Series indexed by the column labels of the training frame.
    """

    def fit(self, X, y=None):
        """Record the most frequent non-missing value of every column of ``X``.

        A column with no non-missing values has no most-frequent value. NaN is
        stored for such a column (so ``transform`` leaves it unchanged) instead
        of failing with an IndexError on the empty ``value_counts`` result.
        """
        def _most_frequent(column):
            # value_counts() sorts by descending frequency and skips NaN.
            counts = column.value_counts()
            return counts.index[0] if len(counts) else np.nan

        self.most_frequent_ = pd.Series([_most_frequent(X[c]) for c in X],
                                        index=X.columns)
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with NaNs replaced column-wise by the fitted values."""
        return X.fillna(self.most_frequent_)

# Columns routed through the numeric branch. Contents and order are kept
# exactly as they were, including the repeated "Co-op" entry.
# NOTE(review): the repeat presumably selects the same column twice; removing
# it would change the width of the feature matrix, so confirm against the
# saved model before touching it.
NUMERIC_COLUMNS = [
    "english", "windows", "mac", "linux", "achievements",
    "average_playtime", "median_playtime", "owners", "price",
    "Indie", "Action", "Adventure", "Casual", "Strategy", "Simulation",
    "RPG", "Early Access", "Free to Play", "Puzzle", "Racing", "VR",
    "Sports", "Anime", "Visual Novel", "Platformer", "Point & Click",
    "Horror", "Nudity", "FPS", "Multiplayer", "Sexual Content", "Violent",
    "Gore", "Massively Multiplayer", "Hidden Object", "Open World",
    "Survival", "Pixel Graphics", "Space", "Female Protagonist",
    "Shoot 'Em Up", "RTS", "Classic", "Turn-Based", "Arcade", "Sci-fi",
    "Story Rich", "Tower Defense", "Zombies", "Singleplayer",
    "World War II", "Card Game", "RPGMaker", "Great Soundtrack",
    "Management", "Co-op", "Fighting", "2D", "Turn-Based Strategy",
    "Fantasy", "Utilities", "Rogue-like", "Sandbox", "JRPG", "Board Game",
    "Retro", "Stealth", "Comedy", "Shooter", "Music", "Hack and Slash",
    "Bullet Hell", "Atmospheric", "First-Person", "City Builder",
    "Historical", "Psychological Horror", "Family Friendly", "Memes",
    "Match 3", "Mystery", "Difficult", "Local Multiplayer", "Driving",
    "Design & Illustration", "Cyberpunk", "Flight", "Building", "Clicker",
    "Walking Simulator", "Metroidvania", "Education",
    "Single-player", "Steam Achievements", "Steam Trading Cards",
    "Steam Cloud", "Full controller support", "Multi-player",
    "Partial Controller Support", "Steam Leaderboards",
    "Online Multi-Player", "Shared/Split Screen", "Stats", "Co-op",
    "Local Multi-Player", "Cross-Platform Multiplayer", "Online Co-op",
    "Includes level editor", "Steam Workshop", "Local Co-op",
    "Captions available", "In-App Purchases", "MMO", "VR Support",
    "Commentary available", "Valve Anti-Cheat enabled",
    "Steam Turn Notifications", "SteamVR Collectibles",
    "Includes Source SDK", "Mods",
]

# Columns routed through the categorical (one-hot) branch.
CATEGORICAL_COLUMNS = ["desc", "name", "developer", "release_date"]

# Numeric branch: select -> standardize -> fill remaining NaNs with the median.
num_pipeline = Pipeline(steps=[
    ("select_numeric", DataFrameSelector(NUMERIC_COLUMNS)),
    ("scaler", StandardScaler()),
    ("imputer", SimpleImputer(strategy="median")),
])

# Categorical branch: select -> fill NaNs with the most frequent value -> one-hot.
# Categories not seen during fit are ignored at transform time.
# NOTE(review): `sparse=` was renamed `sparse_output=` in scikit-learn 1.2 and
# removed in 1.4 -- confirm the installed version before upgrading.
cat_pipeline = Pipeline(steps=[
    ("select_cat", DataFrameSelector(CATEGORICAL_COLUMNS)),
    ("imputer", MostFrequentImputer()),
    ("cat_encoder", OneHotEncoder(sparse=False, handle_unknown='ignore')),
])

# Side-by-side concatenation of the two branches' outputs.
preprocess_pipeline = FeatureUnion(transformer_list=[
    ("num_pipeline", num_pipeline),
    ("cat_pipeline", cat_pipeline),
])


In [29]:
# Number of leading rows of the dataset used for this run; None uses every row.
# Sizes tried so far: 500, 800, 1000, 3600, 5000, 8200, 10_000, 12_000
N_ROWS = 800

temp_steam_data = whole_steam_data if N_ROWS is None else whole_steam_data.head(N_ROWS)

# "user_reviews" is the target; every other column is a candidate feature.
X = temp_steam_data.loc[:, temp_steam_data.columns != "user_reviews"]
y = temp_steam_data["user_reviews"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# Fit the combined preprocessor itself on the training split. Previously only
# the two sub-pipelines were fitted separately, and the later
# preprocess_pipeline.transform(...) call worked purely because the
# FeatureUnion shares those same objects.
preprocess_pipeline.fit(X_train)

# Cell output: the one-hot block for the training rows (same preview as before).
cat_pipeline.transform(X_train)

(640, 131) (640,)
(160, 131) (160,)


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [30]:
# Second CSV, read from the notebook's working directory.
test_path = "test.csv"
test_steam = pd.read_csv(test_path)

# Separate the "user_reviews" column from all remaining columns.
test_steam_y = test_steam["user_reviews"]
test_steam_X = test_steam.drop(columns=["user_reviews"])

# Run the remaining columns through the numeric + categorical preprocessing union.
# NOTE(review): this relies on the preprocessing pipelines having been fitted
# in an earlier cell.
test_steam_X_scaled = preprocess_pipeline.transform(test_steam_X)

In [31]:
# Saved Keras model, loaded from the "model" path in the working directory.
model_path = "model"
epic_model = keras.models.load_model(model_path)

# Last expression of the cell: predictions for the preprocessed rows are displayed.
# NOTE(review): the recorded output contains exact zeros and values above 1 --
# confirm that range is expected for "user_reviews".
epic_model.predict(test_steam_X_scaled)






array([[0.51195014],
       [0.71519613],
       [0.58772606],
       [0.10402623],
       [0.02290966],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.7477514 ],
       [0.877059  ],
       [1.04514   ],
       [1.1546992 ],
       [1.1250917 ],
       [0.9345016 ],
       [0.18899597]], dtype=float32)