# Player Characteristics and Subscribing to a Game-Related Newsletter

## Introduction

!!!

In [17]:
import altair as alt
import pandas as pd
import numpy as np
from sklearn import set_config
from sklearn.compose import make_column_transformer
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Switch Altair's active data transformer to the "vegafusion" one.
alt.data_transformers.enable("vegafusion")

# Have scikit-learn transformers emit pandas output from transform().
set_config(transform_output="pandas")

In [18]:
# Download link for the players CSV (hosted on Google Drive).
PLAYERS_URL = "https://drive.google.com/uc?export=download&id=1Mw9vW0hjTJwRWx0bDXiSpYsO3gKogaPz"

# Read the remote CSV into a DataFrame; the bare name below makes it the cell's output.
players = pd.read_csv(PLAYERS_URL)
players

Unnamed: 0,experience,subscribe,hashedEmail,played_hours,name,gender,age,individualId,organizationName
0,Pro,True,f6daba428a5e19a3d47574858c13550499be23603422e6...,30.3,Morgan,Male,9,,
1,Veteran,True,f3c813577c458ba0dfef80996f8f32c93b6e8af1fa9397...,3.8,Christian,Male,17,,
2,Veteran,False,b674dd7ee0d24096d1c019615ce4d12b20fcbff12d79d3...,0.0,Blake,Male,17,,
3,Amateur,True,23fe711e0e3b77f1da7aa221ab1192afe21648d47d2b4f...,0.7,Flora,Female,21,,
4,Regular,True,7dc01f10bf20671ecfccdac23812b1b415acd42c2147cb...,0.1,Kylie,Male,21,,
...,...,...,...,...,...,...,...,...,...
191,Amateur,True,b6e9e593b9ec51c5e335457341c324c34a2239531e1890...,0.0,Bailey,Female,17,,
192,Veteran,False,71453e425f07d10da4fa2b349c83e73ccdf0fb3312f778...,0.3,Pascal,Male,22,,
193,Amateur,False,d572f391d452b76ea2d7e5e53a3d38bfd7499c7399db29...,0.0,Dylan,Prefer not to say,17,,
194,Amateur,False,f19e136ddde68f365afc860c725ccff54307dedd13968e...,2.3,Harlow,Male,17,,


In [19]:
# Scatter plot of age against hours played. Marker shape encodes the
# `subscribe` column and marker colour encodes the `experience` column.
players_plot = (
    alt.Chart(players)
    .mark_point(opacity=0.6)
    .encode(
        x=alt.X("age", title="Age"),
        # Square-root y scale -- presumably to spread out the many near-zero
        # play times visible in the table above.
        y=alt.Y(
            "played_hours",
            title="Time Played (Hours)",
            scale=alt.Scale(type="sqrt"),
        ),
        shape=alt.Shape("subscribe", title="Subscribed"),
        color=alt.Color("experience", title="Experience"),
    )
    .properties(title="Age vs Time Played", width=700, height=500)
)
players_plot

## Method and Results

!!!

In [23]:
# Columns removed before modelling; per the output below, what remains is
# `subscribe`, `played_hours` and `age`.
columns_to_drop = [
    "experience",
    "individualId",
    "organizationName",
    "hashedEmail",
    "name",
    "gender",
]

# drop() returns a new frame, so `players` itself is left untouched.
players_reduced = players.drop(columns=columns_to_drop)
players_reduced

Unnamed: 0,subscribe,played_hours,age
0,True,30.3,9
1,True,3.8,17
2,False,0.0,17
3,True,0.7,21
4,True,0.1,21
...,...,...,...
191,True,0.0,17
192,False,0.3,22
193,False,0.0,17
194,False,2.3,17


In [32]:
# One seed, used both for the global NumPy RNG and for the split below.
SEED = 2025
np.random.seed(SEED)

# Column roles used throughout this cell.
predictor_cols = ["played_hours", "age"]
target_col = "subscribe"

# Keep 75% of the rows for training and hold out the rest for testing,
# stratifying on the target column. `random_state` is passed explicitly so the
# split is pinned by this call itself rather than by whatever has (or has not)
# drawn from the global NumPy RNG beforehand.
players_train, players_test = train_test_split(
    players_reduced,
    train_size=0.75,
    stratify=players_reduced[target_col],
    random_state=SEED,
)

X_train = players_train[predictor_cols]
y_train = players_train[target_col]

X_test = players_test[predictor_cols]
y_test = players_test[target_col]

# Standardise both predictors. `remainder="passthrough"` forwards any other
# columns unchanged; X_train/X_test hold only the two predictors, so nothing
# is passed through here.
players_preprocessor = make_column_transformer(
    (StandardScaler(), predictor_cols),
    remainder="passthrough",
)

# KNN classifier with scikit-learn's default settings, chained after the
# preprocessor. The pipeline is only constructed here, not fitted.
knn = KNeighborsClassifier()
players_pipe = make_pipeline(players_preprocessor, knn)


## Discussion

!!!

In [None]:
# TODO: use classifier, scale, test/split, choose k, evaluate (accuracy rather than RMSPE, which is a regression metric), predict, plot