In [79]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [80]:
df = pd.read_csv("../data/steam.csv", index_col=0)

In [81]:
df.head()

Unnamed: 0_level_0,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,124534,3339,17612,317,10000000-20000000,7.19
20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,3318,633,277,62,5000000-10000000,3.99
30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0,3416,398,187,34,5000000-10000000,3.99
40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,1273,267,258,184,5000000-10000000,3.99
50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0,5250,288,624,415,5000000-10000000,3.99


In [82]:
df["positive_ratio"] = df["positive_ratings"] / (df["positive_ratings"] + df["negative_ratings"])
df = df[(df['positive_ratings'] + df['negative_ratings']) >= 500]
df_encoded = pd.get_dummies(df)


In [83]:
class LinearRegressor(torch.nn.Module):
    def __init__(self, input_dim):
        super(LinearRegressor, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
    
    def forward(self, x):
        predict_y = self.linear(x)
        return predict_y

In [84]:
X = df_encoded.drop("positive_ratio", axis=1)
y = df_encoded["positive_ratio"]
label_encoder = LabelEncoder()
X = X.apply(label_encoder.fit_transform)

In [85]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train = torch.from_numpy(X_train.values).float()
X_test = torch.from_numpy(X_test.values).float()
y_train = torch.from_numpy(y_train.values).float()
y_test = torch.from_numpy(y_test.values).float()

In [86]:
model = LinearRegressor(X_train.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

In [88]:
for epoch in range(100):
    y_pred = model(X_train)
    loss = loss_fn(y_pred.squeeze(1), y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

print(y_pred, y_train)

with torch.no_grad():
    y_test_pred = model(X_test)
    test_loss = loss_fn(y_test_pred.squeeze(1), y_test)
    print(f"Test Loss: {test_loss.item():.4f}")

Epoch 1, Loss: 0.0213
Epoch 2, Loss: 0.0209
Epoch 3, Loss: 0.0203
Epoch 4, Loss: 0.0197
Epoch 5, Loss: 0.0193
Epoch 6, Loss: 0.0189
Epoch 7, Loss: 0.0185
Epoch 8, Loss: 0.0180
Epoch 9, Loss: 0.0176
Epoch 10, Loss: 0.0172
Epoch 11, Loss: 0.0168
Epoch 12, Loss: 0.0164
Epoch 13, Loss: 0.0160
Epoch 14, Loss: 0.0156
Epoch 15, Loss: 0.0153
Epoch 16, Loss: 0.0149
Epoch 17, Loss: 0.0146
Epoch 18, Loss: 0.0142
Epoch 19, Loss: 0.0139
Epoch 20, Loss: 0.0136
Epoch 21, Loss: 0.0133
Epoch 22, Loss: 0.0130
Epoch 23, Loss: 0.0127
Epoch 24, Loss: 0.0124
Epoch 25, Loss: 0.0121
Epoch 26, Loss: 0.0118
Epoch 27, Loss: 0.0116
Epoch 28, Loss: 0.0113
Epoch 29, Loss: 0.0110
Epoch 30, Loss: 0.0108
Epoch 31, Loss: 0.0106
Epoch 32, Loss: 0.0103
Epoch 33, Loss: 0.0101
Epoch 34, Loss: 0.0099
Epoch 35, Loss: 0.0096
Epoch 36, Loss: 0.0094
Epoch 37, Loss: 0.0092
Epoch 38, Loss: 0.0090
Epoch 39, Loss: 0.0088
Epoch 40, Loss: 0.0086
Epoch 41, Loss: 0.0084
Epoch 42, Loss: 0.0082
Epoch 43, Loss: 0.0081
Epoch 44, Loss: 0.00

In [90]:
torch.save(model, "./model.pt")

In [91]:
print(type(model))

<class '__main__.LinearRegressor'>
