In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
data = pd.read_csv("Video Games.csv")
data.head()

Unnamed: 0,id,platform,year,genre,publisher,na_sales,eu_sales,other_sales,jp_sales
0,2830,PSV,2012,Racing,Electronic Arts,0.38,0.52,0.25,1.01
1,3865,SNES,1996,Role-Playing,ASCII Entertainment,0.0,0.0,0.0,1.31
2,4617,PSP,2009,Sports,Takara Tomy,0.09,0.0,0.01,1.04
3,3675,DS,2007,Fighting,Takara Tomy,0.0,0.0,0.0,1.08
4,1612,PSV,2016,Action,Namco Bandai Games,0.0,0.0,0.0,1.12


In [38]:
class LinearRegression:
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = None
        self.v = None
        self.t = 0

    def fit(self, X, y, epochs=100, batch_size=None):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        self.m = np.zeros_like(self.weights)
        self.v = np.zeros_like(self.weights)

        for epoch in range(epochs):
            if batch_size:
                indices = np.random.choice(n_samples, batch_size, replace=False)
                X_batch = X[indices]
                y_batch = y.iloc[indices]
            else:
                X_batch = X
                y_batch = y

            predictions = self.predict(X_batch)
            error = predictions - y_batch

            gradient_weights = 2 * np.dot(X_batch.T, error) / n_samples
            gradient_bias = 2 * np.mean(error)

            self.t += 1
            self.m = self.beta1 * self.m + (1 - self.beta1) * gradient_weights
            self.v = self.beta2 * self.v + (1 - self.beta2) * (gradient_weights ** 2)
            m_hat = self.m / (1 - self.beta1 ** self.t)
            v_hat = self.v / (1 - self.beta2 ** self.t)

            self.weights -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)
            self.bias -= self.learning_rate * gradient_bias

            if epoch % 100 == 0:
                mse = np.mean(np.square(error))
                print(f'Epoch {epoch}, MSE: {mse}')

    def predict(self, X):
        return np.dot(X, self.weights) + self.bias
    def score(self, X, y):
        predictions = self.predict(X)
        ssr = np.sum((predictions - y.mean())**2)
        sse = np.sum((predictions - y)**2)
        r_squared = 1 - (sse / ssr)
        return r_squared

In [39]:
X = data.drop(columns=['jp_sales'])
y = data['jp_sales']

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = LinearRegression()
model.fit(X_train, y_train, epochs=1000, batch_size=16)

predictions = model.predict(X_test)


Epoch 0, MSE: 1.2190687495305152
Epoch 100, MSE: 0.7800341383760827
Epoch 200, MSE: 0.5357228682793461
Epoch 300, MSE: 0.41313755029072374
Epoch 400, MSE: 0.28979470924532524
Epoch 500, MSE: 0.19370179780725216
Epoch 600, MSE: 0.817946003799601
Epoch 700, MSE: 0.07887819450251783
Epoch 800, MSE: 0.1145327962823938
Epoch 900, MSE: 0.041362037863186235


In [36]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
X = data.drop('jp_sales', axis=1)
X = (X - np.min(X, axis=0)) / (np.max(X, axis=0) - np.min(X, axis=0))
X = np.hstack((np.ones((X.shape[0], 1)), X))
Y = data['jp_sales']

class MultipleRegression:
    def __init__(self, learning_rate=0.01, iterations=3000):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.coefficients = None
        self.errors = []

    def fit(self, X, Y):
        self.coefficients = np.zeros(X.shape[1])
        for _ in range(self.iterations):
            predictions = X @ self.coefficients
            errors = predictions - Y
            gradient = 2 * X.T @ errors / len(Y)
            self.coefficients -= self.learning_rate * gradient

            self.errors.append(np.mean(errors**2))

    def predict(self, X):
        return X @ self.coefficients

model = MultipleRegression(learning_rate=0.01, iterations=3000)
model.fit(X, Y)


predictions = model.predict(X)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

model = MultipleRegression(learning_rate=0.01, iterations=10000)
model.fit(X_train, Y_train)

predictions_train = model.predict(X_train)

mse_train = mean_squared_error(Y_train, predictions_train)

print(f'MSE: {mse_train}')

MSE: 0.18408787757675396
