In [None]:
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  
import seaborn as sns  
from sklearn.model_selection import train_test_split  
from sklearn.linear_model import LinearRegression  
from sklearn.metrics import mean_squared_error, r2_score  

In [None]:
# Load the raw game/grade dataset from its repository-relative CSV path.
df = pd.read_csv(
    'csv/prelim_datasets/gameandgrade.csv',
)

In [None]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

In [None]:
# Clean the Grade column.
# One entry contains a doubled decimal point ("..", very sneaky), so the column is
# handled as text. Collapse the doubled dots and then convert the column to a real
# numeric dtype, so the regression target, the metrics and the plot axes all operate
# on numbers instead of strings.
grade = df["Grade"]

# Only apply the string fix while the column is still text; this keeps the cell
# safe to re-run after Grade has already been converted (`.str` fails on numbers).
if not pd.api.types.is_numeric_dtype(grade):
    grade = grade.str.replace(r"\.\.", ".", regex=True)

# errors="raise" surfaces any other malformed entry immediately instead of
# silently turning it into NaN and breaking the model fit later.
df["Grade"] = pd.to_numeric(grade, errors="raise")

In [None]:
# Feature matrix: selecting with a list keeps it a 2-D DataFrame.
feature_columns = ["Playing Hours"]
x = df[feature_columns]

# Target vector: a 1-D Series.
y = df["Grade"]

print(x)

In [None]:
# Hold out 40% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=42,
)

In [None]:
# Fit an ordinary least-squares model on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(x_train, y_train)

# Leave the fitted estimator as the cell's last expression so it is displayed.
model

In [None]:
# Report the fitted parameters: the intercept, then one coefficient row per feature.
intercept_line = f"Intercept (b): {model.intercept_:.2f}"  # The base score when all features are 0
print(intercept_line)

coefficients = pd.DataFrame(
    data=model.coef_,
    index=x.columns,
    columns=['Coefficient'],
)
print(coefficients)


In [None]:
# Predict grades for the held-out test features.
y_pred = model.predict(X=x_test)

In [None]:
# Score the predictions against the held-out targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

for report_line in (
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R² Score: {r2:.2f}",
):
    print(report_line)

In [None]:
# Actual vs. predicted grades on the held-out test set.

# Coerce both series to plain float arrays first, so the axes are numeric even if
# Grade is still stored as text: string values would make matplotlib treat the axis
# as categorical and make min()/max() compare lexicographically.
actual = pd.to_numeric(y_test).to_numpy(dtype=float)
predicted = np.asarray(y_pred, dtype=float).ravel()

# Create scatter plot on an explicit Axes; 2:1 canvas at a notebook-friendly size
# (a 40x20 inch figure renders enormous with unreadably small default fonts).
fig, ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(x=actual, y=actual, color="blue", label="Actual", alpha=0.6, ax=ax)  # Actual values (lie on the diagonal)
sns.scatterplot(x=actual, y=predicted, color="red", label="Predicted", alpha=0.6, ax=ax)  # Predicted values

# Plot a reference diagonal line (perfect predictions)
lowest, highest = actual.min(), actual.max()
ax.plot([lowest, highest], [lowest, highest], color="black", linestyle="--", label="Perfect Fit")

# Labels, title, and legend
ax.set_xlabel("Actual Assessment of Grade")
ax.set_ylabel("Predicted Assessment of Grade")
ax.set_title("Actual vs. Predicted Assessment of Grade")
ax.legend()
plt.show()