In [None]:
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  
import seaborn as sns  
from sklearn.model_selection import train_test_split  
from sklearn.linear_model import LinearRegression  
from sklearn.metrics import mean_squared_error, r2_score  

In [None]:
# Load the student performance dataset; the relative path is resolved
# against the notebook's working directory.
dataset_path = "csv/StudentPerformanceFactors.csv"
df = pd.read_csv(filepath_or_buffer=dataset_path)

In [None]:
# Exploratory view of the raw data: one blue point per row of df, with
# hours studied on the x-axis and exam score on the y-axis.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(df["Hours_Studied"], df["Exam_Score"], color="blue")
ax.set_title('Hours studied vs Exam Score')
ax.set_xlabel('Hours studied')
ax.set_ylabel('Exam score')
plt.show()

In [None]:
# Feature matrix (double brackets keep it a one-column DataFrame) and the
# target Series.
x, y = df[["Hours_Studied"]], df["Exam_Score"]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Print each split under its heading: features first, then the target.
# The leading "\n" on the second heading separates the two groups.
for heading, features, target in (
    ("Training Set:", x_train, y_train),
    ("\nTesting Set:", x_test, y_test),
):
    print(heading)
    print(features)
    print(target)

In [None]:
# Fit a linear regression on the training split only; the test split stays
# untouched until evaluation.
model = LinearRegression()
model.fit(X=x_train, y=y_train)

In [None]:
# Report the fitted parameters, formatted to two decimals.
intercept = model.intercept_
slope = model.coef_[0]  # first (and, with one feature, only) coefficient
print(f"Intercept: {intercept:.2f}")
print(f"Coefficient: {slope:.2f}")

In [None]:
# Predict exam scores for the held-out test rows.
y_pred = model.predict(x_test)

# Flatten the single-column test features once so both scatter layers share
# exactly the same x values.
hours_test = x_test.to_numpy().ravel()

fig, ax = plt.subplots(figsize=(8, 6))

# Actual exam scores of the TEST set (the data plotted here is x_test/y_test,
# not the training split).
sns.scatterplot(x=hours_test, y=y_test.to_numpy(), color="blue", label="Actual test scores", ax=ax)

# Model predictions for the same test rows.
sns.scatterplot(x=hours_test, y=y_pred, color="red", label="Predicted test scores", ax=ax)

ax.set_xlabel("Hours studied")
ax.set_ylabel("Exam score")
ax.set_title("Actual vs Predicted exam scores")
ax.legend()
plt.show()


In [None]:
# Score the test-set predictions against the held-out targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean squared error: {mse:.2f}")
print(f"R^2 score: {r2:.2f}")

In [None]:
# Population variance (ddof=0, NumPy's default) of the test-set exam scores.
# Always predicting the test-set mean would give an MSE equal to this value,
# so it serves as a yardstick for the model's mean squared error.
exam_score_variance = np.var(y_test, ddof=0)
print(f"Exam score variance: {exam_score_variance:.2f}")