In [0]:
import pandas as pd
import numpy as np

# Fix the global NumPy seed so the synthetic dataset is identical on every run
np.random.seed(42)

# Generate synthetic records for 300 students
n = 300
study_hours = np.random.randint(1, 11, n)           # 1 to 10 hours
attendance = np.random.randint(50, 101, n)          # 50% to 100%
previous_score = np.random.randint(30, 91, n)       # 30 to 90
noise = np.random.normal(0, 3, n)                   # Gaussian noise: mean 0, std 3

# Final score = weighted sum of the features + constant offset of 10 + noise.
# Attendance is divided by 10 before weighting; previous_score is used as-is.
# The noise drawn above is applied here (exactly one noise draw per student).
final_score = (
    0.3 * study_hours +
    0.3 * (attendance / 10) +
    0.4 * previous_score +
    10
) + noise

# Round to one decimal place and cap the scores to the range [0, 100]
final_score = np.clip(np.round(final_score, 1), 0, 100)

# Assemble features and target into a single DataFrame
df = pd.DataFrame({
    'study_hours': study_hours,
    'attendance': attendance,
    'previous_score': previous_score,
    'final_score': final_score
})

df.head(10)


In [0]:
import numpy as np

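# Optional (currently disabled): add extra Gaussian noise (std 2) to the final score.
# The dataset-generation cell already adds noise, so this line is left commented out.
#df['final_score'] = df['final_score'] + np.random.normal(0, 2, size=len(df))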


In [0]:
from sklearn.model_selection import train_test_split

# Feature matrix: the three predictor columns; target vector: the final score
X = df.loc[:, ['study_hours', 'attendance', 'previous_score']]
y = df.loc[:, 'final_score']

# Hold out 20% of the rows for evaluation; fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [0]:
# Show the held-out features side by side with their true final scores
test_rows_with_target = X_test.assign(final_score=y_test.values)
display(test_rows_with_target)

In [0]:
from sklearn.linear_model import LinearRegression

# Fit ordinary least squares on the training split; fit() returns the estimator itself
model = LinearRegression().fit(X_train, y_train)

# Bare expression so the cell still displays the fitted estimator
model


In [0]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict final scores for the held-out rows
y_pred = model.predict(X_test)

# Print each regression metric as "<label> <value>"
metric_report = (
    ("Mean Absolute Error:", mean_absolute_error),
    ("Mean Squared Error:", mean_squared_error),
    ("R2 Score:", r2_score),
)
for label, metric_fn in metric_report:
    print(label, metric_fn(y_test, y_pred))


In [0]:
# One hypothetical student: 3 study hours, attendance 89, previous score 82
new_data = pd.DataFrame([
    {'study_hours': 3, 'attendance': 89, 'previous_score': 82},
])

# predict() returns one value per input row; there is a single row here
predicted_score = model.predict(new_data)
print(f"Predicted Final Score: {predicted_score[0]:.2f}")


In [0]:
# Another hypothetical student: 7 study hours, attendance 92, previous score 66
new_data = pd.DataFrame(
    [[7, 92, 66]],
    columns=['study_hours', 'attendance', 'previous_score'],
)

# Single-row input, so the first element is the only prediction
predicted_score = model.predict(new_data)
print(f"Predicted Final Score: {predicted_score[0]:.2f}")


In [0]:
import matplotlib.pyplot as plt

# Scatter of true vs. predicted scores on the held-out rows
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set(
    xlabel="Actual Final Scores",
    ylabel="Predicted Final Scores",
    title="Actual vs Predicted Scores",
)
ax.grid(True)
plt.show()
