In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw student records; the path is resolved relative to the current
# working directory.
data = pd.read_csv('StudentPerformance.csv')

# The identifier column is removed so it is not used as a feature below.
data = data.drop(columns='Student ID')

# Encode the two categorical columns as numbers. Series.map() yields NaN for
# any label that is not a key of the mapping, so unexpected labels (other
# spellings, extra grades, ...) surface in the missing-value check below.
gender_mapping = {'Male': 0, 'Female': 1}
grade_mapping = {'A': 4, 'B': 3, 'C': 2, 'D': 1}
data['Gender'] = data['Gender'].map(gender_mapping)
data['Grade'] = data['Grade'].map(grade_mapping)

# Abort before modelling if any cell is missing. The per-column counts are
# printed so the offending column can be found quickly; NaNs in 'Gender' or
# 'Grade' may also come from labels the mappings above do not cover.
missing_counts = data.isnull().sum()
missing_counts = missing_counts[missing_counts > 0]
if not missing_counts.empty:
    print("Error: Missing values in dataset. Please check the CSV file.")
    print("Missing or unmapped values per column:")
    print(missing_counts.to_string())
    # exit() is a helper added by the `site` module for interactive sessions;
    # raising SystemExit directly is the programmatic equivalent, and the
    # non-zero status marks the run as failed.
    raise SystemExit(1)

# Target is the numeric grade; every remaining column is a predictor.
y = data['Grade']
X = data.drop(columns='Grade')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ordinary least squares on the standardized features, followed by
# predictions for the held-out rows.
model = LinearRegression().fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Error metrics computed on the held-out test rows.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

print(f"Mean Squared Error: {mse:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"R-squared Score: {r2:.2f}")

# One fitted coefficient per input column, labelled with the column name.
feature_names = X.columns
coefficients = pd.Series(
    model.coef_,
    index=feature_names,
    name='Coefficient',
).to_frame()
print("\nFeature Coefficients:")
print(coefficients)

# Observed minimum and maximum of every feature, one row per feature.
feature_ranges = X.describe().loc[['min', 'max']].T
print("\nFeature Ranges for Reference:")
print(feature_ranges)

# Interactive prediction for one new student.
#
# Reads gender, age, study hours and previous test scores from stdin, checks
# them against the ranges quoted in the prompts, and prints the predicted
# letter grade together with the raw regression output. Invalid input is
# reported with a single message and no prediction is attempted.
print("\nEnter values for a new student to predict their grade:")
try:
    # Only user-input parsing and validation live inside the try block, so a
    # ValueError raised by the scaler or the model is not misreported as a
    # typing mistake.
    gender_text = input("Gender (Male/Female): ").strip().capitalize()
    if gender_text == 'Male':
        gender = 0
    elif gender_text == 'Female':
        gender = 1
    else:
        raise ValueError("Gender must be 'Male' or 'Female'.")

    age = float(input("Age (17-19): "))
    study_hours = float(input("Study Hours (2-6): "))
    test_scores = float(input("Previous Test Scores (50-93): "))

    # NaN fails every comparison, so it is rejected by these checks as well.
    for label, value, low, high in (
        ("Age", age, 17, 19),
        ("Study Hours", study_hours, 2, 6),
        ("Previous Test Scores", test_scores, 50, 93),
    ):
        if not (low <= value <= high):
            raise ValueError(f"{label} must be between {low} and {high}.")
except ValueError as e:
    # The validation messages already end with a period; strip it so the
    # combined sentence does not contain "..".
    print(f"Error: {str(e).rstrip('.')}. Please enter valid inputs.")
else:
    # Build the row by column name and then order it like the training
    # features, so the values cannot be paired with the wrong column if the
    # CSV column order differs from the order of the prompts.
    user_input = pd.DataFrame([{
        'Gender': gender,
        'Age': age,
        'Study Hours': study_hours,
        'Previous Test Scores': test_scores,
    }])[list(feature_names)]

    # Apply the scaling fitted on the training data before predicting.
    user_input_scaled = scaler.transform(user_input)
    predicted_grade_num = model.predict(user_input_scaled)[0]

    # Map the continuous prediction to a letter using the midpoints between
    # the numeric grade codes (A=4, B=3, C=2, D=1).
    if predicted_grade_num >= 3.5:
        predicted_grade = 'A'
    elif predicted_grade_num >= 2.5:
        predicted_grade = 'B'
    elif predicted_grade_num >= 1.5:
        predicted_grade = 'C'
    else:
        predicted_grade = 'D'
    print(f"\nPredicted Grade: {predicted_grade} (Numerical: {predicted_grade_num:.2f})")

# Scatter of actual versus predicted grades for the test rows, with the
# diagonal marking where a perfect prediction would fall.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.5, label='Test Data')

grade_low, grade_high = y_test.min(), y_test.max()
ax.plot(
    [grade_low, grade_high],
    [grade_low, grade_high],
    'r--',
    lw=2,
    label='Perfect Prediction',
)

ax.set_xlabel('Actual Grade (Numerical)')
ax.set_ylabel('Predicted Grade (Numerical)')
ax.set_title('Actual vs Predicted Grades')
ax.legend()

# Write the figure to disk and release it; nothing is shown on screen.
fig.tight_layout()
fig.savefig('actual_vs_predicted_grades.png')
plt.close(fig)

# Horizontal bar chart of the fitted coefficients, one bar per feature.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=coefficients['Coefficient'], y=coefficients.index, ax=ax)
ax.set_title('Feature Importance in Linear Regression Model')
ax.set_xlabel('Coefficient Value')

# Write the figure to disk and release it; nothing is shown on screen.
fig.tight_layout()
fig.savefig('feature_importance_sp.png')
plt.close(fig)

Mean Squared Error: 0.07
Root Mean Squared Error: 0.27
R-squared Score: 0.90

Feature Coefficients:
                      Coefficient
Gender                   0.050162
Age                      0.042669
Study Hours              0.329103
Previous Test Scores     0.543460

Feature Ranges for Reference:
                       min   max
Gender                 0.0   1.0
Age                   17.0  19.0
Study Hours            2.0   6.0
Previous Test Scores  50.0  93.0

Enter values for a new student to predict their grade:


Gender (Male/Female):  Male
Age (17-19):  18
Study Hours (2-6):  3
Previous Test Scores (50-93):  87



Predicted Grade: B (Numerical: 2.98)
