# Student Performance Analysis

StudentPerformance/
├── student_performance.py
├── student_data.csv  # <-- Sample dataset (generated by the dataset-creation cell below)

In [1]:

pip install pandas numpy matplotlib seaborn scikit-learn



In [None]:
# Create the sample dataset file student_data.csv
import pandas as pd
import numpy as np
import os

# Set seed for reproducibility. The order of the random draws below is part
# of the recipe: reordering them would produce a different dataset.
np.random.seed(42)

# Generate sample data
num_students = 200
# Study hours: normal draw (mean 5, std 2), clipped into the 0-10 range.
hours_studied = np.random.normal(5, 2, num_students).clip(0, 10)
# Attendance: uniform percentage between 60 and 100.
attendance_rate = np.random.uniform(60, 100, num_students)
# Performance is a linear mix of both features plus Gaussian noise (std 5),
# clipped so the score stays within 0-100.
performance_score = (
    hours_studied * 6
    + attendance_rate * 0.4
    + np.random.normal(0, 5, num_students)
).clip(0, 100)

# Create DataFrame
df = pd.DataFrame({
    "Hours_Studied": hours_studied,
    "Attendance_Rate": attendance_rate,
    "Performance_Score": performance_score,
})

# Save to CSV in the working directory. The analysis step loads
# "student_data.csv" by this same relative name, so writing to an absolute,
# machine-specific location would leave it unable to find the file.
output_path = "student_data.csv"
df.to_csv(output_path, index=False)

# Last expression of the cell: a notebook displays the path that was written.
output_path

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load dataset (expects student_data.csv in the current working directory)
df = pd.read_csv("student_data.csv")

# EDA
print("First 5 rows of data:\n", df.head())
print("\nDataset info:")
# DataFrame.info() prints its report itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\nSummary statistics:")
print(df.describe())

# Correlation heatmap of the pairwise correlations between all columns
plt.figure(figsize=(6, 4))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.title("Feature Correlation")
plt.tight_layout()
plt.show()

# Feature selection: two predictors, one target
X = df[["Hours_Studied", "Attendance_Rate"]]
y = df["Performance_Score"]

# Train-test split (80/20, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling features: the scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Predict
y_pred = model.predict(X_test_scaled)

# Evaluation
# NOTE: the model was fitted on standardized features, so each coefficient is
# the score change per one standard deviation of that feature, not per raw
# hour or per attendance percentage point.
print("\nModel Coefficients:", model.coef_)
print("Model Intercept:", model.intercept_)
print("R² Score:", r2_score(y_test, y_pred))
print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))

# Visualization: points close to the diagonal are well-predicted samples
plt.scatter(y_test, y_pred)
plt.xlabel("Actual Performance")
plt.ylabel("Predicted Performance")
plt.title("Actual vs Predicted Performance")
plt.grid(True)
plt.tight_layout()
plt.show()