# Online vs Offline Learning - Student Performance Analysis

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score, classification_report


### Load the cleaned dataset

In [None]:
# Read the cleaned, combined student records into `df`.
csv_path = "combined_student_data.csv"
df = pd.read_csv(csv_path)
# Trailing expression: previews the first rows when run as a notebook cell.
df.head()

### Visualize Grade Distribution by Learning Mode

In [None]:
# Histogram of G3 with a KDE overlay, one colour per `learning_mode` value.
grade_ax = sns.histplot(data=df, x='G3', hue='learning_mode', bins=20, kde=True)
grade_ax.set_title('Distribution of Final Grades')
plt.show()

### Correlation Heatmaps

In [None]:
# Split the data by learning mode.
learning_mode = df['learning_mode']
offline_df = df[learning_mode == 'offline']
online_df = df[learning_mode == 'online']

# One annotated correlation heatmap per subset: (frame, columns, title).
heatmap_specs = [
    (offline_df, ['engagement', 'absences', 'G3'], 'Offline Correlation Heatmap'),
    (
        online_df,
        ['engagement', 'resources', 'discussion', 'absences', 'G3'],
        'Online Correlation Heatmap',
    ),
]
for subset, columns, title in heatmap_specs:
    sns.heatmap(subset[columns].corr(), annot=True)
    plt.title(title)
    plt.show()

### Predictive Modeling - Offline (Linear Regression)

In [None]:
# Offline model: linear regression of G3 on engagement and absences,
# evaluated on a held-out 20% test split.
X_off = offline_df[['engagement', 'absences']]
y_off = offline_df['G3']

# A fixed random_state keeps the train/test partition reproducible across runs.
X_train_o, X_test_o, y_train_o, y_test_o = train_test_split(X_off, y_off, test_size=0.2, random_state=42)
model_off = LinearRegression()
model_off.fit(X_train_o, y_train_o)
y_pred_off = model_off.predict(X_test_o)

print("Offline R²:", r2_score(y_test_o, y_pred_off))
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# explicit square root keeps this cell working on old and new versions alike.
print("Offline RMSE:", mean_squared_error(y_test_o, y_pred_off) ** 0.5)

### Predictive Modeling - Online (Logistic Regression)

In [None]:
# Online model: logistic regression predicting G3 from the four online
# activity features, evaluated on a held-out 20% test split.
# NOTE(review): G3 is used directly as the class label, so every distinct
# grade value becomes its own class — confirm this is intended rather than
# a pass/fail target or a regression like the offline model.
online_features = ['engagement', 'resources', 'discussion', 'absences']
X_on = online_df[online_features]
y_on = online_df['G3']

# A fixed random_state keeps the train/test partition reproducible across runs.
X_train_on, X_test_on, y_train_on, y_test_on = train_test_split(
    X_on, y_on, test_size=0.2, random_state=42
)
model_on = LogisticRegression(max_iter=200).fit(X_train_on, y_train_on)
y_pred_on = model_on.predict(X_test_on)

print("Online Accuracy:", accuracy_score(y_test_on, y_pred_on))
print(
    "Classification Report:",
    classification_report(y_test_on, y_pred_on),
    sep="\n",
)