# Student AI Usage - Exploratory Data Analysis

This notebook explores the dataset to understand how AI tools impact student performance.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Read the processed CSV into `df` (relative path, resolved against the
# kernel's working directory) and show its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='../data/processed/cleaned_data.csv')
df.head(n=5)

## 1. Distribution of Users

In [None]:
# One bar per distinct `user_category` value, with bar height = number of rows.
# countplot returns the Axes it drew on, so the title is set on it directly.
sns.countplot(data=df, x='user_category').set_title('AI Users vs Non-Users')
plt.show()

## 2. Impact on Grades
Does using AI correlate with better grades?

In [None]:
# Box plot of `grade_improvement` for each `user_category` group,
# drawn on a 10x6 inch figure.
plt.figure(figsize=(10, 6))
ax = sns.boxplot(data=df, x='user_category', y='grade_improvement')
ax.set_title('Grade Improvement by AI Usage')
plt.show()

## 3. Tool Popularity

In [None]:
# Frequency of each `ai_tools_used` value, ignoring rows whose value is the
# literal string 'None'; the counts are then shown as a Plotly bar chart.
tool_counts = (
    df.loc[df['ai_tools_used'].ne('None'), 'ai_tools_used']
    .value_counts()
)
fig = px.bar(tool_counts, title='Most Popular AI Tools')
fig.show()

## 4. Correlation Analysis

In [None]:
# Pairwise correlation between the numeric columns, shown as an annotated heatmap.
# 'number' selects every numeric dtype, so a column stored as e.g. int32 or
# float32 is not silently left out the way an explicit ['float64', 'int64']
# list would leave it out.
numeric_df = df.select_dtypes(include='number')
# Correlations live in [-1, 1]; pinning the colour scale to that range keeps the
# neutral midpoint of the diverging 'coolwarm' map at exactly zero correlation.
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
plt.show()