# Exploratory Data Analysis for VCE Papers

This notebook is intended for exploratory data analysis (EDA) of the VCE Methods and Specialist Maths papers. It will include visualizations, data summaries, and insights that can help in understanding the dataset and guiding the model training process.

In [None]:
# Import necessary libraries
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' style to every figure drawn after this point
sns.set(style='whitegrid')

# Directories holding the processed data for each subject
methods_data_path = '../data/processed/methods/'
specialist_data_path = '../data/processed/specialist/'

# Both subjects are read from a file named 'processed_data.csv' in their own directory
methods_csv = os.path.join(methods_data_path, 'processed_data.csv')
specialist_csv = os.path.join(specialist_data_path, 'processed_data.csv')
methods_data = pd.read_csv(methods_csv)
specialist_data = pd.read_csv(specialist_csv)

# Preview the first rows of each DataFrame under its own heading.
# NOTE(review): display() is not imported in this cell; presumably it is
# supplied by the IPython/Jupyter runtime -- confirm before running as a script.
for heading, frame in (
    ('Methods Data:', methods_data),
    ('Specialist Data:', specialist_data),
):
    print(heading)
    display(frame.head())

def _plot_question_type_counts(data, subject):
    """Draw and show a count plot of the ``question_type`` column of *data*.

    Args:
        data: Dataset handed to ``sns.countplot``; it must expose a
            ``question_type`` column, which is used as the x variable.
        subject: Subject name appended to the figure title.
    """
    plt.figure(figsize=(10, 6))
    sns.countplot(data=data, x='question_type')
    plt.title(f'Distribution of Question Types in {subject}')
    plt.xlabel('Question Type')
    plt.ylabel('Count')
    # Tick labels are rotated 45 degrees, presumably to keep long
    # question-type names from overlapping.
    plt.xticks(rotation=45)
    plt.show()


# Visualize the distribution of question types, one figure per subject
_plot_question_type_counts(methods_data, 'VCE Methods')
_plot_question_type_counts(specialist_data, 'VCE Specialist Maths')

# Further analysis can be added here based on the dataset characteristics.