# Exploration Notebook

This notebook is used for exploratory data analysis (EDA) on the data retrieved from BigQuery for the WBR methodology. It allows for interactive data exploration and visualization.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from src.data.bigquery_client import BigQueryClient
from src.data.queries.wbr import QUERY

# Create the project's BigQuery wrapper client.
client = BigQueryClient()

# Run the WBR query and materialise the result as a DataFrame.
data = (
    client
    .query(QUERY)
    .to_dataframe()
)

# Preview the leading rows (rendered as the cell output).
data.head()

In [None]:
# Data exploration and visualization

# Summary statistics.
# A bare expression in the middle of a notebook cell is evaluated but never
# rendered (only the last expression of a cell is displayed), so the summary
# is printed explicitly.
print(data.describe())

# Visualize the distribution of a key metric: 30-bin histogram with a KDE overlay.
# NOTE(review): 'metric_column' looks like a placeholder name — confirm it
# matches a real column in the query result.
plt.figure(figsize=(12, 6))
sns.histplot(data['metric_column'], bins=30, kde=True)
plt.title('Distribution of Metric Column')
plt.xlabel('Metric Column')
plt.ylabel('Frequency')
plt.show()

In [None]:
# Correlation heatmap
# Restrict to numeric columns before correlating: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# when the frame contains e.g. string or timestamp columns.
# select_dtypes works the same way on older and newer pandas versions.
numeric_data = data.select_dtypes(include='number')
correlation_matrix = numeric_data.corr()

# Annotated, square-celled heatmap of the pairwise correlations.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', square=True)
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Persist the DataFrame to CSV (without the index column) for further analysis.
# NOTE(review): no cleaning/processing step is visible in this section —
# confirm the "processed" naming still reflects the file's contents.
data.to_csv(
    'processed_data.csv',
    index=False,
)
print('Processed data saved to processed_data.csv')