# Brent Oil Price Analysis - Data Exploration
This notebook explores the Brent crude oil price dataset, performs initial data cleaning, and conducts exploratory data analysis (EDA).

In [None]:
# Import required libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Make the directory two levels above the current working directory
# importable, so the `src.*` imports in this cell can be resolved.
# NOTE(review): the data-loading cell builds its paths from a single '..';
# confirm that two levels up is really the project root.
project_root = os.path.abspath(os.path.join('..', '..'))
sys.path.append(project_root)

# Import project modules
from src.data.preprocessing import load_data, prepare_time_series
from src.visualization.plots import plot_price_series

## 1. Load and Inspect Data

In [None]:
# Locate the raw price file relative to the notebook's working directory.
# `data_dir` is reused by later cells (event data, processed output).
data_dir = os.path.join('..', 'data')
raw_data_path = os.path.join(data_dir, 'raw', 'brent_prices.csv')

# Delegate reading (and whatever preprocessing it applies) to the project's
# loader.
df = load_data(raw_data_path)

# Quick look at the table: dimensions, first rows, summary statistics.
n_rows_cols = df.shape
print(f"Dataset shape: {n_rows_cols}")
first_rows = df.head()
display(first_rows)
summary_stats = df.describe()
display(summary_stats)

# Count of null entries per column.
print("\nMissing values:")
null_counts = df.isnull().sum()
print(null_counts)

## 2. Time Series Visualization

In [None]:
# Build the price and return series used by every later cell.
# NOTE(review): 'M' is presumably a month-level resampling frequency; the
# exact meaning is defined by prepare_time_series, so confirm there.
prices, returns = prepare_time_series(df, resample_freq='M')

# Draw the full price history on a single wide axes.
price_fig, price_ax = plt.subplots(figsize=(14, 7))
price_ax.plot(prices.index, prices.values, linewidth=1)
price_ax.set_title('Brent Crude Oil Price History')
price_ax.set_xlabel('Year')
price_ax.set_ylabel('Price (USD/barrel)')
price_ax.grid(True, alpha=0.3)
price_fig.tight_layout()
plt.show()

## 3. Statistical Analysis

In [None]:
# Rolling mean and standard deviation of the price series, plotted together
# with a band of two rolling standard deviations around the rolling mean.
rolling_window = 12  # number of observations per window (12 months)

# Build the rolling window once and derive both statistics from it.
price_rolling = prices.rolling(window=rolling_window)
rolling_mean = price_rolling.mean()
rolling_std = price_rolling.std()

# The band is mean +/- 2 * std. The first (rolling_window - 1) points are NaN
# because the window is not yet full, so nothing is drawn for them.
band_lower = rolling_mean - 2 * rolling_std
band_upper = rolling_mean + 2 * rolling_std

plt.figure(figsize=(14, 7))
plt.plot(prices.index, prices.values, label='Monthly Price', alpha=0.5)
plt.plot(rolling_mean.index, rolling_mean.values, label=f'{rolling_window}-Month Rolling Mean', color='red')
plt.fill_between(
    rolling_mean.index,
    band_lower.values,
    band_upper.values,
    color='gray',
    alpha=0.2,
    # The legend now states what the band shows (two standard deviations),
    # rather than suggesting it is the standard deviation itself.
    label='Rolling Mean ± 2 Std Dev'
)

plt.title('Brent Crude Oil Price with Rolling Statistics')
plt.xlabel('Year')
plt.ylabel('Price (USD/barrel)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 4. Load and Explore Event Data

In [None]:
# Load the event table (if present) and overlay each event on the price chart,
# one scatter series per event type.
events_path = os.path.join(data_dir, 'events', 'geopolitical_events.csv')

if os.path.exists(events_path):
    events = pd.read_csv(events_path, parse_dates=['event_date'])
    print(f"Loaded {len(events)} events")
    display(events.head())

    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(prices.index, prices.values, linewidth=1, label='Price')

    # One colour per event type, taken from the tab10 palette.
    event_types = events['event_type'].unique()
    colors = plt.cm.tab10(range(len(event_types)))

    # Event dates generally do not coincide with the timestamps of the
    # resampled price series, so an exact-match lookup would leave most events
    # without a price and therefore invisible. Use the nearest observation
    # instead, but only within this gap so events far outside the price
    # history are not pinned to its endpoints.
    max_gap = pd.Timedelta(days=31)
    # Nearest-neighbour reindexing needs a sorted index.
    price_lookup = prices.sort_index()

    for i, event_type in enumerate(event_types):
        type_events = events[events['event_type'] == event_type]
        # Rows without a usable date cannot be placed on the time axis.
        event_dates = pd.DatetimeIndex(
            pd.to_datetime(type_events['event_date']).dropna()
        )

        # Price of the closest observation to each event date; NaN (not drawn)
        # when no observation lies within max_gap.
        event_prices = price_lookup.reindex(
            event_dates, method='nearest', tolerance=max_gap
        )

        ax.scatter(
            event_dates,
            event_prices.values,
            color=colors[i],
            label=f"{event_type} Events",
            alpha=0.7,
            s=100,
            # Alternate marker shapes to help tell adjacent types apart.
            marker='o' if i % 2 == 0 else 's'
        )

    ax.set_title('Brent Crude Oil Price with Major Events')
    ax.set_xlabel('Year')
    ax.set_ylabel('Price (USD/barrel)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()
else:
    print("Event data not found. Please add event data to the 'data/events/' directory.")

## 5. Save Processed Data

In [None]:
# Ensure the output directory for derived series exists (no error if it
# already does).
processed_dir = os.path.join(data_dir, 'processed')
os.makedirs(processed_dir, exist_ok=True)

# Write the price and return series to their own CSV files.
prices_csv = os.path.join(processed_dir, 'monthly_prices.csv')
returns_csv = os.path.join(processed_dir, 'monthly_returns.csv')
prices.to_csv(prices_csv)
returns.to_csv(returns_csv)

print(f"Processed data saved to {processed_dir}")