In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ucimlrepo import fetch_ucirepo

In [None]:
# Read the Forest Fires CSV (path is relative to the notebook's working
# directory) into `df`, report its dimensions, and preview the first rows.
print("Loading Forest Fires dataset...")
csv_path = 'data/forestfires.csv'
df = pd.read_csv(csv_path)
n_rows_cols = df.shape  # (rows, columns) tuple
print(f"Dataset shape: {n_rows_cols}")
# Bare last expression so the notebook renders the preview as a rich table.
df.head()

In [None]:
# Structural overview of `df`: the pandas info report, followed by a
# per-column count of missing values.
print("Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\nMissing values:")
print(df.isnull().sum())

In [None]:
# Target variable distribution: histogram of the raw `area` column (left)
# next to a histogram of its log(1 + x) transform (right).
fig, (ax_raw, ax_log) = plt.subplots(1, 2, figsize=(12, 4))

ax_raw.hist(df['area'], bins=50, alpha=0.7)
ax_raw.set_title('Distribution of Burned Area')
ax_raw.set_xlabel('Area (ha)')
ax_raw.set_ylabel('Frequency')

# np.log1p(x) computes log(1 + x) without first forming 1 + x, so it keeps
# precision for very small x and stays defined at x == 0.
ax_log.hist(np.log1p(df['area']), bins=50, alpha=0.7)
ax_log.set_title('Distribution of Log-transformed Area')
ax_log.set_xlabel('Log(Area + 1)')
ax_log.set_ylabel('Frequency')

fig.tight_layout()
plt.show()

In [None]:
# Pairwise correlation heatmap restricted to the numeric columns of `df`.
numeric_cols = df.select_dtypes(include=[np.number]).columns
corr_matrix = df[numeric_cols].corr()

fig, heatmap_ax = plt.subplots(figsize=(10, 8))
# center=0 pins the midpoint of the diverging colormap to zero correlation.
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, ax=heatmap_ax)
heatmap_ax.set_title('Correlation Matrix')
plt.show()