In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Set style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

In [4]:
# 1. DATA LOADING AND SETUP 

print("="*80)
print("WORLD TRADE EXPLORATORY DATA ANALYSIS")
print("="*80)

# Load cleaned data
df = pd.read_csv('../notebooks/world_trade_cleaned.csv')
df['Year'] = pd.to_datetime(df['Year'])

print(f"Dataset shape: {df.shape}")
print(f"Years covered: {df['Year_Value'].min()} - {df['Year_Value'].max()}")
print(f"Number of countries: {df['Is_Country'].sum()}")
print(f"Number of regions: {df['Is_Region'].sum()}")



WORLD TRADE EXPLORATORY DATA ANALYSIS
Dataset shape: (8096, 42)
Years covered: 1988 - 2021
Number of countries: 7695
Number of regions: 401


In [5]:
# 2. DESCRIPTIVE STATISTICS 

print("\n2. DESCRIPTIVE STATISTICS")
print("-"*40)

# Separate countries and regions
country_df = df[df['Is_Country']].copy()
region_df = df[df['Is_Region']].copy()

# Key statistics for trade metrics
trade_metrics = ['Export (US$ Thousand)_imputed', 'Import (US$ Thousand)_imputed', 
                 'Trade_Balance', 'Total_Trade']

print("\nTrade Statistics (Countries Only):")
print("="*60)

stats_dict = {}
for metric in trade_metrics:
    if metric in country_df.columns:
        stats_dict[metric] = {
            'Mean': country_df[metric].mean(),
            'Median': country_df[metric].median(),
            'Std': country_df[metric].std(),
            'Min': country_df[metric].min(),
            'Max': country_df[metric].max(),
            'Skewness': country_df[metric].skew(),
            'Kurtosis': country_df[metric].kurtosis()
        }

stats_df = pd.DataFrame(stats_dict).T
print(stats_df.round(2))




2. DESCRIPTIVE STATISTICS
----------------------------------------

Trade Statistics (Countries Only):
                                       Mean      Median           Std  \
Export (US$ Thousand)_imputed  9.896250e+07  3285579.69  9.221170e+08   
Import (US$ Thousand)_imputed  9.112316e+07  1790649.46  8.349606e+08   
Trade_Balance                  7.839341e+06   266480.80  1.159003e+08   
Total_Trade                    1.900857e+08  5377002.70  1.755416e+09   

                                        Min           Max  Skewness  Kurtosis  
Export (US$ Thousand)_imputed  0.000000e+00  2.422743e+10     19.12    396.18  
Import (US$ Thousand)_imputed  3.000000e-02  2.193121e+10     18.87    387.55  
Trade_Balance                 -9.638287e+08  2.455320e+09     11.14    186.41  
Total_Trade                    3.000000e-02  4.615865e+10     19.04    393.46  


In [None]:
# 3. TEMPORAL TREND ANALYSIS 

print("\n3. TEMPORAL TREND ANALYSIS")
print("-"*40)

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('World Trade Trends Over Time', fontsize=16, fontweight='bold')

# Global trade trends
global_trade = df.groupby('Year_Value').agg({
    'Export (US$ Thousand)_imputed': 'sum',
    'Import (US$ Thousand)_imputed': 'sum',
    'Total_Trade': 'sum'
}).reset_index()

# Plot 1: Global Export/Import Trends
ax1 = axes[0, 0]
ax1.plot(global_trade['Year_Value'], global_trade['Export (US$ Thousand)_imputed']/1e6, 
         label='Exports', marker='o', linewidth=2)
ax1.plot(global_trade['Year_Value'], global_trade['Import (US$ Thousand)_imputed']/1e6,
         label='Imports', marker='s', linewidth=2)
ax1.set_xlabel('Year')
ax1.set_ylabel('Trade Value (Millions USD)')
ax1.set_title('Global Export/Import Trends')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Year-over-Year Growth
ax2 = axes[0, 1]
global_trade['Export_Growth'] = global_trade['Export (US$ Thousand)_imputed'].pct_change() * 100
global_trade['Import_Growth'] = global_trade['Import (US$ Thousand)_imputed'].pct_change() * 100

ax2.bar(global_trade['Year_Value'], global_trade['Export_Growth'], 
        alpha=0.7, label='Export Growth')
ax2.bar(global_trade['Year_Value'], global_trade['Import_Growth'],
        alpha=0.7, label='Import Growth', bottom=global_trade['Export_Growth'])
ax2.set_xlabel('Year')
ax2.set_ylabel('Growth Rate (%)')
ax2.set_title('Year-over-Year Trade Growth')
ax2.legend()

# Plot 3: Regional Trade Composition
ax3 = axes[1, 0]
regional_trade = df.groupby(['Region', 'Year_Value'])['Total_Trade'].sum().unstack().T
regional_trade.plot(kind='area', stacked=True, ax=ax3, alpha=0.8)
ax3.set_xlabel('Year')
ax3.set_ylabel('Total Trade (USD)')
ax3.set_title('Regional Trade Composition')
ax3.legend(title='Region', bbox_to_anchor=(1.05, 1), loc='upper left')

# Plot 4: Top 10 Countries Trade Evolution
ax4 = axes[1, 1]
top_countries = country_df.groupby('Partner Name')['Total_Trade'].mean().nlargest(10).index
top_countries_data = country_df[country_df['Partner Name'].isin(top_countries)]

for country in top_countries:
    country_data = top_countries_data[top_countries_data['Partner Name'] == country]
    ax4.plot(country_data['Year_Value'], country_data['Total_Trade']/1e6, 
             marker='o', label=country, linewidth=2)

ax4.set_xlabel('Year')
ax4.set_ylabel('Total Trade (Millions USD)')
ax4.set_title('Top 10 Countries: Trade Evolution')
ax4.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('temporal_trends.png', dpi=300, bbox_inches='tight')
plt.show()

