In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import os

np.random.seed(42)
random.seed(42)

# Create dates (2010-2024, monthly)
start_date = datetime(2010, 1, 15)
dates = [start_date + timedelta(days=30*i) for i in range(180)]

# List of 42 countries
countries = [
    'Tunisia', 'France', 'Canada', 'USA', 'Italy', 'Germany', 'UK', 'Spain', 
    'Belgium', 'Netherlands', 'Morocco', 'Algeria', 'Libya', 'Japan', 'China',
    'Australia', 'UAE', 'Saudi Arabia', 'Qatar', 'Kuwait', 'Egypt', 'Jordan',
    'Lebanon', 'Turkey', 'Greece', 'Portugal', 'Sweden', 'Norway', 'Denmark',
    'Austria', 'Switzerland', 'Poland', 'Czech Republic', 'Hungary', 'Romania',
    'South Africa', 'Brazil', 'Mexico', 'Argentina', 'Chile', 'Peru', 'Colombia'
]

# Season function
def get_season(month):
    if month in [3, 4]: return 'Ramadan'
    elif month in [6, 7, 8]: return 'Summer'
    elif month in [12, 1, 2]: return 'Winter'
    else: return 'Spring'

# Create dataset
print("🚀 Creating dataset...")
data = []
for country in countries:
    for i in range(57):
        temp_dates = dates.copy()
        random.shuffle(temp_dates)
        for date in temp_dates:
            row = {
                'Date': date.strftime('%Y-%m-%d'),
                'Country': country,
                'Production_Tons': random.randint(150000, 350000),
                'Export_Tons': random.randint(5000, 80000),
                'USD_Price': round(random.uniform(5.5, 18.5), 2),
                'Month': date.month,
                'Year': date.year,
                'Season': get_season(date.month)
            }
            data.append(row)

df = pd.DataFrame(data)
df = df.sample(frac=1).reset_index(drop=True)

# Create directories if not exist
os.makedirs('../data/raw', exist_ok=True)

# Save to project structure
df.to_csv('../data/raw/tunisia_olive_oil_dataset.csv', index=False)

print(f"✅ Dataset ready: {len(df):,} rows")
print(f"✅ Countries: {df['Country'].nunique()}")
print("\n📊 First 5 rows:")
print(df[['Date', 'Country', 'Export_Tons', 'USD_Price', 'Season']].head())
print(f"\n💾 File saved: ../data/raw/tunisia_olive_oil_dataset.csv")

🚀 Creating dataset...
✅ Dataset ready: 430,920 rows
✅ Countries: 42

📊 First 5 rows:
         Date Country  Export_Tons  USD_Price  Season
0  2016-02-13  France        23185       6.21  Winter
1  2011-09-07  France        36689      10.03  Spring
2  2017-09-05     UAE         7224      16.72  Spring
3  2010-06-14   China        72467      12.65  Summer
4  2010-08-13  Brazil        58887       8.27  Summer

💾 File saved: ../data/raw/tunisia_olive_oil_dataset.csv


In [2]:
print("\n🎉 All done!")


🎉 All done!
