<a href="https://colab.research.google.com/github/IkerZha0401/INST0001/blob/main/PROJECT_and_BUDGET_GENERATION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import csv
import random
from datetime import datetime, timedelta

# Part 1: Utility Functions

def random_date(start, end):
    """
    Return 'start' moved forward by a random whole number of days,
    chosen so that the result never goes past 'end'.
    """
    span_in_days = (end - start).days
    offset = timedelta(days=random.randint(0, span_in_days))
    return start + offset

def get_project_count(region_type):
    """
    Draw how many projects a region receives in a single year.

    "urban" regions get 1 or 2 projects; any other region type is
    handled as rural and gets 3 or 4.
    """
    low, high = (1, 2) if region_type == "urban" else (3, 4)
    return random.randint(low, high)

def generate_start_date(year):
    """
    Pick a random start date inside the calendar 'year'
    (January 1 through December 31, both included).
    """
    return random_date(datetime(year, 1, 1), datetime(year, 12, 31))

def generate_end_date(start_date, max_year=2024):
    """
    Pick a random end date on or after 'start_date' and no later than
    December 31 of 'max_year'.

    The end year is drawn first. If it matches the start year, the date is
    drawn from 'start_date' onward so it cannot precede the start; for any
    later year the whole calendar year is eligible.
    """
    end_year = random.randint(start_date.year, max_year)
    # Only the start year needs its lower bound clamped to 'start_date'.
    if end_year > start_date.year:
        earliest = datetime(end_year, 1, 1)
    else:
        earliest = start_date
    return random_date(earliest, datetime(end_year, 12, 31))

# Part 2: Project Data Generation with Direct Budget Allocation

def generate_projects(regions, years, base_target=107_000_000):
    """
    Generate project records for every region/year pair and split each
    year's budget across the projects that start in that year.

    Project counts depend on the region type (see get_project_count):
      - Urban: 1-2 projects per year.
      - Rural: 3-4 projects per year.

    Every project receives a random 'raw_weight' drawn uniformly from
    [0.5, 1.5]. For each year the budget is then allocated as:
      budget = int((raw_weight / total_raw_weight) * target_budget_for_year)
    where target_budget_for_year is 'base_target' multiplied by a random
    factor between 0.95 and 1.05 (to allow natural fluctuations). Because
    each share is truncated to an integer, a year's total can fall slightly
    below its target.

    Args:
        regions: iterable of dicts, each providing "region_id" and
            "region_type".
        years: iterable of integer years in which projects start.
        base_target: baseline annual budget (107M euros by default).

    Returns:
        A list of project dicts with the keys project_id, project_name,
        category, track_sdg_indicator, start_date, end_date, region_id and
        budget. Dates are "YYYY-MM-DD" strings. Projects are grouped by
        start year, in the order the years were given.
    """
    categories = ["Education", "Advertising Campaign", "Financial Aid", "Infrastructure"]
    sdg_list = ["SDG_1.1.1", "SDG_5.4.1", "SDG_13.1.1"]
    # Category distributions per region type (currently identical).
    urban_weights = [35, 30, 20, 15]
    rural_weights = [35, 30, 20, 15]

    # Materialise once so a one-shot iterable can be traversed repeatedly.
    years = list(years)

    # End dates are capped at 2024 unless a later year is requested. The cap
    # must never be earlier than a project's start year, otherwise
    # generate_end_date has an empty range of years to draw from.
    latest_end_year = max([2024, *years])

    # Projects grouped by start year; budgets are allocated per group below.
    projects_by_year = {year: [] for year in years}
    project_id = 1

    for region in regions:
        region_id = region["region_id"]
        region_type = region["region_type"]
        category_weights = urban_weights if region_type == "urban" else rural_weights
        for year in years:
            for _ in range(get_project_count(region_type)):
                category = random.choices(categories, weights=category_weights, k=1)[0]
                # No fixed budget range: the weight only fixes this project's
                # share of the year's budget.
                raw_weight = random.uniform(0.5, 1.5)
                sdg = random.choice(sdg_list)
                start_date_obj = generate_start_date(year)
                end_date_obj = generate_end_date(start_date_obj, max_year=latest_end_year)

                projects_by_year[year].append({
                    "project_id": project_id,
                    "project_name": f"Project_{project_id}",
                    "category": category,
                    "track_sdg_indicator": sdg,
                    "start_date": start_date_obj.strftime("%Y-%m-%d"),
                    "end_date": end_date_obj.strftime("%Y-%m-%d"),
                    "raw_weight": raw_weight,  # temporary field for budget allocation
                    "region_id": region_id,
                    "year": year,  # temporary field for grouping
                })
                project_id += 1

    # Iterate over the groups themselves so each year is allocated exactly
    # once, even if the same year appears more than once in 'years'.
    projects = []
    for year_projects in projects_by_year.values():
        total_raw = sum(p["raw_weight"] for p in year_projects)
        # This year's target fluctuates within +/-5% of the baseline.
        target_budget = base_target * random.uniform(0.95, 1.05)
        for p in year_projects:
            # Drop the temporary fields while computing the final budget.
            share = p.pop("raw_weight") / total_raw
            p.pop("year")
            p["budget"] = int(share * target_budget)
            projects.append(p)

    return projects

def write_projects_to_csv(projects, filename='projects.csv'):
    """
    Save the project records to 'filename' as CSV: a header row followed
    by one row per project, with the columns in a fixed order.
    """
    columns = [
        "project_id", "project_name", "category", "track_sdg_indicator",
        "start_date", "end_date", "budget", "region_id",
    ]
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_out = csv.DictWriter(out_file, fieldnames=columns)
        csv_out.writeheader()
        csv_out.writerows(projects)

# Part 3: Main Execution

def main():
    """
    Generate the project dataset, save it to 'projects.csv', and print the
    total budget allocated per start year as a sanity check.
    """
    # One row per region: (region_id, region_type, region_name, country_id)
    region_fields = ("region_id", "region_type", "region_name", "country_id")
    region_rows = [
        # South Sudan (SOU)
        ("SSD001", "urban", "Juba", "SOU"),
        ("SSD002", "urban", "Wau", "SOU"),
        ("SSD003", "rural", "Bor", "SOU"),
        ("SSD004", "rural", "Yambio", "SOU"),
        # Democratic Republic of the Congo (DEM)
        ("RCB001", "urban", "Kinshasa", "DEM"),
        ("RCB002", "urban", "Lubumbashi", "DEM"),
        ("RCB003", "rural", "Goma", "DEM"),
        ("RCB004", "rural", "Bukavu", "DEM"),
        # Palestine (PAL)
        ("PSE001", "urban", "Ramallah", "PAL"),
        ("PSE002", "urban", "Gaza", "PAL"),
        ("PSE003", "rural", "Jenin", "PAL"),
        ("PSE004", "rural", "Tubas", "PAL"),
        # Yemen (YEM)
        ("YEM001", "urban", "Sanaa", "YEM"),
        ("YEM002", "urban", "Aden", "YEM"),
        ("YEM003", "rural", "Ibb", "YEM"),
        ("YEM004", "rural", "Taiz", "YEM"),
        # Lebanon (LEB)
        ("LEB001", "urban", "Beirut", "LEB"),
        ("LEB002", "urban", "Tripoli", "LEB"),
        ("LEB003", "rural", "Baalbek", "LEB"),
        ("LEB004", "rural", "Byblos", "LEB"),
    ]
    regions = [dict(zip(region_fields, row)) for row in region_rows]

    # Six start years: 2019 through 2024 inclusive
    years = list(range(2019, 2025))

    # Baseline annual budget: 107 million euros
    base_target = 107_000_000

    # Build the projects (budgets are allocated inside) and persist them
    projects = generate_projects(regions, years, base_target)
    write_projects_to_csv(projects, 'projects.csv')

    # Sanity check: sum the budgets by the year each project starts in
    annual_totals = dict.fromkeys(years, 0)
    for project in projects:
        start_year = int(project["start_date"][:4])
        annual_totals[start_year] += project["budget"]

    print("Annual Budget Totals (with direct allocation):")
    for year, total in annual_totals.items():
        print(f"{year}: €{total:,}")


if __name__ == "__main__":
    main()



Annual Budget Totals (with direct allocation):
2019: €107,194,148
2020: €106,162,212
2021: €110,344,944
2022: €106,491,735
2023: €106,004,153
2024: €108,478,907


In [None]:
import pandas as pd
from google.colab import files

# Step 1: Upload the file manually
print("Please upload 'projects_updated.csv'")
uploaded = files.upload()

# Step 2: Load the uploaded file
# Parse the bytes returned by the upload instead of re-opening a fixed path:
# when a file with the same name already exists, Colab stores the new upload
# under a different name (e.g. "projects_updated (2).csv"), so reading the
# fixed path would silently load the older copy.
import io

budget_file_path = "projects_updated.csv"
if uploaded:
    # Prefer the expected file name; otherwise take the first uploaded file.
    uploaded_name = budget_file_path if budget_file_path in uploaded else next(iter(uploaded))
    df_budget = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    # Nothing was uploaded: fall back to a copy already present on disk.
    df_budget = pd.read_csv(budget_file_path)

# Step 3: Rename 'budget' to 'expenditure' and keep only relevant columns
df_budget = df_budget[['project_id', 'budget']].rename(columns={'budget': 'expenditure'})

# Step 4: Function to round expenditure based on custom 500000 interval rules
def custom_round_budget(value):
    """
    Map an expenditure to its budget tier on a 500,000 grid.

    Tiers (upper bounds are inclusive):
        up to 1,300,000           -> 1,000,000
        up to 1,800,000           -> 1,500,000
        up to 2,300,000           -> 2,000,000
        up to 2,800,000           -> 2,500,000
        up to 3,300,000           -> 3,000,000
        anything above 3,300,000  -> 3,500,000

    The tiers are contiguous: a value below 1,000,000 gets the lowest tier,
    and a fractional value lying between two integer bounds (for example
    1,300,000.5) gets the next tier up. Neither case falls through to the
    3,500,000 catch-all.
    """
    # (inclusive upper bound, tier value), in ascending order
    tiers = (
        (1_300_000, 1_000_000),
        (1_800_000, 1_500_000),
        (2_300_000, 2_000_000),
        (2_800_000, 2_500_000),
        (3_300_000, 3_000_000),
    )
    for upper_bound, tier_value in tiers:
        if value <= upper_bound:
            return tier_value
    return 3_500_000  # For values above 3300000

# Step 5: Derive the tiered 'total_budget' column from each expenditure
df_budget = df_budget.assign(
    total_budget=df_budget['expenditure'].apply(custom_round_budget)
)

# Step 6: Write the corrected budget data to CSV (index column omitted)
corrected_budget_csv_path = "budget_corrected.csv"
df_budget.to_csv(corrected_budget_csv_path, index=False)

# Step 7: Offer the processed file to the user for download
files.download(corrected_budget_csv_path)



Please upload 'projects_updated.csv'


Saving projects_updated.csv to projects_updated (2).csv
✅ Processing complete. Click the link below to download the file.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>