In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Set random seed for reproducibility
np.random.seed(42)

# Generate sample data
n_projects = 20
current_date = datetime.now()

# Helper function to generate random dates
def random_date(start_date, days_range):
    return start_date + timedelta(days=np.random.randint(0, days_range))

In [2]:
# Create project information
projects = {
    'project_id': [f'PRJ{str(i).zfill(3)}' for i in range(1, n_projects + 1)],
    'project_name': [
        'Solar Farm Alpha', 'Wind Park Beta', 'Hydro Plant Gamma', 
        'Waste Management Delta', 'Green Building Epsilon', 'Forest Conservation Zeta',
        'Smart Grid Eta', 'Electric Bus Fleet Theta', 'Biogas Plant Iota',
        'Ocean Cleanup Kappa', 'Sustainable Agriculture Lambda', 'Green Hydrogen Mu',
        'Recycling Center Nu', 'Energy Storage Xi', 'Green Data Center Omicron',
        'Coastal Protection Pi', 'Urban Gardens Rho', 'Water Treatment Sigma',
        'EV Charging Network Tau', 'Sustainable Housing Upsilon'
    ],
    'project_description': [
        'Large-scale solar power generation facility',
        'Offshore wind farm development',
        'Small-scale hydroelectric power plant',
        'Advanced waste processing and recycling facility',
        'LEED Platinum certified office complex',
        'Protection of native forest and biodiversity',
        'Smart grid infrastructure deployment',
        'Electric public transportation fleet',
        'Organic waste to energy conversion plant',
        'Ocean plastic waste collection system',
        'Regenerative agriculture implementation',
        'Green hydrogen production facility',
        'Advanced materials recycling center',
        'Grid-scale battery storage system',
        'Energy-efficient data center facility',
        'Nature-based coastal defense system',
        'Urban farming and community gardens',
        'Advanced wastewater treatment plant',
        'National EV charging infrastructure',
        'Sustainable affordable housing complex'
    ],
    'project_type': np.random.choice(
        ['Renewable Energy', 'Infrastructure', 'Conservation', 'Waste Management', 'Green Building'],
        n_projects
    ),
    'project_status': np.random.choice(
        ['Proposed', 'In Progress', 'Under Review', 'Approved'],
        n_projects,
        p=[0.3, 0.4, 0.2, 0.1]
    ),
    'start_date': [random_date(current_date, 365).strftime('%Y-%m-%d') for _ in range(n_projects)],
    'expected_completion_date': [random_date(current_date + timedelta(days=730), 1095).strftime('%Y-%m-%d') for _ in range(n_projects)],
    'country': np.random.choice(
        ['USA', 'Canada', 'UK', 'Germany', 'France', 'Australia', 'Japan', 'Brazil', 'India'],
        n_projects
    ),
    'region': np.random.choice(
        ['North America', 'Europe', 'Asia Pacific', 'South America'],
        n_projects
    ),
    'coordinates': [f"{np.random.uniform(-90, 90):.4f}, {np.random.uniform(-180, 180):.4f}" for _ in range(n_projects)]
}

In [3]:
# Financial metrics
financial_metrics = {
    'total_investment_required': np.random.uniform(1e6, 50e6, n_projects),
    'current_funding': np.random.uniform(5e5, 20e6, n_projects),
    'funding_gap': None,  # Will calculate later
    'expected_roi': np.random.uniform(0.05, 0.25, n_projects),
    'payback_period': np.random.uniform(3, 15, n_projects),
    'capital_expenditure': np.random.uniform(8e5, 40e6, n_projects),
    'operational_expenditure': np.random.uniform(1e5, 5e6, n_projects),
    'maintenance_costs': np.random.uniform(5e4, 2e6, n_projects),
    'projected_revenue_5yr': np.random.uniform(2e6, 100e6, n_projects),
    'projected_cashflow_5yr': np.random.uniform(1e6, 75e6, n_projects),
    'financial_risk_score': np.random.uniform(0.1, 0.9, n_projects),
    'market_volatility_index': np.random.uniform(0.1, 0.8, n_projects),
    'currency_risk_exposure': np.random.uniform(0.1, 0.7, n_projects)
}

In [4]:
# Environmental metrics
environmental_metrics = {
    'carbon_reduction_tons': np.random.uniform(1000, 50000, n_projects),
    'carbon_intensity': np.random.uniform(0.1, 0.9, n_projects),
    'emission_savings_forecast': np.random.uniform(500, 25000, n_projects),
    'water_usage_reduction': np.random.uniform(1000, 100000, n_projects),
    'energy_efficiency_score': np.random.uniform(0.5, 1.0, n_projects),
    'renewable_energy_generation': np.random.uniform(0, 100000, n_projects),
    'biodiversity_impact_score': np.random.uniform(0.3, 0.9, n_projects),
    'land_use_change': np.random.uniform(-1000, 1000, n_projects),
    'waste_reduction_tons': np.random.uniform(100, 10000, n_projects),
    'climate_risk_score': np.random.uniform(0.1, 0.9, n_projects),
    'natural_disaster_risk': np.random.uniform(0.1, 0.8, n_projects),
    'climate_adaptation_score': np.random.uniform(0.3, 0.9, n_projects)
}

In [5]:

# Social metrics
social_metrics = {
    'jobs_created': np.random.randint(10, 1000, n_projects),
    'community_benefit_score': np.random.uniform(0.3, 0.9, n_projects),
    'local_business_impact': np.random.uniform(0.2, 0.8, n_projects),
    'healthcare_impact_score': np.random.uniform(0.1, 0.9, n_projects),
    'education_impact_score': np.random.uniform(0.2, 0.9, n_projects),
    'poverty_reduction_impact': np.random.uniform(0.2, 0.8, n_projects),
    'community_engagement_level': np.random.uniform(0.3, 0.9, n_projects),
    'indigenous_peoples_impact': np.random.uniform(-0.2, 0.8, n_projects),
    'stakeholder_satisfaction_score': np.random.uniform(0.4, 0.9, n_projects)
}

In [6]:
# Governance metrics
governance_metrics = {
    'compliance_score': np.random.uniform(0.5, 1.0, n_projects),
    'transparency_index': np.random.uniform(0.4, 0.95, n_projects),
    'corruption_risk_score': np.random.uniform(0.1, 0.8, n_projects),
    'management_experience_score': np.random.uniform(0.4, 0.9, n_projects),
    'track_record_score': np.random.uniform(0.3, 0.9, n_projects),
    'regulatory_compliance_score': np.random.uniform(0.5, 1.0, n_projects),
    'reporting_quality_score': np.random.uniform(0.4, 0.9, n_projects),
    'monitoring_framework_score': np.random.uniform(0.3, 0.9, n_projects),
    'audit_frequency': np.random.randint(1, 4, n_projects)
}

# Create DataFrame
df = pd.DataFrame({**projects, **financial_metrics, **environmental_metrics, 
                  **social_metrics, **governance_metrics})

# Calculate funding gap
df['funding_gap'] = df['total_investment_required'] - df['current_funding']

# Calculate initial ESG scores (these would normally be calculated by the model)
df['environmental_score'] = np.random.uniform(0.4, 0.9, n_projects)
df['social_score'] = np.random.uniform(0.4, 0.9, n_projects)
df['governance_score'] = np.random.uniform(0.4, 0.9, n_projects)
df['overall_esg_score'] = (df['environmental_score'] + df['social_score'] + df['governance_score']) / 3

# Save to CSV
df.to_csv('sample_green_finance_data.csv', index=False)

# Display first few rows of the data
print("\nSample of generated data:")
print(df[['project_id', 'project_name', 'total_investment_required', 'overall_esg_score']].head())


Sample of generated data:
  project_id            project_name  total_investment_required  \
0     PRJ001        Solar Farm Alpha               9.246261e+06   
1     PRJ002          Wind Park Beta               1.171945e+07   
2     PRJ003       Hydro Plant Gamma               2.834700e+07   
3     PRJ004  Waste Management Delta               2.078797e+07   
4     PRJ005  Green Building Epsilon               4.179720e+06   

   overall_esg_score  
0           0.738552  
1           0.439202  
2           0.668814  
3           0.696814  
4           0.500363  
