# In [None]:
import pandas as pd
import random
from datetime import datetime, timedelta


# Pin the module-level random generator so repeated runs draw the same values.
random.seed(a=42)

def generate_realistic_data(num_rows=500, rng=None):
    """Generate synthetic construction-project records.

    Every record is driven by three random draws (start-day offset, land
    size, number of floors); all remaining fields are computed from them.

    Args:
        num_rows: Number of records to generate. Zero or a negative value
            yields an empty list.
        rng: Optional object exposing ``randint`` and ``uniform`` (for
            example a ``random.Random`` instance). When omitted, the
            module-level ``random`` generator is used, so a prior
            ``random.seed(...)`` call controls the output.

    Returns:
        A list of dicts, one per project, with the keys ``Project_ID``,
        ``Land_Size_Sqm``, ``Num_Floors``, ``Tons_Cement``,
        ``Labor_Required``, ``Start_Date``, ``End_Date``, ``Num_Bricks``,
        ``Tons_Iron``, ``Tons_Sand`` and ``Total_Cost``.
    """
    if rng is None:
        # Fall back to the shared module-level generator.
        rng = random

    # Loop-invariant pricing and sizing constants.
    cost_per_ton_cement = 8000
    cost_per_brick = 6
    cost_per_ton_iron = 50000
    cost_per_ton_sand = 7500
    # Multiplier applied to Labor_Required in the cost formula.
    cost_per_labor_unit = 500
    # NOTE(review): the land size is exported under the key 'Land_Size_Sqm'
    # while this density is named per square foot -- confirm the intended unit.
    bricks_per_sqft = 15
    sand_tons_per_cement_ton = 2.5
    earliest_start = datetime(2020, 1, 1)

    data = []
    for i in range(num_rows):
        # The draw order (day offset, land size, floors) is kept fixed so a
        # given seed keeps producing the same records.
        start_date = earliest_start + timedelta(days=rng.randint(0, 365))
        land_size = rng.uniform(800, 5000)
        num_floors = rng.randint(1, 10)

        # Duration scales with land size but never drops below 30 days.
        project_duration = int(max(30, land_size / 100))
        end_date = start_date + timedelta(days=project_duration)

        # With the ranges drawn above, land_size / 10 is at least 80 and
        # num_floors * 3.5 is at most 35, so the floor-based term always
        # wins the min(); the max() lifts a single-floor project to 5 tons.
        tons_cement = max(5, min(land_size / 10, num_floors * 3.5))
        # num_floors * 5 lies in 5..50, so neither clamp binds at present.
        labor_required = max(5, min(num_floors * 5, 150))
        tons_iron = max(0, min(land_size / 100, num_floors * 2))
        sand_ton = tons_cement * sand_tons_per_cement_ton
        num_bricks = int(land_size * bricks_per_sqft)

        cement_and_labor_cost = (tons_cement * cost_per_ton_cement +
                                 labor_required * cost_per_labor_unit)
        # Summation order is kept as-is so floating-point results do not shift.
        total_cost = (num_bricks * cost_per_brick +
                      tons_iron * cost_per_ton_iron +
                      cement_and_labor_cost +
                      sand_ton * cost_per_ton_sand)

        data.append({
            'Project_ID': f'P-{i + 1}',
            'Land_Size_Sqm': round(land_size, 2),
            'Num_Floors': num_floors,
            'Tons_Cement': round(tons_cement, 2),
            'Labor_Required': labor_required,
            'Start_Date': start_date,
            'End_Date': end_date,
            'Num_Bricks': num_bricks,
            'Tons_Iron': round(tons_iron, 2),
            'Tons_Sand': sand_ton,
            'Total_Cost': total_cost,
        })

    return data


# Materialise the generated project records as a table.
dataset = pd.DataFrame(data=generate_realistic_data())

# Absolute, machine-specific destination for the exported CSV.
csv_file_path = "C:/Users/sriyo/Desktop/Bob's Esti-Mate/output.csv"

# Write the table without the index column, then echo it to stdout.
dataset.to_csv(path_or_buf=csv_file_path, index=False)
print(dataset)
