In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Synthetic construction-cost dataset.
#
# Draws `num_samples` random building records, derives the land area, floor
# count, building area and total price for each one, and collects everything
# in `data` (dict of numpy arrays) and `df` (DataFrame with the same columns).
# All steps are vectorised and driven by one seeded generator, so re-running
# the cell on a fresh kernel reproduces the same dataset.

# Categorical values each record can take.
topography_options = ['rural', 'suburban', 'urban', 'metropolitan']
construction_type_options = ['commercial', 'residential']
architectural_style_options = ['modern', 'traditional', 'contemporary']
quality_options = ['low', 'medium', 'high']
furnished_options = ['Furnished', 'Non-Furnished']
luxury_level_options = ['basic', 'standard', 'luxury']

num_samples = 3592

# Fixed seed: without it every run would write a different cost_data.csv.
SEED = 42
rng = np.random.default_rng(SEED)

# Inclusive (low, high) land price per sqft for each topography.
LAND_PRICE_RANGES = {
    'rural': (200, 1000),
    'suburban': (1000, 5000),
    'urban': (5000, 20000),
    'metropolitan': (20000, 50000),
}
# Inclusive upper bound on the floor count; every type other than
# 'commercial' uses the lower bound.
MAX_FLOORS_COMMERCIAL = 30
MAX_FLOORS_OTHER = 5
# Land is sized at 1.2x a provisional building-area draw; the final building
# area then uses at most 95% of that land.
LAND_TO_AREA_RATIO = 1.2
BUILDABLE_LAND_FRACTION = 0.95

# --- categorical features -------------------------------------------------
topography = rng.choice(topography_options, num_samples)
construction_type = rng.choice(construction_type_options, num_samples)
architectural_style = rng.choice(architectural_style_options, num_samples)
quality_level = rng.choice(quality_options, num_samples)
furnished = rng.choice(furnished_options, num_samples)
luxury_level = rng.choice(luxury_level_options, num_samples)

# --- per-sqft cost components (upper bounds are exclusive) ----------------
construction_cost = rng.integers(1500, 1801, num_samples)
raw_materials_cost = rng.integers(1000, 1501, num_samples)
flooring_cost = rng.integers(300, 601, num_samples)
ceiling_cost = rng.integers(50, 201, num_samples)
plumbing_cost = rng.integers(150, 301, num_samples)
wiring_cost = rng.integers(100, 201, num_samples)

# --- land ------------------------------------------------------------------
# The provisional building area only serves to size the plot; the final
# building area is derived from the plot and the floor count further down.
provisional_area = rng.integers(1000, 10001, num_samples)
land_area = (provisional_area * LAND_TO_AREA_RATIO).astype(np.int64)

land_price_bounds = np.array([LAND_PRICE_RANGES[t] for t in topography])
land_price = rng.integers(land_price_bounds[:, 0], land_price_bounds[:, 1],
                          endpoint=True)

# --- floors and building area ---------------------------------------------
max_floors = np.where(construction_type == 'commercial',
                      MAX_FLOORS_COMMERCIAL, MAX_FLOORS_OTHER)
floors = rng.integers(1, max_floors, endpoint=True)

# Whole square feet per floor (truncated), times the number of floors.
area_per_floor = (BUILDABLE_LAND_FRACTION * land_area / floors).astype(np.int64)
building_area = area_per_floor * floors

# --- price -----------------------------------------------------------------
# Flooring is the only component scaled by the floor count.
cost_per_sqft = (construction_cost +
                 raw_materials_cost +
                 floors * flooring_cost +
                 ceiling_cost +
                 plumbing_cost +
                 wiring_cost)
price = (building_area * cost_per_sqft + land_area * land_price).astype(np.int64)

# Furnished buildings add a random 50-200 per sqft on half of the building area.
furnishing_rate = rng.uniform(50, 200, num_samples)
furnishing_cost = np.where(furnished == 'Furnished',
                           0.5 * building_area * furnishing_rate,
                           0.0)
price = (price + furnishing_cost).astype(np.int64)

# Multiplicative adjustments. The price stays an integer column, so it is
# truncated back to whole units after every adjustment.
construction_factor = np.where(construction_type == 'commercial', 1.1, 0.9)
quality_factor = np.select([quality_level == 'low', quality_level == 'medium'],
                           [0.9, 1.0], default=1.1)
luxury_factor = np.select([luxury_level == 'basic', luxury_level == 'standard'],
                          [0.9, 1.0], default=1.1)
for factor in (construction_factor, quality_factor, luxury_factor):
    price = (price * factor).astype(np.int64)

# Key order fixes the column order of `df` and of the exported CSV.
data = {
    'Topography': topography,
    'Construction Type': construction_type,
    'Amount of Land (sqft)': land_area,
    'Land Price per sqft': land_price,
    'Number of Floors': floors,
    'Architectural Style': architectural_style,
    'Quality Level of Raw Materials': quality_level,
    'Furnished': furnished,
    'Luxury Level': luxury_level,
    'Building Area (sqft)': building_area,
    'Construction Cost per sqft': construction_cost,
    'Raw Materials Cost per sqft': raw_materials_cost,
    'Flooring Cost per sqft': flooring_cost,
    'Ceiling Cost per sqft': ceiling_cost,
    'Plumbing Cost per sqft': plumbing_cost,
    'Wiring Cost per sqft': wiring_cost,
    'Price': price,
}

df = pd.DataFrame(data)

In [3]:
# Write the generated dataset to 'cost_data.csv' (relative to the working
# directory); index=False keeps the DataFrame's row index out of the file.
df.to_csv('cost_data.csv', index=False)

In [4]:
# Preview the first rows of the generated dataset as a sanity check.
df.head()

Unnamed: 0,Topography,Construction Type,Amount of Land (sqft),Land Price per sqft,Number of Floors,Architectural Style,Quality Level of Raw Materials,Furnished,Luxury Level,Building Area (sqft),Construction Cost per sqft,Raw Materials Cost per sqft,Flooring Cost per sqft,Ceiling Cost per sqft,Plumbing Cost per sqft,Wiring Cost per sqft,Price
0,suburban,residential,3877,2495,2,modern,low,Furnished,basic,3682,1615,1454,436,176,191,170,19235005
1,rural,commercial,4605,295,7,modern,low,Furnished,luxury,4368,1701,1086,551,91,292,108,35867796
2,urban,commercial,4294,6346,5,contemporary,low,Non-Furnished,standard,4075,1570,1443,387,164,199,157,49036505
3,metropolitan,residential,10388,41568,1,contemporary,medium,Furnished,standard,9868,1625,1068,386,57,231,118,420079286
4,rural,commercial,7467,846,13,contemporary,medium,Non-Furnished,standard,7085,1747,1133,418,132,252,140,75827743
