In [5]:
import numpy as np
import pandas as pd

In [6]:
building_area = np.arange(50, 1001, 10)
building_height = np.arange(3, 16, 3)
building_types = ['Single House', 'Townhouse', 'Duplex', 'Apartment']

In [7]:
# Temperature
air_temperature = np.random.uniform(10, 30, 10000)

'''min_temp_df = pd.read_csv('mintemp_2017_24_annual.csv')
max_temp_df = pd.read_csv('maxtemp_2017_24_annual.csv')
average_temp_df = pd.DataFrame({
    'Year': min_temp_df['Year'],
    'Average Temperature': (min_temp_df['Annual'] + max_temp_df['Annual']) / 2
})
average_temp_df = average_temp_df.dropna(subset=['Average Temperature']).reset_index(drop=True)
print(average_temp_df)
temperature_values = average_temp_df['Average Temperature'].sample(n=10000, replace=True).reset_index(drop=True)
air_temperature = temperature_values'''

min_temp_df = pd.read_csv('sydney_mintemp_2017_2024.csv')
max_temp_df = pd.read_csv('sydney_maxtemp_2017_2024.csv')
temperature_values = min_temp_df['Mean minimum temperature (°C)'].tolist() + max_temp_df['Mean maximum temperature (°C)'].tolist()
air_temperature = np.random.choice(temperature_values, size = 10000)

In [8]:
# Humidity
mean_humidity = 61.3
std_dev_humidity = 10
humidity = np.round(np.clip(np.random.normal(mean_humidity, std_dev_humidity, 10000), 0, 100), 1)

In [9]:
# Solar Radiation
solar_radiation = np.random.uniform(10, 500, 10000) 

solar_df = pd.read_csv('solar-radiation-observations.csv')
non_zero_radiation = solar_df[solar_df['solar_radiation']!=0.0]['solar_radiation']
solar_radiation = np.random.choice(non_zero_radiation, size=10000)
print(len(solar_radiation), solar_radiation)

10000 [561. 151.   3. ... 209.  50. 621.]


In [10]:
energy_consumption = np.random.choice(np.arange(50, 301, 10), 10000)

In [31]:
data = {
    'Building Area': np.random.choice(building_area, 10000),
    'Building Height': np.random.choice(building_height, 10000),
    'Building Type': np.random.choice(building_types, 10000),
    'Air Temperature': air_temperature,
    'Humidity': humidity,
    'Solar Radiation': solar_radiation,
    'Normalized Energy Consumption': energy_consumption
}

df = pd.DataFrame(data)
print(df)

'''
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.width', None)  # Set the width to fit all columns in one line

# Print the DataFrame
print(df.to_string(index=False))'''

      Building Area  Building Height Building Type  Air Temperature  Humidity  \
0               940               15        Duplex             17.9      69.2   
1               150                3     Townhouse              9.9      49.1   
2               670                6     Townhouse             18.3      63.4   
3               900                3        Duplex             14.0      61.0   
4               380                3        Duplex             29.6      61.3   
...             ...              ...           ...              ...       ...   
9995            680               12  Single House              9.0      70.0   
9996            810               12     Townhouse             27.4      44.5   
9997            240                9     Townhouse             20.0      60.1   
9998            860                9     Apartment             19.5      52.0   
9999            970                3  Single House              9.1      61.4   

      Solar Radiation  Norm

"\npd.set_option('display.max_columns', None)  # Display all columns\npd.set_option('display.width', None)  # Set the width to fit all columns in one line\n\n# Print the DataFrame\nprint(df.to_string(index=False))"

In [59]:
'''
Trying to co-relate Energy consumed based on the inputs
Building Area: 25% 
Building Height: 10% 
Building Type: 10% 
Outdoor Temperature: 25%
Humidity: 10% 
'''

# Convert 'Building Type' to numerical values
building_type_dict = {'Single House': 1, 'Townhouse': 2, 'Duplex': 3, 'Apartment': 4}

# Define the base weights based on estimated influences, how each of these factors affect the energy consumption of the buildings
weights = {
    'Building Area': 0.5,
    'Building Height': 0.20,
    'Building Type': 0.20,
    'Air Temperature': 0.5,
    'Humidity': 0.20,
    'Solar Radiation': 0.40
}

# Define functions to adjust weights based on the actual values of the inputs with logical l variations and random factors
def area_weight(area):
    l = min(0.5 + (area / 1000), 0.99)  # Higher area, higher l value, capped at 1.0
    variation = np.random.uniform(l, 1.0)
    return variation * (area / 1000)

def height_weight(height):
    l = min(0.5 + (height / 15), 0.99)  # Higher height, higher l value, capped at 1.0
    variation = np.random.uniform(l, 1.0)
    return variation * (height / 15)

def type_weight(building):
    building_type = building_type_dict[building]
    if building_type == 1:  # Single house
        l = 0.9
    elif building_type == 2:  # Townhouse
        l = 0.7
    elif building_type == 3:  # Duplex
        l = 0.6
    else:  # Apartment
        l = 0.5
    variation = np.random.uniform(l, 1.0)
    return variation * (building_type / 4)

def temperature_weight(temperature):
    if np.isnan(temperature):
        temperature = 0
    l = min(0.5 + (temperature / 40), 0.99)  # Higher temperature, higher l value, capped at 1.0
    variation = np.random.uniform(l, 1.0)
    return variation * (temperature / 40)

def humidity_weight(humidity):
    if np.isnan(humidity):
        humidity = 0
    l = 0.5 + (humidity / 100)  # Higher humidity, higher l value, capped at 1.0
    if l>=1.0: l = 1.0
    variation = np.random.uniform(l, 1.0)
    return variation * (humidity / 100)

def solar_radiation_weight(solar_radiation):
    if np.isnan(solar_radiation):
        solar_radiation = 0
    l = min(0.5 + (solar_radiation / 1000), 1.0)  # Higher solar radiation, higher l value, capped at 0.99
    variation = np.random.uniform(l, 1.0)
    return variation * (solar_radiation / 1000)

# Function to calculate the weighted sum for a row
def calculate_weighted_sum(row):
    total_score = (
        weights['Building Area'] * area_weight(row['Building Area']) +
        weights['Building Height'] * height_weight(row['Building Height']) +
        weights['Building Type'] * type_weight(row['Building Type']) +
        weights['Air Temperature'] * temperature_weight(row['Air Temperature']) +
        weights['Humidity'] * humidity_weight(row['Humidity']) +
        weights['Solar Radiation'] * solar_radiation_weight(row['Solar Radiation'])
    )
    return total_score

# Apply the function to each row to calculate the total score
df['Total Score'] = df.apply(calculate_weighted_sum, axis=1)

# Normalize the total score to the range 0 to 1
normalized_score = (df['Total Score'] - df['Total Score'].min()) / (df['Total Score'].max() - df['Total Score'].min())

# Scale the normalized score to the energy consumption range (50 to 300)
df['Energy Consumption'] = 50 + normalized_score *2.2* (300 - 50)

print(max(df['Energy Consumption']))
print(min(df['Energy Consumption']))
# Calculate the mean of the column
mean_value = df['Energy Consumption'].mean()

# Calculate the variance of the column
variance_value = df['Energy Consumption'].var()

# Calculate the standard deviation of the column
std_deviation_value = df['Energy Consumption'].std()

print("Mean:", mean_value)
print("Variance:", variance_value)
print("Standard Deviation:", std_deviation_value)

600.0
50.0
Mean: 130.03042456731524
Variance: 785.5506822072097
Standard Deviation: 28.027677074763254


In [62]:
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.width', None)  # Set the width to fit all columns in one line

# Print the DataFrame
# print(df.to_string(index=False))

columns_to_export = ['Building Area', 'Building Height', 'Building Type','Air Temperature', 'Humidity', 'Solar Radiation', 'Energy Consumption']
# Save the DataFrame to a CSV file
df[columns_to_export].to_csv('data_frame.csv', index=False)
