#BIGANS
###Building Intelligence with Generative Adversarial Networks

In [43]:
import numpy as np
import pandas as pd

DATA Source --> Australian government Department of Bureau of Meteorology
http://www.bom.gov.au/climate/data/


In [44]:
max_temp_df = pd.read_csv('Max Temp.csv')
rainfall_df = pd.read_csv('Rainfall.csv')
solar_exposure_df = pd.read_csv('Solar Exposure.csv')

In [45]:
max_temp_df = max_temp_df[max_temp_df['Year']>1990]
rainfall_df = rainfall_df[(rainfall_df['Year'] > 1990) & (rainfall_df['Year'] < 2021)]
solar_exposure_df = solar_exposure_df[(solar_exposure_df['Year'] > 1990) & (solar_exposure_df['Year'] < 2021)]

In [46]:
max_temp_df = max_temp_df.reset_index(drop=True)
rainfall_df = rainfall_df.reset_index(drop=True)
solar_exposure_df = solar_exposure_df.reset_index(drop=True)

In [47]:
max_temp_df.drop(columns=['Product code', 'Bureau of Meteorology station number','Days of accumulation of maximum temperature','Quality'], inplace=True)
rainfall_df.drop(columns=['Product code', 'Bureau of Meteorology station number','Period over which rainfall was measured (days)','Quality'], inplace=True)
solar_exposure_df.drop(columns=['Product code', 'Bureau of Meteorology station number'], inplace=True)

In [48]:
print(max_temp_df.shape)
print(rainfall_df.shape)
print(solar_exposure_df.shape)

(10835, 4)
(10958, 4)
(10958, 4)


In [49]:
max_temp_df.head()

Unnamed: 0,Year,Month,Day,Maximum temperature (Degree C)
0,1991,1,1,31.0
1,1991,1,2,29.2
2,1991,1,3,29.9
3,1991,1,4,32.2
4,1991,1,5,35.6


In [50]:
rainfall_df.head()

Unnamed: 0,Year,Month,Day,Rainfall amount (millimetres)
0,1991,1,1,0.0
1,1991,1,2,0.0
2,1991,1,3,0.0
3,1991,1,4,0.0
4,1991,1,5,0.0


In [51]:
solar_exposure_df.head()

Unnamed: 0,Year,Month,Day,Daily global solar exposure (MJ/m*m)
0,1991,1,1,
1,1991,1,2,30.5
2,1991,1,3,33.3
3,1991,1,4,32.8
4,1991,1,5,20.8


In [52]:
df = pd.merge(max_temp_df, rainfall_df, on=['Year', 'Month', 'Day'], how='inner')
df = pd.merge(df, solar_exposure_df, on=['Year', 'Month', 'Day'], how='inner')

In [53]:
df

Unnamed: 0,Year,Month,Day,Maximum temperature (Degree C),Rainfall amount (millimetres),Daily global solar exposure (MJ/m*m)
0,1991,1,1,31.0,0.0,
1,1991,1,2,29.2,0.0,30.5
2,1991,1,3,29.9,0.0,33.3
3,1991,1,4,32.2,0.0,32.8
4,1991,1,5,35.6,0.0,20.8
...,...,...,...,...,...,...
10830,2020,8,26,19.6,0.0,16.7
10831,2020,8,27,22.2,0.0,16.8
10832,2020,8,28,19.7,0.0,17.0
10833,2020,8,29,21.5,0.0,17.1


In [54]:
df.describe()

Unnamed: 0,Year,Month,Day,Maximum temperature (Degree C),Rainfall amount (millimetres),Daily global solar exposure (MJ/m*m)
count,10835.0,10835.0,10835.0,10815.0,10679.0,10425.0
mean,2005.335764,6.477711,15.728565,22.824595,3.322062,16.295127
std,8.565456,3.44019,8.799862,4.436295,10.885159,7.896226
min,1991.0,1.0,1.0,10.4,0.0,0.1
25%,1998.0,4.0,8.0,19.5,0.0,10.5
50%,2005.0,6.0,16.0,22.7,0.0,15.0
75%,2013.0,9.0,23.0,25.8,1.2,22.3
max,2020.0,12.0,31.0,45.8,264.0,34.9


Source of floor area mean -> Australian Bureau Of Statistics https://www.abs.gov.au/articles/average-floor-area-new-residential-dwellings

In [55]:
#Building area Means of different years
mean_area = [[2002, 245.1], [2003, 249.7], [2004, 247.8],[2005,259.7],[2006,270.4],[2007,250.2],[2008,265.2],[2009,257.8],[2010,255.4],[2011,260.5],
 [2012,256.3],[2013,243],[2014,	238.5],[2015,226.9],[2016,230.6],[2017,220.4],[2018,	221.8],[2019,	235],[2020,224.9]]
mean_area_df = pd.DataFrame(mean_area, columns=['Year', 'Area'])
mean_area_df.head()


Unnamed: 0,Year,Area
0,2002,245.1
1,2003,249.7
2,2004,247.8
3,2005,259.7
4,2006,270.4


In [56]:
def generate_bulding_area():
  num_increments = np.random.randint((1000 - 50) // 10)
  area = 50 + num_increments * 10
  return area

In [57]:
def generate_building_height():
  num_increments = np.random.randint((15 - 3) // 3 + 1)
  height = 3 + num_increments * 3
  return height

In [58]:
def generate_building_type():
  categories = ['Single house', 'Townhouse', 'Duplex', 'Apartment']
  cat = np.random.choice(categories)
  return cat

In [59]:
import random

def find_energy(area, height, b_type, temp, rain, solar):
    # Generate random weights for the input values within a narrower range
    weight1 = random.uniform(0.7, 1.3)
    weight2 = random.uniform(0.7, 1.3)
    weight3 = random.uniform(0.7, 1.3)
    weight4 = random.uniform(0.7, 1.3)
    weight5 = random.uniform(0.7, 1.3)

    # Calculate the weighted mean of the three values
    weighted_mean = (area * weight1 + height * weight2 + b_type * weight3 + rain * weight4 + solar * weight5 ) / (weight1 + weight2 + weight3)

    # Add a random offset to the weighted mean value within a narrower range
    random_offset = random.uniform(-10, 10)  # Adjusted range
    weighted_mean += random_offset

    # Round the weighted mean to the nearest multiple of 10 within the range of 50 to 300
    energy = max(min(round(weighted_mean, -1), 300), 50)
    return energy


In [60]:
gen_df = df

In [61]:
def add_building_area(row):
    row['Building Area'] = generate_bulding_area()
    return row
gen_df = gen_df.apply(add_building_area, axis=1)
gen_df

Unnamed: 0,Year,Month,Day,Maximum temperature (Degree C),Rainfall amount (millimetres),Daily global solar exposure (MJ/m*m),Building Area
0,1991.0,1.0,1.0,31.0,0.0,,920.0
1,1991.0,1.0,2.0,29.2,0.0,30.5,350.0
2,1991.0,1.0,3.0,29.9,0.0,33.3,180.0
3,1991.0,1.0,4.0,32.2,0.0,32.8,690.0
4,1991.0,1.0,5.0,35.6,0.0,20.8,620.0
...,...,...,...,...,...,...,...
10830,2020.0,8.0,26.0,19.6,0.0,16.7,260.0
10831,2020.0,8.0,27.0,22.2,0.0,16.8,140.0
10832,2020.0,8.0,28.0,19.7,0.0,17.0,390.0
10833,2020.0,8.0,29.0,21.5,0.0,17.1,60.0


In [62]:
def add_building_height(row):
    row['Building Height'] = generate_building_height()
    return row
gen_df = gen_df.apply(add_building_height, axis=1)
gen_df

Unnamed: 0,Year,Month,Day,Maximum temperature (Degree C),Rainfall amount (millimetres),Daily global solar exposure (MJ/m*m),Building Area,Building Height
0,1991.0,1.0,1.0,31.0,0.0,,920.0,12.0
1,1991.0,1.0,2.0,29.2,0.0,30.5,350.0,6.0
2,1991.0,1.0,3.0,29.9,0.0,33.3,180.0,12.0
3,1991.0,1.0,4.0,32.2,0.0,32.8,690.0,3.0
4,1991.0,1.0,5.0,35.6,0.0,20.8,620.0,3.0
...,...,...,...,...,...,...,...,...
10830,2020.0,8.0,26.0,19.6,0.0,16.7,260.0,3.0
10831,2020.0,8.0,27.0,22.2,0.0,16.8,140.0,3.0
10832,2020.0,8.0,28.0,19.7,0.0,17.0,390.0,9.0
10833,2020.0,8.0,29.0,21.5,0.0,17.1,60.0,6.0


In [63]:
def add_building_type(row):
    row['Building Type'] = generate_building_type()
    return row
gen_df = gen_df.apply(add_building_type, axis=1)
gen_df

Unnamed: 0,Year,Month,Day,Maximum temperature (Degree C),Rainfall amount (millimetres),Daily global solar exposure (MJ/m*m),Building Area,Building Height,Building Type
0,1991.0,1.0,1.0,31.0,0.0,,920.0,12.0,Duplex
1,1991.0,1.0,2.0,29.2,0.0,30.5,350.0,6.0,Townhouse
2,1991.0,1.0,3.0,29.9,0.0,33.3,180.0,12.0,Single house
3,1991.0,1.0,4.0,32.2,0.0,32.8,690.0,3.0,Apartment
4,1991.0,1.0,5.0,35.6,0.0,20.8,620.0,3.0,Townhouse
...,...,...,...,...,...,...,...,...,...
10830,2020.0,8.0,26.0,19.6,0.0,16.7,260.0,3.0,Duplex
10831,2020.0,8.0,27.0,22.2,0.0,16.8,140.0,3.0,Single house
10832,2020.0,8.0,28.0,19.7,0.0,17.0,390.0,9.0,Townhouse
10833,2020.0,8.0,29.0,21.5,0.0,17.1,60.0,6.0,Townhouse


In [75]:
# gen_df.drop(columns=['Buidling Type','Buidling Area','Buidling Height'],inplace=True)
gen_df = gen_df.fillna(df.mean())

In [76]:
def add_building_energy(row):
    if row['Building Type']=='Single house':
       a = 1
    if row['Building Type']=='Townhouse':
       a = 5
    if row['Building Type']=='Duplex':
       a = 2
    if row['Building Type']=='Apartment':
       a = 10
    row['Buidling Energy'] = find_energy(row['Building Area'],row['Building Height'],a, row['Maximum temperature (Degree C)'], row['Rainfall amount (millimetres)'],row['Daily global solar exposure (MJ/m*m)'])
    return row
gen_df = gen_df.apply(add_building_energy, axis=1)
gen_df

Unnamed: 0,Year,Month,Day,Maximum temperature (Degree C),Rainfall amount (millimetres),Daily global solar exposure (MJ/m*m),Building Area,Building Height,Building Type,Buidling Energy
0,1991.0,1.0,1.0,31.0,0.0,16.295127,920.0,12.0,Duplex,260.0
1,1991.0,1.0,2.0,29.2,0.0,30.500000,350.0,6.0,Townhouse,130.0
2,1991.0,1.0,3.0,29.9,0.0,33.300000,180.0,12.0,Single house,70.0
3,1991.0,1.0,4.0,32.2,0.0,32.800000,690.0,3.0,Apartment,190.0
4,1991.0,1.0,5.0,35.6,0.0,20.800000,620.0,3.0,Townhouse,220.0
...,...,...,...,...,...,...,...,...,...,...
10830,2020.0,8.0,26.0,19.6,0.0,16.700000,260.0,3.0,Duplex,80.0
10831,2020.0,8.0,27.0,22.2,0.0,16.800000,140.0,3.0,Single house,70.0
10832,2020.0,8.0,28.0,19.7,0.0,17.000000,390.0,9.0,Townhouse,140.0
10833,2020.0,8.0,29.0,21.5,0.0,17.100000,60.0,6.0,Townhouse,50.0


In [77]:
gen_df.describe()

Unnamed: 0,Year,Month,Day,Maximum temperature (Degree C),Rainfall amount (millimetres),Daily global solar exposure (MJ/m*m),Building Area,Building Height,Buidling Energy
count,10835.0,10835.0,10835.0,10835.0,10835.0,10835.0,10835.0,10835.0,10835.0
mean,2005.335764,6.477711,15.728565,22.824595,3.322062,16.295127,520.699585,9.020489,179.864329
std,8.565456,3.44019,8.799862,4.432198,10.806506,7.745373,274.765797,4.222375,85.299752
min,1991.0,1.0,1.0,10.4,0.0,0.1,50.0,3.0,50.0
25%,1998.0,4.0,8.0,19.5,0.0,10.7,290.0,6.0,100.0
50%,2005.0,6.0,16.0,22.7,0.0,15.4,520.0,9.0,180.0
75%,2013.0,9.0,23.0,25.8,1.4,21.9,760.0,12.0,260.0
max,2020.0,12.0,31.0,45.8,264.0,34.9,990.0,15.0,300.0


In [78]:
gen_df.shape

(10835, 10)

In [79]:
gen_df.isnull().sum()

Year                                    0
Month                                   0
Day                                     0
Maximum temperature (Degree C)          0
Rainfall amount (millimetres)           0
Daily global solar exposure (MJ/m*m)    0
Building Area                           0
Building Height                         0
Building Type                           0
Buidling Energy                         0
dtype: int64

In [82]:
final_df  = gen_df.drop(columns=['Year','Month','Day'])
final_df

Unnamed: 0,Maximum temperature (Degree C),Rainfall amount (millimetres),Daily global solar exposure (MJ/m*m),Building Area,Building Height,Building Type,Buidling Energy
0,31.0,0.0,16.295127,920.0,12.0,Duplex,260.0
1,29.2,0.0,30.500000,350.0,6.0,Townhouse,130.0
2,29.9,0.0,33.300000,180.0,12.0,Single house,70.0
3,32.2,0.0,32.800000,690.0,3.0,Apartment,190.0
4,35.6,0.0,20.800000,620.0,3.0,Townhouse,220.0
...,...,...,...,...,...,...,...
10830,19.6,0.0,16.700000,260.0,3.0,Duplex,80.0
10831,22.2,0.0,16.800000,140.0,3.0,Single house,70.0
10832,19.7,0.0,17.000000,390.0,9.0,Townhouse,140.0
10833,21.5,0.0,17.100000,60.0,6.0,Townhouse,50.0


##Synthetic Data Generation Using GANS