# Important Libraries 

In [1]:
import pandas as pd  ## Used for data manipulation
import numpy as np   ## Used for numerical data

### Date Range

In [2]:
# Daily timestamps from 2020-01-01 through 2023-12-31 (both endpoints included);
# every generated series below has one value per entry in this index.
first_day, last_day = '2020-01-01', '2023-12-31'
date_range = pd.date_range(first_day, last_day)

### Generate synthetic data from a normal (Gaussian) distribution

In [8]:
# Draw one normally distributed value per day for each measurement.
# The global NumPy RNG is seeded first so the draws are reproducible; the series are
# drawn in the order temperature -> humidity -> wind speed -> AQI, and that order
# determines which random numbers each series receives.
np.random.seed(42)
n_days = len(date_range)
temperature, humidity, wind_speed, aqi = (
    np.random.normal(loc=mean, scale=std_dev, size=n_days)
    for mean, std_dev in (
        (20, 10),   # temperature
        (60, 15),   # humidity
        (15, 5),    # wind speed
        (50, 20),   # air quality index
    )
)

### Realistic Bounds

In [7]:
# Clamp every series to a plausible physical range; values outside the range
# are replaced by the nearest bound rather than being dropped.
temperature = temperature.clip(-30, 50)   # lower / upper temperature limits
humidity = humidity.clip(0, 100)          # humidity stays within 0..100
wind_speed = wind_speed.clip(0, 150)      # wind speed cannot be negative
aqi = aqi.clip(0, 500)                    # AQI limited to 0..500

### Impact Score 

In [9]:
# Impact Score = sum of squared deviations of each measurement from a fixed center,
# capped to the range [0, 1000]. The centers (20, 60, 15, 50) are the same values
# used as the means when the series were generated.
series_and_centers = (
    (temperature, 20),
    (humidity, 60),
    (wind_speed, 15),
    (aqi, 50),
)
raw_impact = sum((values - center) ** 2 for values, center in series_and_centers)
impact_score = np.clip(raw_impact, 0, 1000)

### DataFrame

In [10]:
# Assemble the final table: one row per day, one column per measurement,
# plus the derived Impact Score. Column order follows insertion order below.
column_data = {
    'Date': date_range,
    'Temperature (°C)': temperature,
    'Humidity (%)': humidity,
    'Wind Speed (km/h)': wind_speed,
    'Air Quality Index (AQI)': aqi,
    'Impact Score': impact_score,
}
environmental_data = pd.DataFrame(column_data)

### Add Some Null Values

In [12]:
# Inject missing values: set 5% of the rows to NaN in every numeric column
# ('Date' is left untouched). Rows are drawn independently for each column,
# so the NaNs do not line up across columns.
# NOTE: the draws come from NumPy's global RNG (seeded via np.random.seed(42) in the
# generation cell) and the frame is modified in place, so re-running this cell on its
# own can blank out additional rows instead of reproducing the same ones.
null_percentage = 0.05  ## 5%
columns_with_nulls = [
    'Temperature (°C)',
    'Humidity (%)',
    'Wind Speed (km/h)',
    'Air Quality Index (AQI)',
    'Impact Score',
]

# Loop-invariant values: assigning NaN never changes the row count or the index,
# so compute them once instead of on every iteration.
row_labels = environmental_data.index
num_nulls = int(null_percentage * len(environmental_data))

for column in columns_with_nulls:
    # replace=False guarantees num_nulls distinct rows per column
    null_indices = np.random.choice(row_labels, num_nulls, replace=False)
    environmental_data.loc[null_indices, column] = np.nan

### Save Data to a CSV File

In [14]:
# Write the dataset to disk; index=False keeps the row index out of the CSV
environmental_data.to_csv(
    'environmental_data.csv',
    index=False,
)

In [15]:
# Preview the first 10 rows of the dataset
environmental_data.head(n=10)

Unnamed: 0,Date,Temperature (°C),Humidity (%),Wind Speed (km/h),Air Quality Index (AQI),Impact Score
0,2020-01-01,24.967142,41.635526,14.026514,53.958333,378.542462
1,2020-01-02,,56.864651,14.679059,37.850554,159.454149
2,2020-01-03,26.476885,47.242193,15.458762,41.91276,
3,2020-01-04,,51.292148,16.261202,36.274417,497.770928
4,2020-01-05,17.658466,68.828676,14.418855,21.346574,904.784841
5,2020-01-06,17.65863,85.048568,16.073566,,
6,2020-01-07,35.792128,65.920073,22.89059,,483.730163
7,2020-01-08,27.674347,42.061754,19.927248,60.297672,510.996097
8,2020-01-09,15.305256,66.66904,19.345773,66.398505,354.313425
9,2020-01-10,25.4256,77.949472,12.722302,56.486804,398.887231


In [16]:
# (rows, columns) of the dataset: one row per day in the date range
environmental_data.shape

(1461, 6)