<a href="https://colab.research.google.com/github/MNagaHarshithRao/Environment-Impact-Assessment/blob/main/Synthetic_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Daily timestamps from 2020-01-01 through 2024-12-31, both endpoints included.
date_range = pd.date_range("2020-01-01", "2024-12-31", freq="D")

In [None]:
# Sample one normally distributed value per date for each variable.
# The global seed is fixed first and the variables are drawn in a fixed order
# (temperature, humidity, wind speed, AQI) so the numbers are reproducible.
np.random.seed(42)
n_days = len(date_range)
temperature = np.random.normal(20, 10, n_days)  # mean 20, std 10
humidity = np.random.normal(60, 15, n_days)     # mean 60, std 15
wind_speed = np.random.normal(15, 5, n_days)    # mean 15, std 5
aqi = np.random.normal(50, 20, n_days)          # mean 50, std 20

In [None]:
# Clamp every series to its allowed interval; samples that fall outside
# are replaced by the nearest bound.
temperature = temperature.clip(-30, 50)
humidity = humidity.clip(0, 100)
wind_speed = wind_speed.clip(0, 150)
aqi = aqi.clip(0, 500)

In [None]:
# Impact Score = sum of squared deviations of each variable from its baseline
# (20 for temperature, 60 for humidity, 15 for wind speed, 50 for AQI),
# then limited to the range [0, 1000].
squared_deviations = (
    np.square(temperature - 20)
    + np.square(humidity - 60)
    + np.square(wind_speed - 15)
    + np.square(aqi - 50)
)
impact_score = squared_deviations.clip(0, 1000)

In [None]:
# Gather the date axis and every generated series into a single table.
# The mapping order below is the column order of the resulting DataFrame.
column_values = {
    'Date': date_range,
    'Temperature (°C)': temperature,
    'Humidity (%)': humidity,
    'Wind Speed (km/h)': wind_speed,
    'Air Quality Index (AQI)': aqi,
    'Impact Score': impact_score,
}
environmental_data = pd.DataFrame(data=column_values)

In [None]:
# Inject missing values: blank out a random 5% of the rows in each measurement
# column. Rows are chosen independently per column (without replacement within
# a column) using NumPy's global random state.
null_percentage = 0.05  # fraction of rows set to NaN in each column (5%)

# The number of rows to blank depends only on the table length, so it is
# computed once instead of on every loop iteration.
num_nulls = int(null_percentage * len(environmental_data))

for column in ['Temperature (°C)', 'Humidity (%)', 'Wind Speed (km/h)', 'Air Quality Index (AQI)', 'Impact Score']:
    # Keep the cell idempotent: the table is modified in place, so running the
    # cell again would otherwise draw new rows and stack additional NaNs on top
    # of the ones already injected, pushing the missing rate above 5%.
    if environmental_data[column].isna().any():
        continue
    null_indices = np.random.choice(environmental_data.index, num_nulls, replace=False)
    environmental_data.loc[null_indices, column] = np.nan

In [None]:
# Write the dataset to a CSV file in the working directory; the row index is omitted.
environmental_data.to_csv(path_or_buf='environmental_data.csv', index=False)

In [None]:
# Preview the top ten rows of the dataset.
environmental_data.head(n=10)

Unnamed: 0,Date,Temperature (°C),Humidity (%),Wind Speed (km/h),Air Quality Index (AQI),Impact Score
0,2020-01-01,24.967142,81.163986,15.746817,59.012261,554.36537
1,2020-01-02,18.617357,58.521178,16.820701,51.655458,10.154111
2,2020-01-03,26.476885,,15.035892,50.735657,42.572468
3,2020-01-04,35.230299,70.623216,4.567612,2.313864,1000.0
4,2020-01-05,17.658466,63.498242,,64.903017,240.464324
5,2020-01-06,17.65863,74.29705,26.062655,52.683328,339.470227
6,2020-01-07,35.792128,,13.199804,58.566841,344.571848
7,2020-01-08,27.674347,50.813439,,35.770756,359.01229
8,2020-01-09,15.305256,65.422553,16.866743,42.310862,114.052265
9,2020-01-10,25.4256,42.844107,16.561891,41.779501,393.777912


In [None]:
# Dimensions of the dataset as a (rows, columns) tuple.
environmental_data.shape

(1827, 6)