In [2]:
import boto3
from dotenv import load_dotenv
import os
# Load variables from a local .env file into the process environment so the
# AWS credentials below can be read with os.environ
load_dotenv()

# Source object in S3 and the local path it is written to -- replace these values
bucket_name = "my-feature-store-data"
file_path = "./historical_aqi_weather_data.json"
s3_key = "raw-data/historical_aqi_weather_data.json"


# Initialize the S3 client; os.environ.get yields None for an unset variable
# instead of raising, so missing credentials surface at call time, not here
s3 = boto3.client(
    's3',
    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
)
# Download the JSON object from the bucket to file_path
s3.download_file(bucket_name, s3_key, file_path)

# Report the transfer in the direction it actually happened (S3 -> local file);
# the previous message claimed an upload
print(f"✅ Downloaded s3://{bucket_name}/{s3_key} to {file_path}")


✅ Uploaded ./historical_aqi_weather_data.json to s3://my-feature-store-data/raw-data/historical_aqi_weather_data.json


In [3]:
import json
import pandas as pd

# Read the raw export; each element is expected to carry a 'weather' mapping
# and an 'aqi' mapping whose 'list' may be empty
with open('./historical_aqi_weather_data.json', 'r') as f:
    data = json.load(f)

# Pollutant concentrations copied from the AQI reading's 'components';
# the tuple order fixes the column order of the resulting frame
pollutant_keys = ('co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3')

# Weather measurements copied as-is from each element's 'weather' mapping
weather_keys = (
    'temperature_2m',
    'relative_humidity_2m',
    'precipitation',
    'wind_speed_10m',
    'wind_direction_10m',
    'surface_pressure',
    'dew_point_2m',
    'apparent_temperature',
    'shortwave_radiation',
    'et0_fao_evapotranspiration',
)

records = []

for entry in data:
    weather = entry['weather']
    readings = entry['aqi']['list']

    if not readings:
        # No AQI reading for this element: keep the row, leave AQI columns empty
        record = dict.fromkeys(('aqi_index', 'aqi_timestamp') + pollutant_keys)
    else:
        # Only the first reading in the list is used
        first = readings[0]
        pollutants = first['components']
        record = {
            'aqi_index': first['main']['aqi'],
            'aqi_timestamp': first['dt'],
            **{key: pollutants[key] for key in pollutant_keys},
        }

    # Append the weather columns after the AQI columns
    record.update({key: weather[key] for key in weather_keys})

    records.append(record)

# One row per element of the JSON array
df = pd.DataFrame(records)




In [4]:
import datetime
# Split the epoch-seconds 'aqi_timestamp' column into UTC year/month/day/hour
# columns, then drop the raw timestamp.
#
# The conversion is done once, vectorised, with pd.to_datetime instead of four
# row-wise datetime.utcfromtimestamp calls (deprecated since Python 3.12).
# Rows without a timestamp become NaT and therefore NaN in the derived columns.
#
# The guard keeps the cell re-runnable: after the first run 'aqi_timestamp' is
# gone, and an unguarded second run would fail with a KeyError.
if 'aqi_timestamp' in df.columns:
    aqi_time = pd.to_datetime(df['aqi_timestamp'], unit='s', utc=True)
    df = df.assign(
        year=aqi_time.dt.year,
        month=aqi_time.dt.month,
        day=aqi_time.dt.day,
        hour=aqi_time.dt.hour,
    ).drop(columns=['aqi_timestamp'])

# Preview the derived date parts
print(df[['year', 'month', 'day', 'hour']].head())

# Preview the full frame without the raw timestamp column
print(df.head())


     year  month   day  hour
0  2024.0   10.0  22.0  19.0
1  2024.0   10.0  22.0  20.0
2  2024.0   10.0  22.0  21.0
3  2024.0   10.0  22.0  22.0
4  2024.0   10.0  22.0  23.0
   aqi_index       co    no    no2     o3    so2  pm2_5    pm10    nh3  \
0        4.0  1201.63  0.01  45.24  41.13   8.82  68.43  112.67  11.78   
1        4.0  1388.55  0.05  57.58  26.46   8.94  71.22  113.28  10.39   
2        5.0  1468.66  0.06  58.26  21.99  10.01  76.47  117.74  10.64   
3        5.0  1375.20  0.01  48.67  26.82  10.73  79.89  121.13  11.02   
4        4.0  1001.36  0.00  30.85  43.99   8.82  72.51  111.48   8.87   

   temperature_2m  ...  wind_direction_10m  surface_pressure  dew_point_2m  \
0            27.3  ...               314.0            1011.3          23.8   
1            26.8  ...               307.0            1011.2          24.0   
2            26.2  ...               323.0            1011.0          24.4   
3            26.0  ...               331.0            1010.9         

In [5]:
# Write the flattened frame to a CSV in the working directory; index=False
# leaves the row index out of the file
df.to_csv(path_or_buf='aqi_weather_data.csv', index=False)
print("CSV file created: aqi_weather_data.csv")

CSV file created: aqi_weather_data.csv


In [6]:
# Echo the final frame as plain text; pandas abbreviates long frames with
# '...' rows/columns rather than printing every value
print(df)

      aqi_index       co    no    no2     o3    so2  pm2_5    pm10    nh3  \
0           4.0  1201.63  0.01  45.24  41.13   8.82  68.43  112.67  11.78   
1           4.0  1388.55  0.05  57.58  26.46   8.94  71.22  113.28  10.39   
2           5.0  1468.66  0.06  58.26  21.99  10.01  76.47  117.74  10.64   
3           5.0  1375.20  0.01  48.67  26.82  10.73  79.89  121.13  11.02   
4           4.0  1001.36  0.00  30.85  43.99   8.82  72.51  111.48   8.87   
...         ...      ...   ...    ...    ...    ...    ...     ...    ...   
4336        4.0   113.63  0.01   0.05  72.76   0.43  44.26  161.20   0.00   
4337        4.0   113.63  0.01   0.05  72.76   0.43  44.26  161.20   0.00   
4338        4.0   113.97  0.01   0.07  68.93   0.48  44.69  162.71   0.00   
4339        4.0   109.04  0.01   0.06  67.06   0.27  44.82  155.90   0.00   
4340        3.0   105.50  0.00   0.08  58.63   0.39  18.45   58.66   0.00   

      temperature_2m  ...  wind_direction_10m  surface_pressure  dew_point_

In [7]:
import boto3
from dotenv import load_dotenv
import os
# Read the local .env file so the AWS credentials are available via os.environ
load_dotenv()

# Local CSV to send and the bucket/key it is stored under -- replace these values
bucket_name = "my-feature-store-data"
file_path = "./aqi_weather_data.csv"
s3_key = "raw-data/aqi_weather_data.csv"

# Credentials come from the environment; os.environ.get yields None when unset
credentials = {
    'aws_access_key_id': os.environ.get('AWS_ACCESS_KEY_ID'),
    'aws_secret_access_key': os.environ.get('AWS_SECRET_ACCESS_KEY'),
}

# Build the S3 client and push the CSV to the bucket
s3 = boto3.client('s3', **credentials)
s3.upload_file(Filename=file_path, Bucket=bucket_name, Key=s3_key)

print(f"✅ Uploaded {file_path} to s3://{bucket_name}/{s3_key}")


✅ Uploaded ./aqi_weather_data.csv to s3://my-feature-store-data/raw-data/aqi_weather_data.csv
