### IMPORT PACKAGES

In [16]:
import datetime
import json
import os
import time

import pandas as pd
import psycopg2
import requests
from dotenv import dotenv_values, load_dotenv

import sql_functions

### Create data frame from zip codes

In [2]:
# Load the NYC zip-code table. `pd.read_csv` already returns a DataFrame, so
# no additional `pd.DataFrame(...)` wrapper is needed.
# NOTE(review): this is an absolute, user-specific path; the cell only runs on
# a machine that has exactly this directory layout.
zip_df = pd.read_csv('/Users/p/Downloads/neueFische/crime_nerds/Data/Weather_Data/nyc-zip-codes.csv')

### Get API key from .env file

In [3]:
# load_dotenv() copies the variables found in the .env file into os.environ.
# Its return value is only a bool ("was anything loaded?"), not the key, so
# the key itself is read back from the environment afterwards.
load_dotenv()
openweather_api_key = os.getenv('openweather_api_key')

### Transform data frame into list

In [4]:
# Pull the ZipCode column out of the frame as a plain Python list.
zips = zip_df['ZipCode'].tolist()

### Define URL and API key

In [5]:
# Endpoint of the OpenWeatherMap "geocoding by zip code" API. HTTPS is used
# because the API key travels in the query string of every request.
base_url = 'https://api.openweathermap.org/geo/1.0/zip'

# The key is read from the environment variable `openweather_api_key`.
api_key = os.getenv('openweather_api_key')

# Fail early with a clear message instead of sending requests with
# `appid=None` and collecting error payloads.
if not api_key:
    raise RuntimeError(
        "Environment variable 'openweather_api_key' is not set; "
        "add it to the .env file and run load_dotenv() first."
    )

In [7]:
# Create empty list to store geo data
geo_data = []
# Zip codes for which no usable geo data could be retrieved
failed_zips = []

# Iterate over each zip code in the zips list and retrieve the geo data
for n in zips:
    # Let requests build and encode the query string.
    query = {'zip': n, 'appid': api_key, 'units': 'metric'}

    # API request (with a timeout so one hanging connection cannot stall the loop)
    try:
        response = requests.get(base_url, params=query, timeout=30)
    except requests.RequestException as err:
        # Only the exception type is printed: the exception text can contain
        # the full request URL, which includes the API key.
        print(f'Zip {n}: request failed ({type(err).__name__})')
        failed_zips.append(n)
        continue

    # Error responses carry an error payload without 'zip'/'lat'/'lon';
    # keep them out of geo_data so later cells can rely on those keys.
    if not response.ok:
        print(f'Zip {n}: HTTP {response.status_code}')
        failed_zips.append(n)
        continue

    geo_dict = response.json()
    geo_data.append(geo_dict)

### Check out geo data

In [8]:
# Show the first three collected entries to inspect their structure.
geo_data[:3]

[{'zip': '10453',
  'name': 'New York',
  'lat': 40.852,
  'lon': -73.9129,
  'country': 'US'},
 {'zip': '10457',
  'name': 'New York',
  'lat': 40.8486,
  'lon': -73.8999,
  'country': 'US'},
 {'zip': '10460',
  'name': 'New York',
  'lat': 40.8409,
  'lon': -73.8794,
  'country': 'US'}]

## Retrieve historical weather data with the Meteostat API

### Get API key from .env

In [9]:
# load_dotenv() only populates os.environ from the .env file and returns a
# bool, so the RapidAPI key has to be read from the environment afterwards.
load_dotenv()

x_rapidapi_key = os.getenv('x_rapidapi_key')
api_key_2 = x_rapidapi_key

# Fail early instead of sending requests with a missing key header.
if not api_key_2:
    raise RuntimeError(
        "Environment variable 'x_rapidapi_key' is not set; "
        "add it to the .env file."
    )

In [None]:
# Daily point-data endpoint of the Meteostat API, accessed through RapidAPI.
url = "https://meteostat.p.rapidapi.com/point/daily"

headers = {
    "X-RapidAPI-Key": api_key_2,
    "X-RapidAPI-Host": "meteostat.p.rapidapi.com",
}

# Define the start and end dates for the data you want to retrieve
start_date = "2015-01-01"
end_date = "2023-12-31"

# One frame per location. They are concatenated once after the loop, which
# avoids re-copying the growing result on every iteration.
weather_frames = []

for location in geo_data:
    # Skip entries that lack coordinates (e.g. error payloads from a failed
    # geocoding request) instead of raising KeyError.
    if not all(key in location for key in ('zip', 'lat', 'lon')):
        continue

    # Extract zip code, latitude and longitude from the current dictionary
    zip_code = str(location['zip'])
    latitude = str(location['lat'])
    longitude = str(location['lon'])

    querystring = {
        "lat": latitude,
        "lon": longitude,
        "start": start_date,
        "end": end_date,
    }

    try:
        response = requests.get(url, headers=headers, params=querystring, timeout=30)
    except requests.RequestException as err:
        print(f"Zip {zip_code}: request failed ({type(err).__name__})")
        continue

    # Pause between requests (presumably to respect the API rate limit).
    time.sleep(1)

    try:
        weather = response.json()
    except ValueError:
        print(f"Zip {zip_code}: response is not valid JSON (HTTP {response.status_code})")
        continue

    # NOTE(review): 429 is assumed to be the status RapidAPI uses for an
    # exhausted quota / rate limit; every further request would fail the same
    # way, so stop here and keep what has been collected so far.
    if response.status_code == 429:
        print(f"Stopped at zip {zip_code}: {weather}")
        break

    # Error payloads (e.g. {'message': ...}) have no 'data' key, which would
    # make json_normalize(record_path='data') raise KeyError.
    if not response.ok or not isinstance(weather, dict) or 'data' not in weather:
        print(f"Zip {zip_code}: no weather data (HTTP {response.status_code})")
        continue

    weather_df = pd.json_normalize(
        weather,
        sep='_',
        record_path='data',
        record_prefix="weather_",
        errors="ignore",
    )

    # Tag every row with the location it belongs to (stored as strings).
    weather_df['lat'] = latitude
    weather_df['lon'] = longitude
    weather_df['zip_code'] = zip_code

    weather_frames.append(weather_df)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_1 = pd.concat(weather_frames, ignore_index=True) if weather_frames else pd.DataFrame([])

In [12]:
# Inspect the payload of the most recent Meteostat response.
weather

{'message': 'You have exceeded the MONTHLY quota for Requests on your current plan, BASIC. Upgrade your plan at https://rapidapi.com/meteostat/api/meteostat'}

In [60]:
# Stack the two weather frames on top of each other and renumber the index.
# NOTE(review): `df_2` is not created in any cell visible in this notebook;
# presumably it is a second download batch from another session. Unless it is
# defined elsewhere, this cell fails on a fresh kernel. TODO confirm its origin.
final_weather_df = pd.concat([df_1,df_2], ignore_index=True)
final_weather_df

  final_weather_df = pd.concat([empty_df, empty_df_2], ignore_index=True)


Unnamed: 0,weather_date,weather_tavg,weather_tmin,weather_tmax,weather_prcp,weather_snow,weather_wdir,weather_wspd,weather_wpgt,weather_pres,weather_tsun,lat,lon
0,2006-01-01,3.0,0.0,5.6,0.0,0.0,,3.7,,1016.7,,40.852,-73.9129
1,2006-01-02,5.5,4.4,7.8,16.5,0.0,,7.2,,1021.3,,40.852,-73.9129
2,2006-01-03,2.8,1.7,4.4,26.7,0.0,35.0,23.0,,1013.0,,40.852,-73.9129
3,2006-01-04,1.1,-1.7,3.3,0.0,0.0,,11.1,,1015.2,,40.852,-73.9129
4,2006-01-05,6.0,2.8,10.0,0.5,0.0,241.0,8.2,,1005.3,,40.852,-73.9129
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1170167,2023-12-27,7.7,6.2,9.3,9.3,0.0,64.0,8.1,,1017.7,,40.6039,-74.1472
1170168,2023-12-28,10.4,8.7,12.0,38.7,0.0,23.0,11.9,,1005.5,,40.6039,-74.1472
1170169,2023-12-29,9.4,7.0,12.0,0.9,,301.0,8.4,,1004.8,,40.6039,-74.1472
1170170,2023-12-30,5.3,1.3,7.2,0.1,0.0,290.0,15.3,,1005.3,,40.6039,-74.1472


### Save the data frames to CSV

In [61]:
#final_weather_df.to_csv('/Users/p/Downloads/neueFische/crime_nerds/Weather_Data/weather_data.csv', index=False)

In [17]:
# Turn the list of geo dictionaries into a table (one row per entry).
geo_df = pd.DataFrame(data=geo_data)

In [19]:
# Write the zip/lat/lon table to disk without the index column.
geo_csv_path = '/Users/p/Downloads/neueFische/crime_nerds/Data/Weather_Data/lon_lat_zip_data.csv'
geo_df.to_csv(geo_csv_path, index=False)

# Data prep

In [71]:
# Print column dtypes, non-null counts and memory usage of the weather frame.
final_weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1170172 entries, 0 to 1170171
Data columns (total 11 columns):
 #   Column        Non-Null Count    Dtype         
---  ------        --------------    -----         
 0   weather_date  1170172 non-null  datetime64[ns]
 1   weather_tavg  1168740 non-null  float64       
 2   weather_tmin  1170172 non-null  float64       
 3   weather_tmax  1170172 non-null  float64       
 4   weather_prcp  1170172 non-null  float64       
 5   weather_snow  1169992 non-null  float64       
 6   weather_wdir  853835 non-null   float64       
 7   weather_wspd  1170172 non-null  float64       
 8   weather_pres  1120297 non-null  float64       
 9   lat           1170172 non-null  object        
 10  lon           1170172 non-null  object        
dtypes: datetime64[ns](1), float64(8), object(2)
memory usage: 98.2+ MB


### Drop columns

In [67]:
# Remove the two columns that are not needed further. The result is reassigned
# instead of mutated in place, and errors='ignore' keeps the cell re-runnable:
# on a second run the columns are already gone and nothing is raised.
final_weather_df = final_weather_df.drop(
    columns=['weather_wpgt', 'weather_tsun'],
    errors='ignore',
)

### Convert the date column to a suitable dtype

In [70]:
# Parse the date column into pandas datetimes.
final_weather_df['weather_date'] = pd.to_datetime(final_weather_df['weather_date'])

### Rename columns

In [89]:
# Mapping from the prefixed API column names to the descriptive names used in
# the rest of the analysis ('lat' and 'lon' map to themselves).
rename = dict([
    ('weather_date', 'date'),
    ('weather_tavg', 'temp_avg'),
    ('weather_tmin', 'temp_min'),
    ('weather_tmax', 'temp_max'),
    ('weather_prcp', 'total_precipitation_mm'),
    ('weather_snow', 'snow_depth_mm'),
    ('weather_wdir', 'avg_wind_direction'),
    ('weather_wspd', 'wind_km/h'),
    ('weather_pres', 'air_pressure_hPa'),
    ('lat', 'lat'),
    ('lon', 'lon'),
])

# Apply the mapping to the column labels of the existing frame.
final_weather_df.rename(columns=rename, inplace=True)

In [90]:
# Display the frame to check the renamed columns.
final_weather_df

Unnamed: 0,date,temp_avg,temp_min,temp_max,total_precipitation_mm,snow_depth_mm,avg_wind_direction,wind_km/h,air_pressure_hPa,lat,lon
0,2006-01-01,3.0,0.0,5.6,0.0,0.0,,3.7,1016.7,40.852,-73.9129
1,2006-01-02,5.5,4.4,7.8,16.5,0.0,,7.2,1021.3,40.852,-73.9129
2,2006-01-03,2.8,1.7,4.4,26.7,0.0,35.0,23.0,1013.0,40.852,-73.9129
3,2006-01-04,1.1,-1.7,3.3,0.0,0.0,,11.1,1015.2,40.852,-73.9129
4,2006-01-05,6.0,2.8,10.0,0.5,0.0,241.0,8.2,1005.3,40.852,-73.9129
...,...,...,...,...,...,...,...,...,...,...,...
1170167,2023-12-27,7.7,6.2,9.3,9.3,0.0,64.0,8.1,1017.7,40.6039,-74.1472
1170168,2023-12-28,10.4,8.7,12.0,38.7,0.0,23.0,11.9,1005.5,40.6039,-74.1472
1170169,2023-12-29,9.4,7.0,12.0,0.9,,301.0,8.4,1004.8,40.6039,-74.1472
1170170,2023-12-30,5.3,1.3,7.2,0.1,0.0,290.0,15.3,1005.3,40.6039,-74.1472
