In [3]:
#Trying to use the meteostat library to pull date time specific data for a city

#!pip install meteostat




In [1]:
# Importing specific files that are necessary for data
import pandas as pd
from meteostat import Stations
from datetime import datetime
from meteostat import Hourly


In [2]:
# Set time period
start = datetime(2024, 1, 1)
end = datetime(2024, 12, 31, 23, 59)

# Get hourly data for New York/ Wall Street ICOA Weather Station
data = Hourly('KJRB0', start, end)
data = data.fetch()

# Convert data to dataframe for massaging
wallstreet_temp_df = pd.DataFrame(data)
print(wallstreet_temp_df.head())
print(wallstreet_temp_df.info())


                     temp  dwpt  rhum  prcp  snow   wdir  wspd  wpgt    pres  \
time                                                                           
2024-01-01 00:00:00   6.0  -1.9  57.0   0.0   NaN  260.0  11.0   NaN  1017.0   
2024-01-01 01:00:00   5.6  -2.3  57.0   0.0   NaN  260.0  11.2   NaN  1016.4   
2024-01-01 02:00:00   5.6  -1.8  59.0   0.0   NaN  260.0   9.4   NaN  1016.4   
2024-01-01 03:00:00   5.6  -1.1  62.0   0.0   NaN  250.0   9.4   NaN  1016.4   
2024-01-01 04:00:00   5.6  -0.7  64.0   0.0   NaN  260.0   9.4   NaN  1016.5   

                     tsun  coco  
time                             
2024-01-01 00:00:00   NaN   3.0  
2024-01-01 01:00:00   NaN   3.0  
2024-01-01 02:00:00   NaN   3.0  
2024-01-01 03:00:00   NaN   3.0  
2024-01-01 04:00:00   NaN   3.0  
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8784 entries, 2024-01-01 00:00:00 to 2024-12-31 23:00:00
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------

In [3]:
# Reduce dataset to only features of interest, Precipitation & Temperature
wallstreet_temp_df = wallstreet_temp_df[['temp', 'prcp']]
wallstreet_temp_df.head()

Unnamed: 0_level_0,temp,prcp
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01 00:00:00,6.0,0.0
2024-01-01 01:00:00,5.6,0.0
2024-01-01 02:00:00,5.6,0.0
2024-01-01 03:00:00,5.6,0.0
2024-01-01 04:00:00,5.6,0.0


In [4]:
# Converting Temp from Celcius to Fahrenheit
tempf = (wallstreet_temp_df['temp']*9/5)+32
wallstreet_temp_df['temp(f)'] = tempf
wallstreet_temp_df.head()

Unnamed: 0_level_0,temp,prcp,temp(f)
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-01 00:00:00,6.0,0.0,42.8
2024-01-01 01:00:00,5.6,0.0,42.08
2024-01-01 02:00:00,5.6,0.0,42.08
2024-01-01 03:00:00,5.6,0.0,42.08
2024-01-01 04:00:00,5.6,0.0,42.08


In [5]:
# Dropping 'temp' in celcius as its not needed
wallstreet_temp_df = wallstreet_temp_df.drop('temp', axis=1)
wallstreet_temp_df

Unnamed: 0_level_0,prcp,temp(f)
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01 00:00:00,0.0,42.80
2024-01-01 01:00:00,0.0,42.08
2024-01-01 02:00:00,0.0,42.08
2024-01-01 03:00:00,0.0,42.08
2024-01-01 04:00:00,0.0,42.08
...,...,...
2024-12-31 19:00:00,0.0,53.06
2024-12-31 20:00:00,0.0,53.06
2024-12-31 21:00:00,0.0,50.00
2024-12-31 22:00:00,0.0,48.02


In [6]:
#checking to see if we have any null values in Preciptiation & Temp
print(wallstreet_temp_df.info())
print(wallstreet_temp_df['prcp'].unique())


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8784 entries, 2024-01-01 00:00:00 to 2024-12-31 23:00:00
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   prcp     8523 non-null   float64
 1   temp(f)  8739 non-null   float64
dtypes: float64(2)
memory usage: 205.9 KB
None
[ 0.   0.4  1.2  1.8  2.8  2.5  1.7  1.3  0.5  0.1  0.2  0.3  0.7  1.5
  0.9  3.4  4.5  2.3  3.5  4.4  3.8  7.4 10.4  1.9  0.6  0.8  2.7  2.
  4.   9.5 14.4 13.3  1.   1.1  3.   1.4  1.6  2.6  2.2  3.9  5.   3.2
  3.6  5.9  9.   4.6  3.1  5.7  4.7  7.1  8.1  6.1  8.9 11.3  5.8  2.1
  2.9  6.2  2.4  7.8  4.9  4.2  3.7  5.5  6.   4.8  5.3  8.3  6.4  3.3
  6.9  nan  5.6]


In [7]:
# since we have some 'nan' values in Precipitation, we'll replace those with 0
wallstreet_temp_df['prcp'] = wallstreet_temp_df['prcp'].fillna(0)


In [8]:
# since we have 'nan' values in Temperature, we'll replace them with an average temperature of 65 deg
wallstreet_temp_df['temp(f)']=wallstreet_temp_df['temp(f)'].fillna(65)


In [9]:
wallstreet_temp_df.info()
wallstreet_temp_df.sort_index

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8784 entries, 2024-01-01 00:00:00 to 2024-12-31 23:00:00
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   prcp     8784 non-null   float64
 1   temp(f)  8784 non-null   float64
dtypes: float64(2)
memory usage: 205.9 KB


<bound method DataFrame.sort_index of                      prcp  temp(f)
time                              
2024-01-01 00:00:00   0.0    42.80
2024-01-01 01:00:00   0.0    42.08
2024-01-01 02:00:00   0.0    42.08
2024-01-01 03:00:00   0.0    42.08
2024-01-01 04:00:00   0.0    42.08
...                   ...      ...
2024-12-31 19:00:00   0.0    53.06
2024-12-31 20:00:00   0.0    53.06
2024-12-31 21:00:00   0.0    50.00
2024-12-31 22:00:00   0.0    48.02
2024-12-31 23:00:00   0.0    48.02

[8784 rows x 2 columns]>

In [10]:
# converting to csv
wallstreet_temp_df.to_csv('NewYorkCity_Temperature',index=True)
