In [158]:
import pandas as pd
import numpy as np
import pytz as tz

In [159]:
from noaa_sdk import NOAA

# Create the NOAA client and request forecasts for postal code '11365', country 'US'.
# NOTE(review): presumably a live request to api.weather.gov (the icon URLs in the
# later output point there) — confirm; if so, results differ from run to run.
n = NOAA()
res = n.get_forecasts('11365', 'US')

In [160]:
# Load the forecast records into a DataFrame (one row per forecast period).
res_df = pd.DataFrame(res)

In [161]:
# Show the raw period boundaries; at this point they are still ISO-8601 strings.
res_df[['startTime', 'endTime']]

Unnamed: 0,startTime,endTime
0,2023-07-28T14:00:00-04:00,2023-07-28T15:00:00-04:00
1,2023-07-28T15:00:00-04:00,2023-07-28T16:00:00-04:00
2,2023-07-28T16:00:00-04:00,2023-07-28T17:00:00-04:00
3,2023-07-28T17:00:00-04:00,2023-07-28T18:00:00-04:00
4,2023-07-28T18:00:00-04:00,2023-07-28T19:00:00-04:00
...,...,...
151,2023-08-03T21:00:00-04:00,2023-08-03T22:00:00-04:00
152,2023-08-03T22:00:00-04:00,2023-08-03T23:00:00-04:00
153,2023-08-03T23:00:00-04:00,2023-08-04T00:00:00-04:00
154,2023-08-04T00:00:00-04:00,2023-08-04T01:00:00-04:00


In [162]:
# Inspect the schema before any conversion: startTime/endTime are plain object columns.
res_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 16 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   number                      156 non-null    int64 
 1   name                        156 non-null    object
 2   startTime                   156 non-null    object
 3   endTime                     156 non-null    object
 4   isDaytime                   156 non-null    bool  
 5   temperature                 156 non-null    int64 
 6   temperatureUnit             156 non-null    object
 7   temperatureTrend            0 non-null      object
 8   probabilityOfPrecipitation  156 non-null    object
 9   dewpoint                    156 non-null    object
 10  relativeHumidity            156 non-null    object
 11  windSpeed                   156 non-null    object
 12  windDirection               156 non-null    object
 13  icon                        156 non-null    object

In [163]:
# Parse both period-boundary columns from strings into datetimes, then
# display them to confirm the conversion.
res_df = res_df.assign(
    startTime=pd.to_datetime(res_df['startTime']),
    endTime=pd.to_datetime(res_df['endTime']),
)
res_df[['startTime', 'endTime']]

Unnamed: 0,startTime,endTime
0,2023-07-28 14:00:00-04:00,2023-07-28 15:00:00-04:00
1,2023-07-28 15:00:00-04:00,2023-07-28 16:00:00-04:00
2,2023-07-28 16:00:00-04:00,2023-07-28 17:00:00-04:00
3,2023-07-28 17:00:00-04:00,2023-07-28 18:00:00-04:00
4,2023-07-28 18:00:00-04:00,2023-07-28 19:00:00-04:00
...,...,...
151,2023-08-03 21:00:00-04:00,2023-08-03 22:00:00-04:00
152,2023-08-03 22:00:00-04:00,2023-08-03 23:00:00-04:00
153,2023-08-03 23:00:00-04:00,2023-08-04 00:00:00-04:00
154,2023-08-04 00:00:00-04:00,2023-08-04 01:00:00-04:00


In [164]:
# Confirm that startTime/endTime are now timezone-aware datetime64 columns.
res_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 16 columns):
 #   Column                      Non-Null Count  Dtype                                 
---  ------                      --------------  -----                                 
 0   number                      156 non-null    int64                                 
 1   name                        156 non-null    object                                
 2   startTime                   156 non-null    datetime64[ns, pytz.FixedOffset(-240)]
 3   endTime                     156 non-null    datetime64[ns, pytz.FixedOffset(-240)]
 4   isDaytime                   156 non-null    bool                                  
 5   temperature                 156 non-null    int64                                 
 6   temperatureUnit             156 non-null    object                                
 7   temperatureTrend            0 non-null      object                                
 8   probabilit

In [165]:
# Show the timezone attached to the first startTime value. Only the first row
# is inspected; .iloc[0] selects it by position, so this works regardless of
# the frame's index labels.
res_df['startTime'].iloc[0].tzinfo

pytz.FixedOffset(-240)

In [166]:
# Re-express both timestamp columns in UTC (same instants, different zone).
for time_col in ('startTime', 'endTime'):
    res_df[time_col] = res_df[time_col].dt.tz_convert(tz.UTC)

In [167]:
# Pull out the precipitation-probability column for a closer look.
# ("precent" is a misspelling of "percent"; the name is kept because later cells use it.)
precent_prec = res_df['probabilityOfPrecipitation']

In [168]:
# Display the column: each cell is a dict with 'unitCode' and 'value' keys.
precent_prec

0       {'unitCode': 'wmoUnit:percent', 'value': 2}
1       {'unitCode': 'wmoUnit:percent', 'value': 2}
2       {'unitCode': 'wmoUnit:percent', 'value': 2}
3       {'unitCode': 'wmoUnit:percent', 'value': 2}
4       {'unitCode': 'wmoUnit:percent', 'value': 2}
                           ...                     
151    {'unitCode': 'wmoUnit:percent', 'value': 37}
152    {'unitCode': 'wmoUnit:percent', 'value': 37}
153    {'unitCode': 'wmoUnit:percent', 'value': 37}
154    {'unitCode': 'wmoUnit:percent', 'value': 37}
155    {'unitCode': 'wmoUnit:percent', 'value': 37}
Name: probabilityOfPrecipitation, Length: 156, dtype: object

In [169]:
# Expand each dict cell into its own columns: every dict is turned into a
# pd.Series, so the dict keys ('unitCode', 'value') become column names.
precent_prec = precent_prec.apply(pd.Series)

In [170]:
# Display the expanded result, now a two-column frame (unitCode, value).
precent_prec

Unnamed: 0,unitCode,value
0,wmoUnit:percent,2
1,wmoUnit:percent,2
2,wmoUnit:percent,2
3,wmoUnit:percent,2
4,wmoUnit:percent,2
...,...,...
151,wmoUnit:percent,37
152,wmoUnit:percent,37
153,wmoUnit:percent,37
154,wmoUnit:percent,37


In [171]:
# Preview the first rows: timestamps are in UTC, while probabilityOfPrecipitation,
# dewpoint and relativeHumidity still hold unit/value dicts.
res_df.head()

Unnamed: 0,number,name,startTime,endTime,isDaytime,temperature,temperatureUnit,temperatureTrend,probabilityOfPrecipitation,dewpoint,relativeHumidity,windSpeed,windDirection,icon,shortForecast,detailedForecast
0,1,,2023-07-28 18:00:00+00:00,2023-07-28 19:00:00+00:00,True,91,F,,"{'unitCode': 'wmoUnit:percent', 'value': 2}","{'unitCode': 'wmoUnit:degC', 'value': 21.11111...","{'unitCode': 'wmoUnit:percent', 'value': 51}",8 mph,SW,"https://api.weather.gov/icons/land/day/few,2?s...",Sunny,
1,2,,2023-07-28 19:00:00+00:00,2023-07-28 20:00:00+00:00,True,92,F,,"{'unitCode': 'wmoUnit:percent', 'value': 2}","{'unitCode': 'wmoUnit:degC', 'value': 21.11111...","{'unitCode': 'wmoUnit:percent', 'value': 49}",9 mph,SW,"https://api.weather.gov/icons/land/day/few,2?s...",Sunny,
2,3,,2023-07-28 20:00:00+00:00,2023-07-28 21:00:00+00:00,True,93,F,,"{'unitCode': 'wmoUnit:percent', 'value': 2}","{'unitCode': 'wmoUnit:degC', 'value': 21.11111...","{'unitCode': 'wmoUnit:percent', 'value': 47}",9 mph,SW,"https://api.weather.gov/icons/land/day/few,2?s...",Sunny,
3,4,,2023-07-28 21:00:00+00:00,2023-07-28 22:00:00+00:00,True,92,F,,"{'unitCode': 'wmoUnit:percent', 'value': 2}","{'unitCode': 'wmoUnit:degC', 'value': 21.11111...","{'unitCode': 'wmoUnit:percent', 'value': 49}",10 mph,SW,"https://api.weather.gov/icons/land/day/few,2?s...",Sunny,
4,5,,2023-07-28 22:00:00+00:00,2023-07-28 23:00:00+00:00,False,91,F,,"{'unitCode': 'wmoUnit:percent', 'value': 2}","{'unitCode': 'wmoUnit:degC', 'value': 21.66666...","{'unitCode': 'wmoUnit:percent', 'value': 52}",9 mph,SW,"https://api.weather.gov/icons/land/night/few,2...",Mostly Clear,


In [173]:
# Iterate over the columns in res_df and flatten every column whose cells are
# {'unitCode': ..., 'value': ...} dicts into a single '<column>_<unit>' column.
def _flatten_unit_columns(frame):
    """Return a copy of ``frame`` with unit/value dict columns flattened.

    A column is treated as a dict column when its first cell (by position) is
    a ``dict``. Each such column is replaced by a column named
    ``'<column>_<unit>'`` that holds the dicts' ``'value'`` entries, where
    ``<unit>`` is the text after the last ``':'`` in the first cell's
    ``'unitCode'``. The new columns are appended after the remaining columns,
    in the order the dict columns appeared. The name of every flattened column
    is printed.

    ``frame`` itself is not modified. An empty frame, or one without dict
    columns, comes back unchanged, so re-running the cell is a no-op.

    Assumes every cell of a dict column is a dict containing both
    ``'unitCode'`` and ``'value'``.
    """
    # Nothing to inspect on an empty frame; avoids .iloc[0] raising IndexError.
    if frame.empty:
        return frame.copy()

    # Decide up front which columns to flatten, so the frame is never
    # reshaped while its columns are being iterated.
    dict_cols = [
        col for col in frame.columns
        if isinstance(frame[col].iloc[0], dict)
    ]

    flattened = {}
    for col in dict_cols:
        print(col)
        # Build the expanded frame in one go instead of creating a pd.Series
        # per row; keep the original index so the values stay aligned.
        expanded = pd.DataFrame(frame[col].tolist(), index=frame.index)
        unit = expanded['unitCode'].iloc[0].split(':')[-1]
        flattened[f'{col}_{unit}'] = expanded['value']

    # One drop and one assign replace the per-column concat/drop(inplace=True).
    return frame.drop(columns=dict_cols).assign(**flattened)


res_df = _flatten_unit_columns(res_df)

res_df.head()

probabilityOfPrecipitation
dewpoint
relativeHumidity


Unnamed: 0,number,name,startTime,endTime,isDaytime,temperature,temperatureUnit,temperatureTrend,windSpeed,windDirection,icon,shortForecast,detailedForecast,probabilityOfPrecipitation_percent,dewpoint_degC,relativeHumidity_percent
0,1,,2023-07-28 18:00:00+00:00,2023-07-28 19:00:00+00:00,True,91,F,,8 mph,SW,"https://api.weather.gov/icons/land/day/few,2?s...",Sunny,,2,21.111111,51
1,2,,2023-07-28 19:00:00+00:00,2023-07-28 20:00:00+00:00,True,92,F,,9 mph,SW,"https://api.weather.gov/icons/land/day/few,2?s...",Sunny,,2,21.111111,49
2,3,,2023-07-28 20:00:00+00:00,2023-07-28 21:00:00+00:00,True,93,F,,9 mph,SW,"https://api.weather.gov/icons/land/day/few,2?s...",Sunny,,2,21.111111,47
3,4,,2023-07-28 21:00:00+00:00,2023-07-28 22:00:00+00:00,True,92,F,,10 mph,SW,"https://api.weather.gov/icons/land/day/few,2?s...",Sunny,,2,21.111111,49
4,5,,2023-07-28 22:00:00+00:00,2023-07-28 23:00:00+00:00,False,91,F,,9 mph,SW,"https://api.weather.gov/icons/land/night/few,2...",Mostly Clear,,2,21.666667,52


In [None]:
# Build a standalone (unitCode, value) frame for each dict-valued measurement.
# These are read from the raw `res` periods rather than from res_df: the
# flattening loop earlier in the notebook drops the 'dewpoint',
# 'probabilityOfPrecipitation' and 'relativeHumidity' columns from res_df, so
# indexing res_df here raises KeyError on a top-to-bottom run.
# Assumes `res` is a list of per-period dicts, as used to build res_df.
def _unit_value_frame(periods, field):
    """Return a DataFrame with one row per period, built from ``period[field]``.

    Each ``period[field]`` is expected to be a dict; its keys become the
    columns of the result.
    """
    return pd.DataFrame([period[field] for period in periods])


dewpoint_df = _unit_value_frame(res, 'dewpoint')
probabilityOfPrecipitation_df = _unit_value_frame(res, 'probabilityOfPrecipitation')
relativeHumidity_df = _unit_value_frame(res, 'relativeHumidity')

In [174]:
# Re-check the schema after the UTC conversion and flattening: the timestamp
# columns now report a UTC timezone.
res_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 16 columns):
 #   Column                              Non-Null Count  Dtype              
---  ------                              --------------  -----              
 0   number                              156 non-null    int64              
 1   name                                156 non-null    object             
 2   startTime                           156 non-null    datetime64[ns, UTC]
 3   endTime                             156 non-null    datetime64[ns, UTC]
 4   isDaytime                           156 non-null    bool               
 5   temperature                         156 non-null    int64              
 6   temperatureUnit                     156 non-null    object             
 7   temperatureTrend                    0 non-null      object             
 8   windSpeed                           156 non-null    object             
 9   windDirection                       156 non

In [None]:
# Display the expanded dewpoint frame (unitCode and value columns).
dewpoint_df

Unnamed: 0,unitCode,value
0,wmoUnit:degC,21.111111
1,wmoUnit:degC,21.111111
2,wmoUnit:degC,21.111111
3,wmoUnit:degC,21.111111
4,wmoUnit:degC,21.666667
...,...,...
151,wmoUnit:degC,17.222222
152,wmoUnit:degC,17.777778
153,wmoUnit:degC,17.777778
154,wmoUnit:degC,17.777778


In [None]:
# Check the dtypes of the expanded precipitation-probability frame.
probabilityOfPrecipitation_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   unitCode  156 non-null    object
 1   value     156 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 2.6+ KB


In [None]:
# Check the dtypes of the expanded relative-humidity frame.
relativeHumidity_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   unitCode  156 non-null    object
 1   value     156 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 2.6+ KB
