In [29]:
import pandas as pd
import datetime

In [30]:
df = pd.read_csv("datasets/hour.csv")

In [31]:
df.shape

(17379, 17)

In [32]:
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [33]:
df.isna().any()

instant       False
dteday        False
season        False
yr            False
mnth          False
hr            False
holiday       False
weekday       False
workingday    False
weathersit    False
temp          False
atemp         False
hum           False
windspeed     False
casual        False
registered    False
cnt           False
dtype: bool

In [34]:
df['dteday'] = pd.to_datetime(df['dteday'])
df['dtime'] = df.apply(lambda row: datetime.datetime(year=row['dteday'].year, month=row['dteday'].month,
                                                      day=row['dteday'].day, hour=row['hr']), axis=1)



In [35]:
df = df.iloc[:,[0,1,17,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]]

In [36]:
df.dtypes

instant                int64
dteday        datetime64[ns]
dtime         datetime64[ns]
season                 int64
yr                     int64
mnth                   int64
hr                     int64
holiday                int64
weekday                int64
workingday             int64
weathersit             int64
temp                 float64
atemp                float64
hum                  float64
windspeed            float64
casual                 int64
registered             int64
cnt                    int64
dtype: object

In [37]:
df.rename(columns={'dteday': 'dte', 'season':'season_num'}, inplace=True)

In [38]:
df['season_name'] = df['season_num'].apply(lambda x: 
                                           'winter' if x == 1 else 
                                           'spring' if x == 2 else 
                                           'summer' if x == 3 else 
                                           'fall')

In [39]:
df['temp_celsius'] = (df['temp']*41).round()

In [40]:
df['temp_celsius_realfeel'] = (df['atemp']*50).round()

In [41]:
df['season_name'].value_counts()

summer    4496
spring    4409
winter    4242
fall      4232
Name: season_name, dtype: int64

In [42]:
seasons = df.groupby('season_name')

In [43]:
seasons['cnt'].mean()

season_name
fall      198.868856
spring    208.344069
summer    236.016237
winter    111.114569
Name: cnt, dtype: float64

In [44]:
import requests
import json 

class WeatherData():


    def __init__(self):

        self.base_url = "https://archive-api.open-meteo.com/v1/archive"
        self.coordinates = '?latitude=38.8951&longitude=-77.0364'
        self.timezone = '&timezone=America%2FNew_York'

    def get_weather(self, start_date = '2011-01-01', end_date = '2012-12-31', 
                    hourly_list=['precipitation,cloudcover,windspeed_10m','temperature_2m'],
                    daily_list = ['rain_sum,winddirection_10m_dominant']):
        
        timeframe = ('&start_date='+ start_date + '&end_date=' + end_date)
        hourly = ('&hourly=' + (",".join(hourly_list)))
        daily = ('&daily='+ ",".join((daily_list)))
        api_url = (self.base_url + self.coordinates + timeframe + hourly + daily + self.timezone)
        resp = requests.get(api_url)
        weather_data = resp.json()
        
        return weather_data

    
# api_url = ("https://archive-api.open-meteo.com/v1/archive")

# coordinates = '?latitude=38.8951&longitude=-77.0364'
# start_date = '2011-01-01'
# end_date = '2012-12-31'
# timeframe = '&start_date='+ start_date + '&end_date=' + end_date
# rest_const = '&hourly=precipitation,cloudcover,windspeed_10m&daily=rain_sum,winddirection_10m_dominant&timezone=America%2FNew_York'
# URL = api_url + coordinates + timeframe +rest_const

# resp = requests.get(URL)

dc_weather = WeatherData()
my_weather = dc_weather.get_weather()


In [45]:
def print_json_structure(data, indent=0):
    if isinstance(data, dict):
        for key, value in data.items():
            print(f"{' ' * indent}{key}:")
            print_json_structure(value, indent + 2)
    elif isinstance(data, list):
        for item in data:
            print_json_structure(item, indent)
    else:
        print(f"{' ' * indent}{data}")

# Convert JSON to Python object
data = json.loads(json.dumps(my_weather))

# Print the JSON structure
print_json_structure(my_weather)

latitude:
  38.90001
longitude:
  -77.0
generationtime_ms:
  3.6520957946777344
utc_offset_seconds:
  -14400
timezone:
  America/New_York
timezone_abbreviation:
  EDT
elevation:
  12.0
hourly_units:
  time:
    iso8601
  precipitation:
    mm
  cloudcover:
    %
  windspeed_10m:
    km/h
  temperature_2m:
    °C
hourly:
  time:
    2011-01-01T00:00
    2011-01-01T01:00
    2011-01-01T02:00
    2011-01-01T03:00
    2011-01-01T04:00
    2011-01-01T05:00
    2011-01-01T06:00
    2011-01-01T07:00
    2011-01-01T08:00
    2011-01-01T09:00
    2011-01-01T10:00
    2011-01-01T11:00
    2011-01-01T12:00
    2011-01-01T13:00
    2011-01-01T14:00
    2011-01-01T15:00
    2011-01-01T16:00
    2011-01-01T17:00
    2011-01-01T18:00
    2011-01-01T19:00
    2011-01-01T20:00
    2011-01-01T21:00
    2011-01-01T22:00
    2011-01-01T23:00
    2011-01-02T00:00
    2011-01-02T01:00
    2011-01-02T02:00
    2011-01-02T03:00
    2011-01-02T04:00
    2011-01-02T05:00
    2011-01-02T06:00
    2011-01-02T07:0

In [46]:
len(df['dte'])

17379

In [47]:
len(my_weather['hourly']['precipitation'])

17544

In [48]:
# Extract the precipitation data
hourly_precipitation = my_weather['hourly']['precipitation']

# Iterate over the rows in the DataFrame
for index, row in df.iterrows():
    precipitation_index = index  # Assuming the precipitation values are in the same order as the DataFrame rows
    precipitation_value = hourly_precipitation[precipitation_index]

    # Assign the precipitation value to the respective row in the DataFrame
    df.at[index, 'precipitation'] = precipitation_value

# Print the updated DataFrame
df.head()

Unnamed: 0,instant,dte,dtime,season_num,yr,mnth,hr,holiday,weekday,workingday,...,atemp,hum,windspeed,casual,registered,cnt,season_name,temp_celsius,temp_celsius_realfeel,precipitation
0,1,2011-01-01,2011-01-01 00:00:00,1,0,1,0,0,6,0,...,0.2879,0.81,0.0,3,13,16,winter,10.0,14.0,0.0
1,2,2011-01-01,2011-01-01 01:00:00,1,0,1,1,0,6,0,...,0.2727,0.8,0.0,8,32,40,winter,9.0,14.0,0.0
2,3,2011-01-01,2011-01-01 02:00:00,1,0,1,2,0,6,0,...,0.2727,0.8,0.0,5,27,32,winter,9.0,14.0,0.0
3,4,2011-01-01,2011-01-01 03:00:00,1,0,1,3,0,6,0,...,0.2879,0.75,0.0,3,10,13,winter,10.0,14.0,0.0
4,5,2011-01-01,2011-01-01 04:00:00,1,0,1,4,0,6,0,...,0.2879,0.75,0.0,0,1,1,winter,10.0,14.0,0.0


In [49]:
df['hr'].value_counts()

17    730
16    730
13    729
15    729
14    729
12    728
22    728
21    728
20    728
19    728
18    728
23    728
11    727
10    727
9     727
8     727
7     727
0     726
6     725
1     724
5     717
2     715
4     697
3     697
Name: hr, dtype: int64

In [50]:
my_weather['hourly'].keys()

dict_keys(['time', 'precipitation', 'cloudcover', 'windspeed_10m', 'temperature_2m'])

In [51]:
# Extract the precipitation data
hourly_temp = my_weather['hourly']['temperature_2m']

# Iterate over the rows in the DataFrame
for index, row in df.iterrows():
    temp_index = index  # Assuming the precipitation values are in the same order as the DataFrame rows
    temp_value = hourly_temp[temp_index]

    # Assign the precipitation value to the respective row in the DataFrame
    df.at[index, 'temperature_api_celcius'] = round(temp_value)

# Print the updated DataFrame
df.head()

Unnamed: 0,instant,dte,dtime,season_num,yr,mnth,hr,holiday,weekday,workingday,...,hum,windspeed,casual,registered,cnt,season_name,temp_celsius,temp_celsius_realfeel,precipitation,temperature_api_celcius
0,1,2011-01-01,2011-01-01 00:00:00,1,0,1,0,0,6,0,...,0.81,0.0,3,13,16,winter,10.0,14.0,0.0,2.0
1,2,2011-01-01,2011-01-01 01:00:00,1,0,1,1,0,6,0,...,0.8,0.0,8,32,40,winter,9.0,14.0,0.0,2.0
2,3,2011-01-01,2011-01-01 02:00:00,1,0,1,2,0,6,0,...,0.8,0.0,5,27,32,winter,9.0,14.0,0.0,2.0
3,4,2011-01-01,2011-01-01 03:00:00,1,0,1,3,0,6,0,...,0.75,0.0,3,10,13,winter,10.0,14.0,0.0,1.0
4,5,2011-01-01,2011-01-01 04:00:00,1,0,1,4,0,6,0,...,0.75,0.0,0,1,1,winter,10.0,14.0,0.0,1.0


In [52]:
df.groupby('dte').sum()

Unnamed: 0_level_0,instant,season_num,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt,temp_celsius,temp_celsius_realfeel,precipitation,temperature_api_celcius
dte,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2011-01-01,300,24,0,24,276,0,144,0,38,8.26,8.7270,19.34,3.8507,331,654,985,338.0,435.0,2.5,198.0
2011-01-02,828,23,0,23,271,0,0,0,45,8.36,8.1360,16.01,5.7164,131,670,801,344.0,407.0,3.0,203.0
2011-01-03,1287,22,0,22,271,0,22,22,22,4.32,4.1669,9.62,5.4628,120,1229,1349,179.0,209.0,0.0,6.0
2011-01-04,1863,23,0,23,273,0,46,23,24,4.60,4.8788,13.58,3.6868,108,1454,1562,191.0,247.0,0.0,-5.0
2011-01-05,2392,23,0,23,273,0,69,23,23,5.22,5.2732,10.05,4.2987,82,1518,1600,213.0,262.0,0.0,-6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-12-27,414516,24,24,288,276,0,96,24,40,6.10,5.4394,15.67,8.4032,247,1867,2114,253.0,272.0,8.6,142.0
2012-12-28,415092,24,24,288,276,0,120,24,41,6.08,6.1211,14.16,3.7313,644,2451,3095,250.0,305.0,8.4,131.0
2012-12-29,415668,24,24,288,276,0,144,0,49,6.08,5.8176,18.07,2.9852,159,1182,1341,251.0,288.0,0.0,63.0
2012-12-30,416244,24,24,288,276,0,0,0,32,6.14,5.5608,11.60,8.4181,364,1432,1796,253.0,279.0,0.0,57.0


In [53]:
df.head()

Unnamed: 0,instant,dte,dtime,season_num,yr,mnth,hr,holiday,weekday,workingday,...,hum,windspeed,casual,registered,cnt,season_name,temp_celsius,temp_celsius_realfeel,precipitation,temperature_api_celcius
0,1,2011-01-01,2011-01-01 00:00:00,1,0,1,0,0,6,0,...,0.81,0.0,3,13,16,winter,10.0,14.0,0.0,2.0
1,2,2011-01-01,2011-01-01 01:00:00,1,0,1,1,0,6,0,...,0.8,0.0,8,32,40,winter,9.0,14.0,0.0,2.0
2,3,2011-01-01,2011-01-01 02:00:00,1,0,1,2,0,6,0,...,0.8,0.0,5,27,32,winter,9.0,14.0,0.0,2.0
3,4,2011-01-01,2011-01-01 03:00:00,1,0,1,3,0,6,0,...,0.75,0.0,3,10,13,winter,10.0,14.0,0.0,1.0
4,5,2011-01-01,2011-01-01 04:00:00,1,0,1,4,0,6,0,...,0.75,0.0,0,1,1,winter,10.0,14.0,0.0,1.0


In [54]:
len(my_weather['hourly']['precipitation'])

17544

In [55]:
len(df['dtime'])

17379