In [2]:
import pandas as pd
from pipeline import ingest, transform, utils, resample
from config.config import EDINBURGH_COLUMNS, EDINBURGH_DATA, STRATHSPEY_COLUMNS, STRATHSPEY_DATA, LONDON_DATA, ROYSTON_COLUMNS, ROYSTON_DATA
from pipeline.ingest import load_sheets
from pipeline.transform import clean_daytime_sheets
from pipeline.transform import clean_weather_sheets
from pipeline.utils import merge_datasets
from pipeline.utils import get_mins_from_time

In [3]:
# Edinburgh daylight data: read the raw workbook sheets, then clean them into
# a single frame using the Edinburgh column layout.
# Helpers are called through their modules (ingest / transform), matching the
# London / Royston cells further down.
edin_sheets = ingest.load_sheets(EDINBURGH_DATA)
edin_df = transform.clean_daytime_sheets(edin_sheets, EDINBURGH_COLUMNS)
# Preview the first ten rows.
edin_df.head(10)

Unnamed: 0,date,sunrise,sunset,daylength,daylength_diff,astro_twilight_start,astro_twilight_end,nautical_twilight_start,nautical_twilight_end,civil_twilight_start,civil_twilight_end,solar_noon_time,solar_noon_dist
0,2011-12-01,08:18,15:44,07:25:45,-2:37,06:00:00,18:02:00,06:45:00,17:17:00,07:33:00,16:29:00,12:01,147.52
1,2011-12-02,08:20,15:43,07:23:13,-2:31,06:02:00,18:01:00,06:46:00,17:16:00,07:34:00,16:28:00,12:02,147.495
2,2011-12-03,08:21,15:42,07:20:48,-2:25,06:03:00,18:01:00,06:48:00,17:16:00,07:36:00,16:28:00,12:02,147.471
3,2011-12-04,08:23,15:41,07:18:29,-2:18,06:04:00,18:00:00,06:49:00,17:15:00,07:37:00,16:27:00,12:02,147.447
4,2011-12-05,08:24,15:41,07:16:17,-2:12,06:05:00,18:00:00,06:50:00,17:15:00,07:39:00,16:27:00,12:03,147.425
5,2011-12-06,08:26,15:40,07:14:11,-2:05,06:06:00,18:00:00,06:52:00,17:14:00,07:40:00,16:26:00,12:03,147.403
6,2011-12-07,08:27,15:40,07:12:12,-1:58,06:07:00,17:59:00,06:53:00,17:14:00,07:41:00,16:26:00,12:04,147.381
7,2011-12-08,08:29,15:39,07:10:20,-1:51,06:09:00,17:59:00,06:54:00,17:14:00,07:42:00,16:25:00,12:04,147.361
8,2011-12-09,08:30,15:39,07:08:36,-1:44,06:10:00,17:59:00,06:55:00,17:14:00,07:44:00,16:25:00,12:04,147.342
9,2011-12-10,08:31,15:38,07:06:58,-1:37,06:11:00,17:59:00,06:56:00,17:14:00,07:45:00,16:25:00,12:05,147.323


In [4]:
# Strathspey weather data: read the raw workbook sheets, then clean them into
# a single frame using the Strathspey column layout.
strat_sheets = ingest.load_sheets(STRATHSPEY_DATA)
# drop_n / drop_start presumably trim 5 non-data rows from the start of each
# sheet -- TODO confirm against pipeline.transform.clean_weather_sheets.
strat_df = transform.clean_weather_sheets(
    strat_sheets,
    STRATHSPEY_COLUMNS,
    drop_n=5,
    drop_start=True,
)
# NOTE(review): the saved preview contains rows where temp_c_mean is below
# temp_c_min (2011-12-04, 2011-12-09) and where wind_mean_mph exceeds
# wind_max_mph (2011-12-06). Check the source workbook / column mapping.
strat_df.head(10)

Unnamed: 0,date,temp_c_mean,temp_c_min,temp_c_max,rain_mm,pressure_mb_am,pressure_mb_pm,wind_mean_mph,wind_max_mph,wind_direction,sun_hours,snow
0,2011-12-01,3.6,-0.4,4.6,0.0,995,1004,4.1,36,SSW,2.7,
1,2011-12-02,2.6,-1.5,7.5,5.5,1003,983,6.5,36,SSW,1.8,
2,2011-12-03,2.4,0.4,4.4,,986,988,9.5,46,SSW,0.75,6.0
3,2011-12-04,-1.6,-0.9,0.4,,988,992,4.5,22,SSW,1.66,3.7
4,2011-12-05,-1.6,-4.2,0.2,,994,998,2.8,17,SSW,1.08,2.9
5,2011-12-06,-2.7,-2.8,1.2,,998,992,37.0,19,WSW,2.92,5.1
6,2011-12-07,0.2,-5.3,3.8,,989,1005,6.1,39,SSW,0.15,8.6
7,2011-12-08,2.4,-0.8,6.7,14.6,979,981,15.9,62,SSW,0.72,
8,2011-12-09,-0.8,0.9,1.4,,1000,1007,6.7,34,WSW,0.75,2.6
9,2011-12-10,-3.2,-8.0,3.5,,1008,1001,1.0,10,SSE,1.46,0.5


In [5]:
# Combine the Edinburgh daylight frame with the Strathspey weather frame
# (presumably joined on `date` -- verify in pipeline.utils.merge_datasets)
# and save the combined table.
scottish_merged = utils.merge_datasets(edin_df, strat_df)
# index=False keeps the positional row index out of the CSV.
# NOTE: pandas does not create missing directories; `outputs/` must already exist.
scottish_merged.to_csv('outputs/scottish_weather_data.csv', index=False)
scottish_merged.head()

Unnamed: 0,date,sunrise,sunset,daylength,daylength_diff,astro_twilight_start,astro_twilight_end,nautical_twilight_start,nautical_twilight_end,civil_twilight_start,...,temp_c_min,temp_c_max,rain_mm,pressure_mb_am,pressure_mb_pm,wind_mean_mph,wind_max_mph,wind_direction,sun_hours,snow
0,2011-12-01,08:18,15:44,07:25:45,-2:37,06:00:00,18:02:00,06:45:00,17:17:00,07:33:00,...,-0.4,4.6,0.0,995,1004,4.1,36,SSW,2.7,
1,2011-12-02,08:20,15:43,07:23:13,-2:31,06:02:00,18:01:00,06:46:00,17:16:00,07:34:00,...,-1.5,7.5,5.5,1003,983,6.5,36,SSW,1.8,
2,2011-12-03,08:21,15:42,07:20:48,-2:25,06:03:00,18:01:00,06:48:00,17:16:00,07:36:00,...,0.4,4.4,,986,988,9.5,46,SSW,0.75,6.0
3,2011-12-04,08:23,15:41,07:18:29,-2:18,06:04:00,18:00:00,06:49:00,17:15:00,07:37:00,...,-0.9,0.4,,988,992,4.5,22,SSW,1.66,3.7
4,2011-12-05,08:24,15:41,07:16:17,-2:12,06:05:00,18:00:00,06:50:00,17:15:00,07:39:00,...,-4.2,0.2,,994,998,2.8,17,SSW,1.08,2.9


In [6]:
# Keep only the 2012 rows of the merged Scottish data, then expand them into
# a per-minute series of estimated temperatures (columns: datetime, estimated_temp).
# How the estimate is interpolated is defined in pipeline.resample and is not
# visible from this notebook.
scottish_merged_filt = utils.filter_by_year(scottish_merged, 2012)
scottish_minute_temp = resample.generate_minute_estimates(scottish_merged_filt)
# index=False keeps the positional row index out of the CSV.
# NOTE: pandas does not create missing directories; `outputs/` must already exist.
scottish_minute_temp.to_csv('outputs/scottish_minute_temp.csv', index=False)
scottish_minute_temp.head()

Unnamed: 0,datetime,estimated_temp
0,2012-01-01 00:00:00,2.612
1,2012-01-01 00:01:00,2.611
2,2012-01-01 00:02:00,2.61
3,2012-01-01 00:03:00,2.61
4,2012-01-01 00:04:00,2.609


In [7]:
# London daylight data: read the raw workbook sheets and clean them into a
# single frame.
london_sheets = ingest.load_sheets(LONDON_DATA)
# NOTE(review): the London sheets are cleaned with EDINBURGH_COLUMNS. The
# resulting columns match the Edinburgh frame, so the two workbooks presumably
# share one layout -- confirm, and consider a location-neutral constant name.
london_df = transform.clean_daytime_sheets(london_sheets, EDINBURGH_COLUMNS)
london_df.head()

Unnamed: 0,date,sunrise,sunset,daylength,daylength_diff,astro_twilight_start,astro_twilight_end,nautical_twilight_start,nautical_twilight_end,civil_twilight_start,civil_twilight_end,solar_noon_time,solar_noon_dist
0,2011-12-01,07:43,15:55,08:11:59,-2:05,05:41:00,17:56:00,06:22:00,17:16:00,07:04:00,16:34:00,11:49,147.52
1,2011-12-02,07:44,15:54,08:09:58,-2:00,05:42:00,17:56:00,06:23:00,17:16:00,07:05:00,16:33:00,11:49,147.495
2,2011-12-03,07:45,15:54,08:08:02,-1:55,05:44:00,17:55:00,06:24:00,17:15:00,07:06:00,16:33:00,11:50,147.471
3,2011-12-04,07:47,15:53,08:06:12,-1:50,05:45:00,17:55:00,06:25:00,17:15:00,07:08:00,16:32:00,11:50,147.448
4,2011-12-05,07:48,15:53,08:04:27,-1:45,05:46:00,17:55:00,06:26:00,17:15:00,07:09:00,16:32:00,11:51,147.425


In [8]:
# Royston weather data: read the raw workbook sheets, then clean them into a
# single frame using the Royston column layout.
royston_sheets = ingest.load_sheets(ROYSTON_DATA)
# drop_n is passed by keyword (as in the Strathspey call) so the 3 is no
# longer an unexplained positional value. With drop_start=False this
# presumably trims 3 rows from the end of each sheet -- TODO confirm against
# pipeline.transform.clean_weather_sheets.
royston_df = transform.clean_weather_sheets(
    royston_sheets,
    ROYSTON_COLUMNS,
    drop_n=3,
    drop_start=False,
)
# NOTE(review): the saved preview ends with a `date1` column whose values
# (1, 2, 3, ...) look like a leftover day-of-month field -- confirm whether it
# should be dropped during cleaning.
royston_df.head()

Unnamed: 0,date,temp_c_max,temp_c_min,grass_temp_c_min,earth_temp_c,sun_hours,rain_mm,wind_direction,wind_mean_mph,wind_max_mph,...,pressure_mb_mean,humid_mean,temp_c_mean,fog_vis_1000,fog_vis_200,snow,snow_lying,hail,thunder,date1
0,2011-12-01,9.5,8.6,7.4,8.3,1.5,0.1,W,6.1,32,...,1006.0,84.1,8.6,0,0,,0,0,0,1
1,2011-12-02,10.5,2.3,-1.0,8.0,4.7,2.1,SW,8.3,34,...,1010.6,86.2,4.9,0,0,,0,0,0,2
2,2011-12-03,10.8,6.8,5.6,7.7,6.2,0.0,W,11.8,35,...,1001.6,80.9,8.8,0,0,,0,0,0,3
3,2011-12-04,8.1,6.0,4.2,7.5,0.4,0.7,SW,9.9,33,...,999.5,81.0,6.2,0,0,,0,0,0,4
4,2011-12-05,6.0,1.5,-0.9,6.8,3.6,0.0,SW,11.5,29,...,1004.1,81.0,3.2,0,0,,0,0,0,5


In [9]:
# Combine the London daylight frame with the Royston weather frame
# (presumably joined on `date` -- verify in pipeline.utils.merge_datasets)
# and save the combined table.
london_merged = utils.merge_datasets(london_df, royston_df)
# index=False keeps the positional row index out of the CSV.
# NOTE: pandas does not create missing directories; `outputs/` must already exist.
london_merged.to_csv('outputs/london_weather_data.csv', index=False)
london_merged.head()

Unnamed: 0,date,sunrise,sunset,daylength,daylength_diff,astro_twilight_start,astro_twilight_end,nautical_twilight_start,nautical_twilight_end,civil_twilight_start,...,pressure_mb_mean,humid_mean,temp_c_mean,fog_vis_1000,fog_vis_200,snow,snow_lying,hail,thunder,date1
0,2011-12-01,07:43,15:55,08:11:59,-2:05,05:41:00,17:56:00,06:22:00,17:16:00,07:04:00,...,1006.0,84.1,8.6,0,0,,0,0,0,1
1,2011-12-02,07:44,15:54,08:09:58,-2:00,05:42:00,17:56:00,06:23:00,17:16:00,07:05:00,...,1010.6,86.2,4.9,0,0,,0,0,0,2
2,2011-12-03,07:45,15:54,08:08:02,-1:55,05:44:00,17:55:00,06:24:00,17:15:00,07:06:00,...,1001.6,80.9,8.8,0,0,,0,0,0,3
3,2011-12-04,07:47,15:53,08:06:12,-1:50,05:45:00,17:55:00,06:25:00,17:15:00,07:08:00,...,999.5,81.0,6.2,0,0,,0,0,0,4
4,2011-12-05,07:48,15:53,08:04:27,-1:45,05:46:00,17:55:00,06:26:00,17:15:00,07:09:00,...,1004.1,81.0,3.2,0,0,,0,0,0,5


In [10]:
# Keep only the 2012 rows of the merged London data, then expand them into a
# per-minute series of estimated temperatures (columns: datetime, estimated_temp).
# How the estimate is interpolated is defined in pipeline.resample and is not
# visible from this notebook.
london_merged_filt = utils.filter_by_year(london_merged, 2012)
london_minute_temp = resample.generate_minute_estimates(london_merged_filt)
# index=False keeps the positional row index out of the CSV.
# NOTE: pandas does not create missing directories; `outputs/` must already exist.
london_minute_temp.to_csv('outputs/london_minute_temp.csv', index=False)
london_minute_temp.head()

Unnamed: 0,datetime,estimated_temp
0,2012-01-01 00:00:00,10.83
1,2012-01-01 00:01:00,10.828
2,2012-01-01 00:02:00,10.827
3,2012-01-01 00:03:00,10.825
4,2012-01-01 00:04:00,10.823
