In [1]:
# Make the project root the working directory so that the relative
# "raw_data/..." paths used by the cells below resolve.
import os

# Guarded so that re-running this cell does not climb another directory
# level: once "raw_data" is visible from the working directory we are
# already at the root and must stay there.
if not os.path.isdir("raw_data"):
    os.chdir("../")

In [2]:
# imports
import numpy as np
import pandas as pd

In [3]:
# imports for plots
from plotly import graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio

# Use the "plotly_white" template as the default for plotly figures created from here on.
pio.templates.default = "plotly_white"

In [4]:
# Read the weather table of a single example region.
# The file name encodes "<lat>;<lon>;<region>;<name>.csv".
data_region = pd.read_csv(
    filepath_or_buffer="raw_data/weather_regions/21.79102828115297;-14.47179828879979;Western Sahara;Western Sahara.csv"
)

In [5]:
# Inspect the freshly loaded table (the notebook renders a truncated preview of the first and last rows).
data_region

Unnamed: 0.1,Unnamed: 0,DATE,ALLSKY_SFC_SW_DWN,PRECTOT,RH2M,T2M,T2MDEW,T2M_MAX,T2M_MIN,WS2M,crop_year,day,GDD,cumulative_GDD,cumulative_PRECTOT,cumulative_WS2M,cumulative_RH2M
0,287,1981-10-15,-99.00,0.0,20.30,28.65,3.23,37.51,21.13,3.13,1982,1,29.320,29.320,0.00,3.13,20.30
1,288,1981-10-16,-99.00,0.0,19.48,29.49,3.63,36.50,23.71,1.29,1982,2,30.105,59.425,0.00,4.42,39.78
2,289,1981-10-17,-99.00,0.0,30.99,27.88,8.44,35.51,22.04,3.39,1982,3,28.775,88.200,0.00,7.81,70.77
3,290,1981-10-18,-99.00,0.0,21.02,29.17,4.37,37.65,22.10,3.43,1982,4,29.875,118.075,0.00,11.24,91.79
4,291,1981-10-19,-99.00,0.0,20.43,28.94,3.57,37.74,21.56,3.93,1982,5,29.650,147.725,0.00,15.17,112.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10486,14431,2020-07-06,20.71,0.0,25.06,35.86,12.78,44.09,27.27,3.84,2020,265,35.680,6690.165,12.72,1113.55,6429.23
10487,14432,2020-07-07,20.06,0.0,28.23,35.61,14.38,44.20,28.02,5.36,2020,266,36.110,6726.275,12.72,1118.91,6457.46
10488,14433,2020-07-08,20.45,0.0,19.99,36.47,9.90,47.67,26.15,5.99,2020,267,36.910,6763.185,12.72,1124.90,6477.45
10489,14434,2020-07-09,20.93,0.0,11.09,37.89,2.33,46.02,30.35,6.87,2020,268,38.185,6801.370,12.72,1131.77,6488.54


In [6]:
# Discard the columns that are not used later on.
unused_columns = [
    "Unnamed: 0",
    "ALLSKY_SFC_SW_DWN",
    "T2M",
    "T2MDEW",
    "T2M_MAX",
    "T2M_MIN",
]
data_region = data_region.drop(columns=unused_columns)


In [7]:
# Attach the region's metadata (coordinates and labels) to every row.
region_info = dict(
    lat=21.79102828115297,
    lon=-14.47179828879979,
    region="Western Sahara",
    name="Western Sahara"
)

# Assigning a scalar to a column broadcasts it over all rows, so there is
# no need to materialise a Python list with one copy of the value per row.
for key, val in region_info.items():
    data_region[key] = val

In [8]:
# Check the result: the unused columns are gone and lat / lon / region / name are now present on every row.
data_region

Unnamed: 0,DATE,PRECTOT,RH2M,WS2M,crop_year,day,GDD,cumulative_GDD,cumulative_PRECTOT,cumulative_WS2M,cumulative_RH2M,lat,lon,region,name
0,1981-10-15,0.0,20.30,3.13,1982,1,29.320,29.320,0.00,3.13,20.30,21.791028,-14.471798,Western Sahara,Western Sahara
1,1981-10-16,0.0,19.48,1.29,1982,2,30.105,59.425,0.00,4.42,39.78,21.791028,-14.471798,Western Sahara,Western Sahara
2,1981-10-17,0.0,30.99,3.39,1982,3,28.775,88.200,0.00,7.81,70.77,21.791028,-14.471798,Western Sahara,Western Sahara
3,1981-10-18,0.0,21.02,3.43,1982,4,29.875,118.075,0.00,11.24,91.79,21.791028,-14.471798,Western Sahara,Western Sahara
4,1981-10-19,0.0,20.43,3.93,1982,5,29.650,147.725,0.00,15.17,112.22,21.791028,-14.471798,Western Sahara,Western Sahara
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10486,2020-07-06,0.0,25.06,3.84,2020,265,35.680,6690.165,12.72,1113.55,6429.23,21.791028,-14.471798,Western Sahara,Western Sahara
10487,2020-07-07,0.0,28.23,5.36,2020,266,36.110,6726.275,12.72,1118.91,6457.46,21.791028,-14.471798,Western Sahara,Western Sahara
10488,2020-07-08,0.0,19.99,5.99,2020,267,36.910,6763.185,12.72,1124.90,6477.45,21.791028,-14.471798,Western Sahara,Western Sahara
10489,2020-07-09,0.0,11.09,6.87,2020,268,38.185,6801.370,12.72,1131.77,6488.54,21.791028,-14.471798,Western Sahara,Western Sahara


In [9]:
# Build `big_data`: one table stacking the weather data of every region file.
#
# Every file in `dir_name` is expected to be named
# "<lat>;<lon>;<region>;<name>.csv"; the four fields are parsed from the file
# name and added as constant columns to that file's rows.
from tqdm import tqdm

dir_name = "raw_data/weather_regions/"

# Columns present in the per-region files that are not kept in the stacked table.
unused_cols = ["Unnamed: 0", "ALLSKY_SFC_SW_DWN", "T2M", "T2MDEW", "T2M_MAX", "T2M_MIN"]

# Per-region frames are collected here and concatenated once after the loop.
# Growing a DataFrame inside the loop re-copies all previously stacked rows on
# every iteration (quadratic time), and DataFrame.append no longer exists in
# pandas >= 2.0.
region_frames = []

# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# row order of `big_data` reproducible between runs and machines.
for file in tqdm(sorted(os.listdir(dir_name))):
    stem, ext = os.path.splitext(file)

    # Skip anything that is not a region CSV (e.g. ".ipynb_checkpoints",
    # ".DS_Store"); such entries would otherwise break the name parsing below.
    if ext.lower() != ".csv":
        continue

    # A CSV whose name does not have exactly four ";"-separated fields still
    # raises ValueError here, so malformed inputs are not silently ignored.
    lat, lon, region, name = stem.split(";")

    # lat / lon are kept as the strings parsed from the file name.
    region_info = {
        "lat": lat,
        "lon": lon,
        "region": region,
        "name": name
    }

    # load weather data and drop unused columns
    data_region = pd.read_csv(os.path.join(dir_name, file)).drop(columns=unused_cols)

    # construct cols of region info (a scalar is broadcast over all rows)
    for info_name, info in region_info.items():
        data_region[info_name] = info

    region_frames.append(data_region)

# stack the data; pd.concat rejects an empty list, so fall back to an empty
# frame when the directory holds no region files.
big_data = (
    pd.concat(region_frames, ignore_index=True)
    if region_frames
    else pd.DataFrame()
)
    

100%|██████████| 705/705 [16:48<00:00,  1.43s/it]


In [15]:
# Convert lat and lon to float.
# astype(float) does the conversion in one vectorised step instead of calling
# float() once per row from Python, and yields a float column even when the
# frame has no rows.
for coord in ("lat", "lon"):
    big_data[coord] = big_data[coord].astype(float)

In [22]:
# Persist the stacked table as a single CSV; the row index is not written.
big_data.to_csv(
    path_or_buf="raw_data/all_weather_regions.csv",
    index=False,
)