In [1]:
# Import packages
import numpy as np
import pandas as pd
import missingno as msno

import warnings
warnings.filterwarnings('ignore')

from datetime import datetime, date, time, timedelta, timezone
import dateutil.parser as parser


import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib.ticker import PercentFormatter
plt.rcParams.update({ "figure.figsize" : (8, 5),"axes.facecolor" : "white", "axes.edgecolor":  "black"})
plt.rcParams["figure.facecolor"]= "w"
pd.plotting.register_matplotlib_converters()
pd.set_option('display.float_format', lambda x: '%.2f' % x) # change decimal places



In [2]:
# load dataframes
df_sugarbeet = pd.read_pickle('pickles/01_df_sugarbeet.pkl')
df_weatherstations = pd.read_pickle('pickles/01_df_openweather_2021.pkl')
df_locations = pd.read_pickle('pickles/01_df_locations.pkl')

### Functions

In [3]:
def column_rename(dataframe, old_colname, new_colname):
    ''' 
    rename a specific column in a dataframe. Enter the columnnames as strings.
    Arguments:
    dataframe: the dataframe with a column to be renamed
    old_colname: the column to be renamed
    new_colname: the name the column is supposed to have
    '''
    dataframe.rename(columns={old_colname: new_colname}, inplace=True)
    return dataframe

def drop_rows(dataframe, column, row_drop_list):
    ''' 
    Function to drop specific rows according to location. Prepare a list of row-conditions to be dropped beforehand.
    Arguments:
    dataframe: dataframe that needs to be modified
    columns: column which contains the instances to be dropped, enter as a string
    row_drop_list: list of instances to be dropped
    '''
    dataframe.drop(
        dataframe[
        dataframe[column]
        .isin(row_drop_list)]
        .index, 
        axis=0, 
        inplace=True)
    return dataframe

def drop_columns(dataframe, column_drop_list):
    ''' 
    Function to drop specific rows according to location. Prepare a list of row-conditions to be dropped beforehand.
    Arguments:
    dataframe: dataframe that needs to be modified
    columns: column which contains the instances to be dropped, enter as a string
    column_drop_list: list of instances to be dropped
    '''
    dataframe.drop(column_drop_list, axis=1, inplace=True)
    return dataframe

def merge_frames(dataframe_1, dataframe_2, merge_column, how='outer'):
    ''' 
    Function to merge two dataframes on a common column. Enter merge_column and how as a string.
    Arguments:
    dataframe_1: first dataframe to merge
    dataframe_2: second dataframe to merge
    merge_column: column on which to merge
    how: type of merge (outer, inner, left, etc.)
    Output:
    new dataframe called merge_frame
    '''
    merge_frame = dataframe_1.merge(dataframe_2,
                                  on=merge_column,
                                  how=how)
    return merge_frame

def make_datetime(dataframe, column):
    ''' 
    Function to turn object columns into datetime format. Enter column as a string.
    Arguments:
    dataframe: the dataframe containing the column to be turned into datetime format
    column: column to be changed to datetime format
    Output:
    dataframe containing column as new format
    '''
    dataframe[column] = pd.to_datetime(dataframe[column], yearfirst=True, format="%Y-%m-%d %H:%M:%S")
    return dataframe

def combine_datetime(dataframe, column_year, column_month, column_day, new_col):
    ''' 
    Function to compile separate year, month and day columns into one date column in datetime format. Enter columns as a string.
    Arguments:
    dataframe: the dataframe containing the column to be turned into datetime format
    column_year: column containing the year
    column_month: column containing the month
    column_day: column containing the day
    new_col: name of the new column
    Output:
    dataframe containing a new combined date column in the format year-month-day)
    '''
    dataframe[new_col] = pd.to_datetime(dict(year=dataframe[column_year],
                                       month=dataframe[column_month],
                                       day=dataframe[column_day]))
    return dataframe

def column_transform(dataframe, new_col, grouping, col_transform, how='mean'):
    ''' 
    
    '''
    count_new_col = 0
    count_old_col = 0
    for col in new_col:
        if count_new_col <= len(new_col):
            dataframe[new_col[count_new_col]] = dataframe.groupby(grouping)[col_transform[count_old_col]].transform(how)
            dataframe.drop([col_transform[count_old_col]], axis=1, inplace=True)
            count_new_col += 1
            count_old_col += 1
    dataframe.reset_index(inplace=True)
    dataframe.drop(['index'], axis=1, inplace=True)
    return dataframe

def pivot_frame(dataframe, index, column, values):
    '''
    Function to pivot a dataframe and flatten newly generated columns. Enter index and column as string.
    Arguments:
    dataframe = dataframe to pivot
    index = column on which the dataframe is supposed to be pivoted
    column = column or list of columns to be retained
    values = (previously defined) list of columns which are supposed to be sorted according to column entry
    Output:
    pivoted dataframe with flattened columns.
    '''
    dataframe = pd.pivot_table(dataframe, index=index, columns=[column], values=values)
    # flatten the multi-index columns
    dataframe.columns = ['_'.join(col) for col in dataframe.columns.values]
    # flatten all columns to one level
    dataframe.reset_index(inplace=True)
    return dataframe

In [4]:
# rename fieldid and city_name to station_location
column_rename(df_locations, 'fieldid', 'station_location')
column_rename(df_weatherstations, 'city_name', 'station_location')

Unnamed: 0,dt,dt_iso,timezone,station_location,lat,lon,temp,dew_point,feels_like,temp_min,...,clouds_all,weather_id,weather_main,weather_description,weather_icon,date,year,month,day,plotting_date
0,1262304000,2010-01-01 00:00:00 +0000 UTC,3600,Anklam,53.94,13.60,-0.89,-3.12,-4.97,-1.70,...,98,804,Clouds,overcast clouds,04n,2010-01-01 00:00:00,2010,1,1,1
1,1262307600,2010-01-01 01:00:00 +0000 UTC,3600,Anklam,53.94,13.60,-0.81,-3.04,-4.96,-1.40,...,98,804,Clouds,overcast clouds,04n,2010-01-01 01:00:00,2010,1,1,1
2,1262311200,2010-01-01 02:00:00 +0000 UTC,3600,Anklam,53.94,13.60,-0.70,-2.93,-4.83,-1.40,...,96,804,Clouds,overcast clouds,04n,2010-01-01 02:00:00,2010,1,1,1
3,1262314800,2010-01-01 03:00:00 +0000 UTC,3600,Anklam,53.94,13.60,-0.72,-2.95,-4.57,-1.08,...,96,804,Clouds,overcast clouds,04n,2010-01-01 03:00:00,2010,1,1,1
4,1262318400,2010-01-01 04:00:00 +0000 UTC,3600,Anklam,53.94,13.60,-0.73,-3.11,-4.68,-1.11,...,98,804,Clouds,overcast clouds,04n,2010-01-01 04:00:00,2010,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795030,1664391600,2022-09-28 19:00:00 +0000 UTC,7200,VierhÃ¶fen,48.75,12.71,9.67,7.61,8.78,8.71,...,100,500,Rain,light rain,10n,2022-09-28 19:00:00,2022,9,28,271
1795031,1664395200,2022-09-28 20:00:00 +0000 UTC,7200,VierhÃ¶fen,48.75,12.71,9.31,7.26,7.45,7.90,...,100,501,Rain,moderate rain,10n,2022-09-28 20:00:00,2022,9,28,271
1795032,1664398800,2022-09-28 21:00:00 +0000 UTC,7200,VierhÃ¶fen,48.75,12.71,9.30,7.42,7.43,7.84,...,100,501,Rain,moderate rain,10n,2022-09-28 21:00:00,2022,9,28,271
1795033,1664402400,2022-09-28 22:00:00 +0000 UTC,7200,VierhÃ¶fen,48.75,12.71,9.04,7.16,7.12,7.84,...,100,501,Rain,moderate rain,10n,2022-09-28 22:00:00,2022,9,28,271


In [5]:
# drop Lamotte, Lelystad, Stadthagen
locationdroplist = ['Lamotte', 'Lelystad', 'Stadthagen'] 
drop_rows(df_weatherstations, 'station_location', locationdroplist)
drop_rows(df_sugarbeet, 'station_location', locationdroplist)
drop_rows(df_locations, 'station_location', locationdroplist)

Unnamed: 0,station_location,latitude,longitude,sowing_year,sowing_month,sowing_day,harvesting_year,harvesting_month,havesting_day
0,Herchsheim,49.64,9.93,2021,4,11,2021,10,13
1,Herchsheim_2,49.64,9.94,2021,4,9,2021,9,26
2,Vierhöfen,48.78,12.74,2021,3,31,2021,10,11
3,Söllingen,52.11,10.93,2021,4,24,2021,11,1
4,Berklingen,52.1,10.73,2021,4,1,2021,9,11
6,Emmeloord,52.7,5.71,2021,4,21,2021,9,17
8,Rittershausen,49.6,10.01,2021,4,14,2021,10,13
9,Sommepy,49.25,4.6,2021,3,25,2021,10,18
11,Mattenkofen,48.78,12.74,2021,3,30,2021,10,7
12,Pithiviers,48.17,2.33,2021,3,24,2021,9,30


In [6]:
# exclude jan, feb, mar, nov, dec
monthkeep = [4,  5,  6,  7,  8,  9, 10]
df_weatherstations = df_weatherstations[df_weatherstations.month.isin(monthkeep)]

In [7]:
# join them on station_location columns
df_weatherlocations = merge_frames(df_weatherstations, df_locations, 'station_location', 'outer')

In [8]:
# create sowing and harvesting date columns
combine_datetime(df_weatherlocations, 'sowing_year', 'sowing_month', 'sowing_day', 'sowing_date')
combine_datetime(df_weatherlocations, 'harvesting_year', 'harvesting_month', 'havesting_day', 'harvesting_date')

Unnamed: 0,dt,dt_iso,timezone,station_location,lat,lon,temp,dew_point,feels_like,temp_min,...,latitude,longitude,sowing_year,sowing_month,sowing_day,harvesting_year,harvesting_month,havesting_day,sowing_date,harvesting_date
0,1270080000.00,2010-04-01 00:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,5.80,2.62,3.34,5.56,...,53.94,13.60,2021.00,4.00,14.00,2021.00,11.00,1.00,2021-04-14,2021-11-01
1,1270083600.00,2010-04-01 01:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,5.39,2.22,2.77,5.15,...,53.94,13.60,2021.00,4.00,14.00,2021.00,11.00,1.00,2021-04-14,2021-11-01
2,1270087200.00,2010-04-01 02:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,4.91,1.75,2.03,4.69,...,53.94,13.60,2021.00,4.00,14.00,2021.00,11.00,1.00,2021-04-14,2021-11-01
3,1270090800.00,2010-04-01 03:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,4.54,1.57,1.53,4.27,...,53.94,13.60,2021.00,4.00,14.00,2021.00,11.00,1.00,2021-04-14,2021-11-01
4,1270094400.00,2010-04-01 04:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,4.10,1.48,0.94,3.82,...,53.94,13.60,2021.00,4.00,14.00,2021.00,11.00,1.00,2021-04-14,2021-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857689,,,,Vierhöfen,,,,,,,...,48.78,12.74,2021.00,3.00,31.00,2021.00,10.00,11.00,2021-03-31,2021-10-11
857690,,,,Söllingen,,,,,,,...,52.11,10.93,2021.00,4.00,24.00,2021.00,11.00,1.00,2021-04-24,2021-11-01
857691,,,,Berklingen,,,,,,,...,52.10,10.73,2021.00,4.00,1.00,2021.00,9.00,11.00,2021-04-01,2021-09-11
857692,,,,Rittershausen,,,,,,,...,49.60,10.01,2021.00,4.00,14.00,2021.00,10.00,13.00,2021-04-14,2021-10-13


In [9]:
# drop unnecessary columns
dropcollist2 = ['sowing_year', 'sowing_month', 'sowing_day', 'harvesting_year', 'harvesting_month', 'havesting_day']
drop_columns(df_weatherlocations, dropcollist2)

Unnamed: 0,dt,dt_iso,timezone,station_location,lat,lon,temp,dew_point,feels_like,temp_min,...,weather_icon,date,year,month,day,plotting_date,latitude,longitude,sowing_date,harvesting_date
0,1270080000.00,2010-04-01 00:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,5.80,2.62,3.34,5.56,...,04n,2010-04-01 00:00:00,2010.00,4.00,1.00,91.00,53.94,13.60,2021-04-14,2021-11-01
1,1270083600.00,2010-04-01 01:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,5.39,2.22,2.77,5.15,...,03n,2010-04-01 01:00:00,2010.00,4.00,1.00,91.00,53.94,13.60,2021-04-14,2021-11-01
2,1270087200.00,2010-04-01 02:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,4.91,1.75,2.03,4.69,...,01n,2010-04-01 02:00:00,2010.00,4.00,1.00,91.00,53.94,13.60,2021-04-14,2021-11-01
3,1270090800.00,2010-04-01 03:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,4.54,1.57,1.53,4.27,...,01n,2010-04-01 03:00:00,2010.00,4.00,1.00,91.00,53.94,13.60,2021-04-14,2021-11-01
4,1270094400.00,2010-04-01 04:00:00 +0000 UTC,7200.00,Anklam,53.94,13.60,4.10,1.48,0.94,3.82,...,01n,2010-04-01 04:00:00,2010.00,4.00,1.00,91.00,53.94,13.60,2021-04-14,2021-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857689,,,,Vierhöfen,,,,,,,...,,NaT,,,,,48.78,12.74,2021-03-31,2021-10-11
857690,,,,Söllingen,,,,,,,...,,NaT,,,,,52.11,10.93,2021-04-24,2021-11-01
857691,,,,Berklingen,,,,,,,...,,NaT,,,,,52.10,10.73,2021-04-01,2021-09-11
857692,,,,Rittershausen,,,,,,,...,,NaT,,,,,49.60,10.01,2021-04-14,2021-10-13


In [10]:
# check for missing values
df_weatherlocations.isnull().sum()

dt                          6
dt_iso                      6
timezone                    6
station_location            0
lat                         6
lon                         6
temp                        6
dew_point                   6
feels_like                  6
temp_min                    6
temp_max                    6
pressure                    6
humidity                    6
wind_speed                  6
wind_deg                    6
clouds_all                  6
weather_id                  6
weather_main                6
weather_description         6
weather_icon                6
date                        6
year                        6
month                       6
day                         6
plotting_date               6
latitude               197928
longitude              197928
sowing_date            197928
harvesting_date        197928
dtype: int64

In [11]:
# remove missing values according to the datetime columns
df_weatherlocations = df_weatherlocations.dropna(subset=['dt_iso'], axis=0)

In [12]:
# replace station locations according to discussions with the stakeholder
df_weatherlocations['station_location'] = df_weatherlocations['station_location'].replace(r'Hamm', r'Soest', regex=True)
df_weatherlocations['station_location'] = df_weatherlocations['station_location'].replace(r'Herchsheim_2', r'Herchsheim', regex=True)
df_weatherlocations['station_location'] = df_weatherlocations['station_location'].replace(r'Rittershausen', r'Herchsheim', regex=True)
df_weatherlocations['station_location'] = df_weatherlocations['station_location'].replace(r'Oberviehhausen', r'Mattenkofen', regex=True)
df_weatherlocations['station_location'] = df_weatherlocations['station_location'].replace(r'Vierhöfen', r'Mattenkofen', regex=True)

In [13]:
# create columns for the first and the last growth stage
df_weatherlocations['s1'] = 30
df_weatherlocations['s2'] = - 45

# to create a development category, use np.where. First: create conditions (time frames) and values (category names)
conditions = [(df_weatherlocations.date >= (df_weatherlocations.sowing_date)) & (df_weatherlocations.date <= (df_weatherlocations.sowing_date + pd.to_timedelta(df_weatherlocations.s1, unit='d'))),
              (df_weatherlocations.date > (df_weatherlocations.sowing_date + pd.to_timedelta(df_weatherlocations.s1, unit='d'))) & (df_weatherlocations.date <= (df_weatherlocations.harvesting_date + pd.to_timedelta(df_weatherlocations.s2, unit='d'))),
              (df_weatherlocations.date > (df_weatherlocations.sowing_date + pd.to_timedelta(df_weatherlocations.s2, unit='d'))) & (df_weatherlocations.date <= (df_weatherlocations.harvesting_date))]
              #(df_weatherlocations.date_time > (df_weatherlocations.sowing_date + pd.to_timedelta(df_weatherlocations.s3, unit='d'))) & (df_weatherlocations.date_time <= (df_weatherlocations.sowing_date + pd.to_timedelta(df_weatherlocations.s4, unit='d')))
            

values = [1,2,3]
df_weatherlocations['development_category'] = np.select(conditions, values)

In [14]:
# make lists based on which columns are transformed (mean or sum) according to specific grouping (in this case month and location)
# 
new_col = ['temp_monthly', 
           'temp_min_monthly', 
           'temp_max_monthly', 
            'dew_point_monthly', 
           'pressure_monthly', 
           'humidity_monthly', 
           'wind_speed_monthly',
           'wind_deg_monthly'
           ]
grouping = ['station_location', 'month']
col_transform = ['temp',
                 'temp_min',
                 'temp_max',
                 'dew_point',
                 'pressure',
                 'humidity',
                 'wind_speed',
                 'wind_deg'
                 ]
dropcollist3 = ['year', 
                'dt', 
                'day', 
                'dt_iso', 
                'timezone', 
                'feels_like',
                'weather_main', 
                'weather_description', 
                'weather_icon', 
                'weather_id',
                'clouds_all',
                's1', 
                's2',
                'plotting_date',
                'lat', 
                'lon', 
                'date'
                ]

In [15]:
# create monthly dataframe
df_weatherlocations_monthly = drop_columns(df_weatherlocations, dropcollist3)

In [16]:
# make monthly average out of columns in col_transform list
column_transform(df_weatherlocations_monthly, new_col, grouping, col_transform, how='mean')

Unnamed: 0,station_location,month,latitude,longitude,sowing_date,harvesting_date,development_category,temp_monthly,temp_min_monthly,temp_max_monthly,dew_point_monthly,pressure_monthly,humidity_monthly,wind_speed_monthly,wind_deg_monthly
0,Anklam,4.00,53.94,13.60,2021-04-14,2021-11-01,0,8.11,7.04,9.09,3.44,1015.48,74.09,3.65,186.75
1,Anklam,4.00,53.94,13.60,2021-04-14,2021-11-01,0,8.11,7.04,9.09,3.44,1015.48,74.09,3.65,186.75
2,Anklam,4.00,53.94,13.60,2021-04-14,2021-11-01,0,8.11,7.04,9.09,3.44,1015.48,74.09,3.65,186.75
3,Anklam,4.00,53.94,13.60,2021-04-14,2021-11-01,0,8.11,7.04,9.09,3.44,1015.48,74.09,3.65,186.75
4,Anklam,4.00,53.94,13.60,2021-04-14,2021-11-01,0,8.11,7.04,9.09,3.44,1015.48,74.09,3.65,186.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857683,VierhÃ¶fen,9.00,,,NaT,NaT,0,14.99,14.17,15.60,9.60,1017.85,74.37,2.41,196.29
857684,VierhÃ¶fen,9.00,,,NaT,NaT,0,14.99,14.17,15.60,9.60,1017.85,74.37,2.41,196.29
857685,VierhÃ¶fen,9.00,,,NaT,NaT,0,14.99,14.17,15.60,9.60,1017.85,74.37,2.41,196.29
857686,VierhÃ¶fen,9.00,,,NaT,NaT,0,14.99,14.17,15.60,9.60,1017.85,74.37,2.41,196.29


## PIVOT

In [17]:
# create list of columns to include in pivot table
pivotvaluemonthlylist = ['latitude', 
                         'longitude', 
                         'sowing_date', 
                         'harvesting_date',
                         'development_category', 
                         'temp_monthly', 
                         'temp_min_monthly',
                         'temp_max_monthly', 
                         'dew_point_monthly', 
                         'pressure_monthly',
                         'humidity_monthly', 
                         'wind_speed_monthly', 
                         'wind_deg_monthly']


In [18]:
# make column name to str to facilitate flattening in the pivot dataframe later
df_weatherlocations_monthly.month = df_weatherlocations_monthly.month.astype(str)

In [19]:
df_weatherlocations_monthlypiv = pivot_frame(df_weatherlocations_monthly, 'station_location', 'month', pivotvaluemonthlylist)
df_weatherlocations_monthlypiv

Unnamed: 0,station_location,development_category_10.0,development_category_4.0,development_category_5.0,development_category_6.0,development_category_7.0,development_category_8.0,development_category_9.0,dew_point_monthly_10.0,dew_point_monthly_4.0,...,wind_deg_monthly_7.0,wind_deg_monthly_8.0,wind_deg_monthly_9.0,wind_speed_monthly_10.0,wind_speed_monthly_4.0,wind_speed_monthly_5.0,wind_speed_monthly_6.0,wind_speed_monthly_7.0,wind_speed_monthly_8.0,wind_speed_monthly_9.0
0,Anklam,0.25,0.14,0.12,0.15,0.15,0.15,0.19,7.81,3.44,...,208.02,196.87,201.52,3.73,3.65,3.45,3.16,3.27,3.11,3.42
1,Bautzen,0.16,0.21,0.09,0.15,0.15,0.15,0.22,7.53,4.14,...,222.35,204.85,209.46,3.43,3.22,3.07,2.77,2.75,2.54,2.83
2,Emmeloord,0.0,0.18,0.1,0.15,0.15,0.23,0.12,8.81,4.67,...,208.36,194.73,192.99,3.76,3.99,3.75,3.33,3.04,3.08,3.16
3,Goderville,0.25,0.08,0.15,0.15,0.15,0.15,0.17,10.07,6.1,...,205.9,210.39,195.64,5.16,4.43,4.45,4.21,4.12,4.13,4.33
4,Herchsheim,0.1,0.13,0.13,0.15,0.15,0.16,0.23,7.7,4.05,...,200.36,193.42,182.93,2.92,2.87,2.8,2.52,2.51,2.49,2.54
5,Mattenkofen,0.05,0.1,0.14,0.15,0.15,0.18,0.23,5.98,4.08,...,211.68,194.25,194.42,2.47,2.7,2.63,2.34,2.3,2.14,2.29
6,Pithiviers,0.0,0.1,0.15,0.15,0.15,0.19,0.22,9.61,5.71,...,189.67,194.48,177.02,3.57,3.47,3.45,3.27,3.2,3.05,3.14
7,Soest,0.25,0.16,0.11,0.15,0.15,0.15,0.19,8.58,4.65,...,210.1,198.07,195.71,3.46,2.93,2.9,2.76,2.65,2.79,2.86
8,Sommepy1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.85,4.72,...,191.31,197.19,176.13,3.54,3.32,3.26,3.1,3.04,2.91,3.04
9,Sommepy2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.73,4.67,...,193.49,197.69,179.24,3.67,3.4,3.33,3.17,3.1,3.0,3.13


In [20]:
# merge sugar beet dataframe with the pivoted monthly weather info
df_merge_weatherloc_monthly = merge_frames(df_weatherlocations_monthlypiv, df_sugarbeet, 'station_location')
print(f'the sugarbeet monthly dataframe has {df_merge_weatherloc_monthly.shape[0]} rows and {df_merge_weatherloc_monthly.shape[1]} columns')

the sugarbeet monthly dataframe has 12905 rows and 96 columns


In [21]:
df_merge_weatherloc_monthly.isnull().sum()

station_location                0
development_category_10.0    1735
development_category_4.0     1735
development_category_5.0     1735
development_category_6.0     1735
                             ... 
seednames_coded                 3
seriesid                        3
totaln_nir                      3
x                               3
y                               3
Length: 96, dtype: int64

In [22]:
df_merge_weatherloc_monthly = df_merge_weatherloc_monthly.dropna()

In [23]:
df_merge_weatherloc_monthly.to_pickle('pickles/01_df_merge_openweatherloc_monthly.pkl')