In [179]:
# import libraries and tools
import pandas as pd
import numpy as np
import missingno as msno
import metpy.calc
from metpy.units import units
from datetime import datetime, date, time, timedelta

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.model_selection import train_test_split

from variable_names import vars_drop

import warnings
warnings.filterwarnings('ignore')

In [180]:
# Load the training data and repeat the preparation steps of the main notebook
df = pd.read_csv('data/Train.csv')

# discard the columns listed in vars_drop
df = df.drop(columns=vars_drop)

# derive wind direction and wind speed from the u and v wind components
uwind = np.array(df['u_component_of_wind_10m_above_ground']) * units('m/s')
vwind = np.array(df['v_component_of_wind_10m_above_ground']) * units('m/s')
df['winddir'] = metpy.calc.wind_direction(uwind, vwind)
df['windspeed'] = metpy.calc.wind_speed(uwind, vwind)

# the raw components are not needed once direction and speed exist
df = df.drop(
    columns=[
        'u_component_of_wind_10m_above_ground',
        'v_component_of_wind_10m_above_ground',
    ]
)

# parse the date strings (YYYY-MM-DD) into pandas datetimes
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

# add the day of the week as an integer, Monday = 0
df['weekday'] = df['Date'].dt.dayofweek

In [198]:
# Column selections for building the prior-day feature dataframe.
# The names match the parameters of function_prior_day_features.

# columns kept in place (not shifted) in the new dataframe
list_features_keep = ["Date", "Place_ID", "target"]

# column left out before the remaining columns are shifted
# NOTE: despite the name this is a single column label (a string), not a list
list_features_drop = "target"

# helper columns to remove from the final dataframe
list_features_drop2 = ["Date_prior", "Place_ID_prior", "deltatime"]

In [None]:
def function_prior_day_features(df, list_features_keep, list_features_drop, list_features_drop2):
    '''Attach the previous row's values to each row as ``*_prior`` columns.

    Every column of ``df`` except ``list_features_drop`` is renamed with the
    suffix ``_prior`` and shifted down by one row, then placed next to the
    columns named in ``list_features_keep``. A row is kept only when the row
    directly above it is exactly one day earlier and has the same
    ``Place_ID``; all other rows are removed, so the result may contain fewer
    rows than ``df``.

    The prior day is looked up purely by row position. ``df`` must therefore
    be ordered so that consecutive days of one place sit on consecutive rows
    (for example sorted by ``Place_ID`` and then ``Date``); otherwise
    existing prior days are not found and their rows are dropped.

    Args:
        df: DataFrame with a datetime ``Date`` column and a ``Place_ID``
            column. It is not modified.
        list_features_keep: columns copied unshifted into the result. Must
            contain ``'Date'`` and ``'Place_ID'``.
        list_features_drop: column label or list of labels excluded from the
            shifted ``*_prior`` columns. Must not contain ``'Place_ID'``,
            because ``Place_ID_prior`` is needed for the location check.
        list_features_drop2: column label or list of labels removed from the
            result at the end, e.g. the helper columns ``'Date_prior'``,
            ``'Place_ID_prior'`` and ``'deltatime'``.

    Returns:
        A new DataFrame holding the kept columns plus the ``*_prior`` columns
        for every row that has a valid prior day. Kept rows retain their
        original index labels.

    Raises:
        KeyError: if a required or requested column is missing.
    '''
    # columns that stay aligned with the current day
    df_current = df[list_features_keep]

    # all remaining columns, renamed and moved down one row so that each row
    # sees the values of the row directly above it
    df_shift = (
        df.drop(columns=list_features_drop)
        .add_suffix('_prior')
        .shift(periods=1)
    )

    df_concat = pd.concat([df_current, df_shift], axis=1)

    # time elapsed since the row directly above (NaT for the first row)
    df_concat['deltatime'] = df_concat['Date'] - df_concat['Date'].shift()

    # Keep a row only if the row above is the previous calendar day of the
    # same location. Rows are selected with a boolean mask instead of being
    # dropped by index label: label-based dropping would also remove valid
    # rows whenever the index contains duplicate labels.
    is_next_day = df_concat['deltatime'] == pd.Timedelta(days=1)
    is_same_place = df_concat['Place_ID'] == df_concat['Place_ID_prior']
    df_concat = df_concat.loc[is_next_day & is_same_place]

    # remove the helper columns requested by the caller
    return df_concat.drop(columns=list_features_drop2)