In [1]:
import sqlalchemy as sq
import geopandas as gpd  # type: ignore
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os, sys, calendar
from datetime import datetime as dt

sys.path.append("../")
from Shared.DataService import DataService


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd  # type: ignore


In [2]:
load_dotenv()
PG_DB = os.getenv("POSTGRES_DB")
PG_ADDR = os.getenv("POSTGRES_ADDR")
PG_PORT = os.getenv("POSTGRES_PORT")
PG_USER = os.getenv("POSTGRES_USER")
PG_PW = os.getenv("POSTGRES_PW")

In [None]:
def getConn():
    if (
        PG_DB is None
        or PG_ADDR is None
        or PG_PORT is None
        or PG_USER is None
        or PG_PW is None
    ):
        raise Exception("Missing required env var(s)")
    db = DataService(PG_DB, PG_ADDR, int(PG_PORT), PG_USER, PG_PW)
    
    return db.connect()

In [3]:
def pullWeatherStationData(conn: sq.engine.Connection) -> pd.DataFrame:
    # pulling weather station data from the database
    weatherDataQuery = sq.text(
        """
        SELECT * FROM public.agg_weather_combined
        """
    )

    return pd.read_sql(weatherDataQuery, conn)

In [4]:
def pullWeatherCopernicusData(conn: sq.engine.Connection) -> pd.DataFrame:
    # pulling weather station data from the database
    weatherDataQuery = sq.text(
        """
        SELECT * FROM public.agg_day_copernicus_satellite_data
        """
    )

    return pd.read_sql(weatherDataQuery, conn)

In [5]:
def pullSoilMoistureData(conn: sq.engine.Connection) -> pd.DataFrame:
    # pulling weather station data from the database
    weatherDataQuery = sq.text(
        """
        SELECT year, month, day, district, 
        soil_moisture_min, soil_moisture_max, soil_moisture_mean
        FROM public.agg_soil_moisture
        """
    )

    return pd.read_sql(weatherDataQuery, conn)

In [6]:
def pullAggErgotData(conn: sq.engine.Connection) -> pd.DataFrame:
    # pulling weather station data from the database
    weatherDataQuery = sq.text(
        """
        SELECT year, district, 
            present_prev1, 
            present_prev2, 
            present_prev3, 
            sum_severity_prev1, 
            sum_severity_prev2, 
            sum_severity_prev3, 
            percnt_true_prev1,
            percnt_true_prev2,
            percnt_true_prev3,
            median_prev1,
            median_prev2,
            median_prev3
        FROM public.agg_ergot_sample_v2
        """
    )

    return pd.read_sql(weatherDataQuery, conn)

In [7]:
def pullIndividualErgotSampleData(conn: sq.engine.Connection) -> pd.DataFrame:
    # pulling weather station data from the database
    weatherDataQuery = sq.text(
        """
        SELECT * FROM public.ergot_sample_feat_eng
        """
    )

    return pd.read_sql(weatherDataQuery, conn)

In [8]:
def addErgotData(df: pd.DataFrame) -> pd.DataFrame:
    conn = getConn()
    aggErgotDf = pullAggErgotData(conn)
    ergotDf = pullIndividualErgotSampleData(conn)

    # right join on year, district
    mergedDf = pd.merge(df, aggErgotDf, on=["year", "district"], how="right")
    mergedDf = pd.merge(mergedDf, ergotDf, on=["year", "district"], how="left")
    conn.close()
    return mergedDf

In [9]:
def getDailySatSoil() -> pd.DataFrame:
    conn = getConn()

    satelliteDf = pullWeatherCopernicusData(conn)
    soilDf = pullSoilMoistureData(conn)

    # merge on year, month, day, district
    mergedDf = pd.merge(satelliteDf, soilDf, on=["year", "month", "day", "district"])

    mergedDf["date"] = pd.to_datetime(mergedDf[["year", "month", "day"]])
    # add day of year column
    mergedDf["day_of_year"] = mergedDf["date"].dt.dayofyear

    mergedDf = mergedDf.drop(columns=["date"])

    conn.close()
    return mergedDf

In [10]:
def getWeeklySatSoil(dailyDf: pd.DataFrame) -> pd.DataFrame:
    dailyDf["date"] = pd.to_datetime(dailyDf[["year", "month", "day"]])
    # add a week of year column
    dailyDf["week_of_year"] = dailyDf["date"].dt.isocalendar().week
    dailyDf = dailyDf.drop(columns=["date"])

    # aggregate by week of year year and district
    weeklyDf = dailyDf.groupby(["year", "month", "week_of_year", "district"]).agg(
        {
            "min_dewpoint_temperature": "min",
            "min_temperature": "min",
            "min_evaporation_from_bare_soil": "min",
            "min_skin_reservoir_content": "min",
            "min_skin_temperature": "min",
            "min_snowmelt": "min",
            "min_soil_temperature_level_1": "min",
            "min_soil_temperature_level_2": "min",
            "min_soil_temperature_level_3": "min",
            "min_soil_temperature_level_4": "min",
            "min_surface_net_solar_radiation": "min",
            "min_surface_pressure": "min",
            "min_volumetric_soil_water_layer_1": "min",
            "min_volumetric_soil_water_layer_2": "min",
            "min_volumetric_soil_water_layer_3": "min",
            "min_volumetric_soil_water_layer_4": "min",
            "min_leaf_area_index_high_vegetation": "min",
            "min_leaf_area_index_low_vegetation": "min",
            "soil_moisture_min": "min",
            "max_dewpoint_temperature": "max",
            "max_temperature": "max",
            "max_evaporation_from_bare_soil": "max",
            "max_skin_reservoir_content": "max",
            "max_skin_temperature": "max",
            "max_snowmelt": "max",
            "max_soil_temperature_level_1": "max",
            "max_soil_temperature_level_2": "max",
            "max_soil_temperature_level_3": "max",
            "max_soil_temperature_level_4": "max",
            "max_surface_net_solar_radiation": "max",
            "max_surface_pressure": "max",
            "max_volumetric_soil_water_layer_1": "max",
            "max_volumetric_soil_water_layer_2": "max",
            "max_volumetric_soil_water_layer_3": "max",
            "max_volumetric_soil_water_layer_4": "max",
            "max_leaf_area_index_high_vegetation": "max",
            "max_leaf_area_index_low_vegetation": "max",
            "soil_moisture_max": "max",
            "mean_dewpoint_temperature": "mean",
            "mean_temperature": "mean",
            "mean_evaporation_from_bare_soil": "mean",
            "mean_skin_reservoir_content": "mean",
            "mean_skin_temperature": "mean",
            "mean_snowmelt": "mean",
            "mean_soil_temperature_level_1": "mean",
            "mean_soil_temperature_level_2": "mean",
            "mean_soil_temperature_level_3": "mean",
            "mean_soil_temperature_level_4": "mean",
            "mean_surface_net_solar_radiation": "mean",
            "mean_surface_pressure": "mean",
            "mean_volumetric_soil_water_layer_1": "mean",
            "mean_volumetric_soil_water_layer_2": "mean",
            "mean_volumetric_soil_water_layer_3": "mean",
            "mean_volumetric_soil_water_layer_4": "mean",
            "mean_leaf_area_index_high_vegetation": "mean",
            "mean_leaf_area_index_low_vegetation": "mean",
            "soil_moisture_mean": "mean",
        }
    )

    return weeklyDf

In [11]:
def getMonthlySatSoil(dailyDf: pd.DataFrame) -> pd.DataFrame:
    monthlyDf = dailyDf.groupby(["year", "month", "district"]).agg(
        {
            "min_dewpoint_temperature": "min",
            "min_temperature": "min",
            "min_evaporation_from_bare_soil": "min",
            "min_skin_reservoir_content": "min",
            "min_skin_temperature": "min",
            "min_snowmelt": "min",
            "min_soil_temperature_level_1": "min",
            "min_soil_temperature_level_2": "min",
            "min_soil_temperature_level_3": "min",
            "min_soil_temperature_level_4": "min",
            "min_surface_net_solar_radiation": "min",
            "min_surface_pressure": "min",
            "min_volumetric_soil_water_layer_1": "min",
            "min_volumetric_soil_water_layer_2": "min",
            "min_volumetric_soil_water_layer_3": "min",
            "min_volumetric_soil_water_layer_4": "min",
            "min_leaf_area_index_high_vegetation": "min",
            "min_leaf_area_index_low_vegetation": "min",
            "soil_moisture_min": "min",
            "max_dewpoint_temperature": "max",
            "max_temperature": "max",
            "max_evaporation_from_bare_soil": "max",
            "max_skin_reservoir_content": "max",
            "max_skin_temperature": "max",
            "max_snowmelt": "max",
            "max_soil_temperature_level_1": "max",
            "max_soil_temperature_level_2": "max",
            "max_soil_temperature_level_3": "max",
            "max_soil_temperature_level_4": "max",
            "max_surface_net_solar_radiation": "max",
            "max_surface_pressure": "max",
            "max_volumetric_soil_water_layer_1": "max",
            "max_volumetric_soil_water_layer_2": "max",
            "max_volumetric_soil_water_layer_3": "max",
            "max_volumetric_soil_water_layer_4": "max",
            "max_leaf_area_index_high_vegetation": "max",
            "max_leaf_area_index_low_vegetation": "max",
            "soil_moisture_max": "max",
            "mean_dewpoint_temperature": "mean",
            "mean_temperature": "mean",
            "mean_evaporation_from_bare_soil": "mean",
            "mean_skin_reservoir_content": "mean",
            "mean_skin_temperature": "mean",
            "mean_snowmelt": "mean",
            "mean_soil_temperature_level_1": "mean",
            "mean_soil_temperature_level_2": "mean",
            "mean_soil_temperature_level_3": "mean",
            "mean_soil_temperature_level_4": "mean",
            "mean_surface_net_solar_radiation": "mean",
            "mean_surface_pressure": "mean",
            "mean_volumetric_soil_water_layer_1": "mean",
            "mean_volumetric_soil_water_layer_2": "mean",
            "mean_volumetric_soil_water_layer_3": "mean",
            "mean_volumetric_soil_water_layer_4": "mean",
            "mean_leaf_area_index_high_vegetation": "mean",
            "mean_leaf_area_index_low_vegetation": "mean",
            "soil_moisture_mean": "mean",
        }
    )

    return monthlyDf

In [12]:
def getDailyStationSoil() -> pd.DataFrame:
    conn = getConn()

    stationDf = pullWeatherStationData(conn)
    soilDf = pullSoilMoistureData(conn)

    # merge on year, month, day, district
    mergedDf = pd.merge(stationDf, soilDf, on=["year", "month", "day", "district"])

    mergedDf["date"] = pd.to_datetime(mergedDf[["year", "month", "day"]])
    # add day of year column
    mergedDf["day_of_year"] = mergedDf["date"].dt.dayofyear

    mergedDf = mergedDf.drop(columns=["date"])

    conn.close()
    return mergedDf

In [13]:
def getWeeklyStationSoil(dailyDf: pd.DataFrame) -> pd.DataFrame:
    dailyDf["date"] = pd.to_datetime(dailyDf[["year", "month", "day"]])
    # add a week of year column
    dailyDf["week_of_year"] = dailyDf["date"].dt.isocalendar().week
    dailyDf = dailyDf.drop(columns=["date"])

    # aggregate by week of year year and district
    weeklyDf = dailyDf.groupby(["year", "month", "week_of_year", "district"]).agg(
        {
            "min_temp_x": "min",
            "max_temp_x": "max",
            "mean_temp_x": "mean",
            "min_dew_point_temp": "min",
            "max_dew_point_temp": "max",
            "mean_dew_point_temp": "mean",
            "min_humidex": "min",
            "max_humidex": "max",
            "mean_humidex": "mean",
            "min_precip": "min",
            "max_precip": "max",
            "mean_precip": "mean",
            "min_rel_humid": "min",
            "max_rel_humid": "max",
            "mean_rel_humid": "mean",
            "min_stn_press": "min",
            "max_stn_press": "max",
            "mean_stn_press": "mean",
            "min_visibility": "min",
            "max_visibility": "max",
            "mean_visibility": "mean",
            "max_temp_y": "max",
            "min_temp_y": "min",
            "mean_temp_y": "mean",
            "min_total_rain": "min",
            "max_total_rain": "max",
            "mean_total_rain": "mean",
            "min_total_snow": "min",
            "max_total_snow": "max",
            "mean_total_snow": "mean",
            "min_total_precip": "min",
            "max_total_precip": "max",
            "mean_total_precip": "mean",
            "min_snow_on_grnd": "min",
            "max_snow_on_grnd": "max",
            "mean_snow_on_grnd": "mean",
            "soil_moisture_min": "min",
            "soil_moisture_max": "max",
            "soil_moisture_mean": "mean",
        }
    )
    return weeklyDf

In [14]:
def getMonthlyStationSoil(dailyDf: pd.DataFrame) -> pd.DataFrame:
    monthlyDf = dailyDf.groupby(["year", "month", "district"]).agg(
        {
            "min_temp_x": "min",
            "max_temp_x": "max",
            "mean_temp_x": "mean",
            "min_dew_point_temp": "min",
            "max_dew_point_temp": "max",
            "mean_dew_point_temp": "mean",
            "min_humidex": "min",
            "max_humidex": "max",
            "mean_humidex": "mean",
            "min_precip": "min",
            "max_precip": "max",
            "mean_precip": "mean",
            "min_rel_humid": "min",
            "max_rel_humid": "max",
            "mean_rel_humid": "mean",
            "min_stn_press": "min",
            "max_stn_press": "max",
            "mean_stn_press": "mean",
            "min_visibility": "min",
            "max_visibility": "max",
            "mean_visibility": "mean",
            "max_temp_y": "max",
            "min_temp_y": "min",
            "mean_temp_y": "mean",
            "min_total_rain": "min",
            "max_total_rain": "max",
            "mean_total_rain": "mean",
            "min_total_snow": "min",
            "max_total_snow": "max",
            "mean_total_snow": "mean",
            "min_total_precip": "min",
            "max_total_precip": "max",
            "mean_total_precip": "mean",
            "min_snow_on_grnd": "min",
            "max_snow_on_grnd": "max",
            "mean_snow_on_grnd": "mean",
            "soil_moisture_min": "min",
            "soil_moisture_max": "max",
            "soil_moisture_mean": "mean",
        }
    )
    return monthlyDf

In [15]:
def generateSatelliteWeatherDatasetCSVs():
    TABLEDAILY = "dataset_daily_sat_soil_ergot"
    TABLEWEEKLY = "dataset_weekly_sat_soil_ergot"
    TABLEMONTHLY = "dataset_monthly_sat_soil_ergot"
    conn = getConn()

    dailyDf = getDailySatSoil()
    dailyDf = addErgotData(dailyDf)
    # dailyDf.to_sql(TABLEDAILY, conn, schema="public", if_exists="replace", index=False)
    dailyDf.to_csv("data/daily_sat_soil_ergot.csv", index=False)

    dailyDf = getDailySatSoil()
    weeklyDf = getWeeklySatSoil(dailyDf)
    weeklyDf = addErgotData(weeklyDf)
    # weeklyDf.to_sql(TABLEWEEKLY, conn, schema="public", if_exists="replace", index=False)
    weeklyDf.to_csv("data/weekly_sat_soil_ergot.csv", index=False)

    dailyDf = getDailySatSoil()
    monthlyDf = getMonthlySatSoil(dailyDf)
    monthlyDf = addErgotData(monthlyDf)
    # monthlyDf.to_sql(TABLEMONTHLY, conn, schema="public", if_exists="replace", index=False)
    monthlyDf.to_csv("data/monthly_sat_soil_ergot.csv", index=False)

    conn.close()

In [16]:
def generateStationWeatherDatasetCSVs():
    TABLEDAILY = "dataset_daily_station_soil_ergot"
    TABLEWEEKLY = "dataset_weekly_station_soil_ergot"
    TABLEMONTHLY = "dataset_monthly_station_soil_ergot"
    conn = getConn()

    dailyDf = getDailyStationSoil()
    dailyDf = addErgotData(dailyDf)
    # dailyDf.to_sql(TABLEDAILY, conn, schema="public", if_exists="replace", index=False)
    dailyDf.to_csv("data/daily_station_ergot.csv", index=False)

    dailyDf = getDailyStationSoil()
    weeklyDf = getWeeklyStationSoil(dailyDf)
    weeklyDf = addErgotData(weeklyDf)
    # weeklyDf.to_sql(TABLEWEEKLY, conn, schema="public", if_exists="replace", index=False)
    weeklyDf.to_csv("data/weekly_station_ergot.csv", index=False)

    dailyDf = getDailyStationSoil()
    monthlyDf = getMonthlyStationSoil(dailyDf)
    monthlyDf = addErgotData(monthlyDf)
    # monthlyDf.to_sql(TABLEMONTHLY, conn, schema="public", if_exists="replace", index=False)
    monthlyDf.to_csv("data/monthly_station_ergot.csv", index=False)

    conn.close()

In [17]:
def pushCsvToDB(filename: str, tablename: str) -> None:
    conn = getConn()

    for df in pd.read_csv(filename, chunksize=1000):
        df.to_sql(tablename, conn, schema="public", if_exists="append", index=False)

    conn.close()

In [18]:
def pushCSV():
    # pushCsvToDB("data/daily_sat_soil_ergot.csv", "dataset_daily_sat_soil_ergot")
    pushCsvToDB("data/weekly_sat_soil_ergot.csv", "dataset_weekly_sat_soil_ergot")
    pushCsvToDB("data/monthly_sat_soil_ergot.csv", "dataset_monthly_sat_soil_ergot")
    # pushCsvToDB("data/daily_station_ergot.csv", "dataset_daily_station_soil_ergot")
    pushCsvToDB("data/weekly_station_ergot.csv", "dataset_weekly_station_soil_ergot")
    pushCsvToDB("data/monthly_station_ergot.csv", "dataset_monthly_station_soil_ergot")

In [19]:
def pushChunkwise(df: pd.DataFrame, tablename: str) -> None:
    conn = getConn()
    
    df.to_sql(tablename, conn, schema="public", if_exists="append", index=False, chunksize=1000)

    conn.close()

In [23]:
def generateNoErgotTables():
    TABLESATDAILY = "dataset_daily_sat_soil"
    TABLESATWEEKLY = "dataset_weekly_sat_soil"
    TABLESATMONTHLY = "dataset_monthly_sat_soil"
    TABLESTATIONDAILY = "dataset_daily_station_soil"
    TABLESTATIONWEEKLY = "dataset_weekly_station_soil"
    TABLESTATIONMONTHLY = "dataset_monthly_station_soil"
    TABLESATDAILYNOSOILM = "dataset_daily_sat"
    TABLESATWEEKLYNOSOILM = "dataset_weekly_sat"
    TABLESATMONTHLYNOSOILM = "dataset_monthly_sat"
    TABLESTATIONDAILYNOSOILM = "dataset_daily_station"
    TABLESTATIONWEEKLYNOSOILM = "dataset_weekly_station"
    TABLESTATIONMONTHLYNOSOILM = "dataset_monthly_station"

    # dailyDf = getDailySatSoil()
    # pushChunkwise(dailyDf, TABLESATDAILY)

    # dailyDf = getDailyStationSoil()
    # pushChunkwise(dailyDf, TABLESTATIONDAILY)

    dailyDf = getDailySatSoil()
    weeklyDf = getWeeklySatSoil(dailyDf)
    pushChunkwise(weeklyDf, TABLESATWEEKLY)

    dailyDf = getDailyStationSoil()
    weeklyDf = getWeeklyStationSoil(dailyDf)
    pushChunkwise(weeklyDf, TABLESTATIONWEEKLY)

    dailyDf = getDailySatSoil()
    monthlyDf = getMonthlySatSoil(dailyDf)
    pushChunkwise(monthlyDf, TABLESATMONTHLY)

    dailyDf = getDailyStationSoil()
    monthlyDf = getMonthlyStationSoil(dailyDf)
    pushChunkwise(monthlyDf, TABLESTATIONMONTHLY)

    dailyDf = getDailySatSoil()
    dailyDf = dailyDf.drop(columns=["soil_moisture_min", "soil_moisture_max", "soil_moisture_mean"])
    pushChunkwise(dailyDf, TABLESATDAILYNOSOILM)

    dailyDf = getDailyStationSoil()
    dailyDf = dailyDf.drop(columns=["soil_moisture_min", "soil_moisture_max", "soil_moisture_mean"])
    pushChunkwise(dailyDf, TABLESTATIONDAILYNOSOILM)

    dailyDf = getDailySatSoil()
    weeklyDf = getWeeklySatSoil(dailyDf)
    weeklyDf = weeklyDf.drop(columns=["soil_moisture_min", "soil_moisture_max", "soil_moisture_mean"])
    pushChunkwise(weeklyDf, TABLESATWEEKLYNOSOILM)

    dailyDf = getDailyStationSoil()
    weeklyDf = getWeeklyStationSoil(dailyDf)
    weeklyDf = weeklyDf.drop(columns=["soil_moisture_min", "soil_moisture_max", "soil_moisture_mean"])
    pushChunkwise(weeklyDf, TABLESTATIONWEEKLYNOSOILM)

    dailyDf = getDailySatSoil()
    monthlyDf = getMonthlySatSoil(dailyDf)
    monthlyDf = monthlyDf.drop(columns=["soil_moisture_min", "soil_moisture_max", "soil_moisture_mean"])
    pushChunkwise(monthlyDf, TABLESATMONTHLYNOSOILM)

    dailyDf = getDailyStationSoil()
    monthlyDf = getMonthlyStationSoil(dailyDf)
    monthlyDf = monthlyDf.drop(columns=["soil_moisture_min", "soil_moisture_max", "soil_moisture_mean"])
    pushChunkwise(monthlyDf, TABLESTATIONMONTHLYNOSOILM)




In [None]:
def getDailySat() -> pd.DataFrame:
    conn = getConn()

    query = sq.text(
        """
        SELECT * FROM public.dataset_daily_sat
        """
    )

    df = pd.read_sql(query, conn)

    conn.close()
    return df

In [None]:
def getDailyStation() -> pd.DataFrame:
    conn = getConn()

    query = sq.text(
        """
        SELECT * FROM public.dataset_daily_station
        """
    )

    df = pd.read_sql(query, conn)

    conn.close()
    return df

In [None]:
def getWeeklySat() -> pd.DataFrame:
    conn = getConn()
    
    query = sq.text(
        """
        SELECT * FROM public.dataset_weekly_sat
        """
    )

    df = pd.read_sql(query, conn)
    conn.close()
    
    return df

In [None]:
def getWeeklyStation() -> pd.DataFrame:
    conn = getConn()
    
    query = sq.text(
        """
        SELECT * FROM public.dataset_weekly_station
        """
    )

    df = pd.read_sql(query, conn)
    conn.close()
    
    return df

In [None]:
def getMonthlySat() -> pd.DataFrame:
    conn = getConn()
    
    query = sq.text(
        """
        SELECT * FROM public.dataset_monthly_sat
        """
    )

    df = pd.read_sql(query, conn)
    conn.close()
    
    return df

In [None]:
def getMonthlyStation() -> pd.DataFrame:
    conn = getConn()
    
    query = sq.text(
        """
        SELECT * FROM public.dataset_monthly_station
        """
    )

    df = pd.read_sql(query, conn)
    conn.close()

    return df

In [None]:
def crossWeekOfYear(df: pd.Dataframe, weekRange: list(int), exclude: list(str)) -> pd.DataFrame:
    ''' We create a table where each row is a district and each column is a week of the year crossed with a weather attribute'''
    ''' the columns are labeled as such: attribute_week_of_year '''
    ''' the weekRange is a list of ints that represent the weeks of the year we want to include in the table '''

    crossedDf = pd.DataFrame()
    crossedDf["year"] = df["year"]
    crossedDf["district"] = df["district"]

    # get the columns we will want to pull information from
    cols = df.columns.tolist()  # type: ignore
    for col in exclude:
        cols.remove(col)

    for week in weekRange:
        for col in cols:
            crossedDf[f"{col}_{week}"] = df.loc[df["week_of_year"] == week][f"{col}"]
    
    return crossedDf

In [None]:
def crossMonthOfYear(df: pd.Dataframe, monthRange: list(int), exclude: list(str)) -> pd.DataFrame:
    ''' We create a table where each row is a district and each column is a month of the year crossed with a weather attribute'''
    ''' the columns are labeled as such: attribute_month_of_year '''
    ''' the monthRange is a list of ints that represent the months of the year we want to include in the table '''

    crossedDf = pd.DataFrame()
    crossedDf["year"] = df["year"]
    crossedDf["district"] = df["district"]

    # get the columns we will want to pull information from
    cols = df.columns.tolist()  # type: ignore
    for col in exclude:
        cols.remove(col)

    for month in monthRange:
        for col in cols:
            crossedDf[f"{col}_{month}"] = df.loc[df["month"] == month][f"{col}"]
    
    return crossedDf

In [None]:
def generateFeatureCrossTables():
    TABLECROSSSATWEEKLYA = "dataset_cross_weekly_sat_JFMA"
    TABLECROSSSATWEEKLYB = "dataset_cross_weekly_sat_MAMJ"
    TABLECROSSSATWEEKLYC = "dataset_cross_weekly_sat_MJJA"
    TABLECROSSSATWEEKLYD = "dataset_cross_weekly_sat_JASO"
    TABLECROSSSATMONTHLY = "dataset_cross_monthly_sat"
    TABLECROSSSTATIONWEEKLYA = "dataset_cross_weekly_station_JFMA"
    TABLECROSSSTATIONWEEKLYB = "dataset_cross_weekly_station_MAMJ"
    TABLECROSSSTATIONWEEKLYC = "dataset_cross_weekly_station_MJJA"
    TABLECROSSSTATIONWEEKLYD = "dataset_cross_weekly_station_JASO"
    TABLECROSSSTATIONMONTHLY = "dataset_cross_monthly_station"

    # weekly jan feb mar apr
    weeks = [1, 2, 3, 4]
    satDf = getWeeklySat()
    crossedDf = crossWeekOfYear(satDf, weeks, ["year", "district", "week_of_year"])


In [24]:
if __name__ == "__main__":
    # generateSatelliteWeatherDatasetCSVs()
    # generateStationWeatherDatasetCSVs()
    # pushCSV()
    generateNoErgotTables()
    