Finns and Alcohol Consumption: Do Seasonal Changes in Weather Affect Our Drinking Habits? 
Sini Suihkonen, Outi Savolainen and Fanni Franssila

In [2]:
import pandas as pd
import numpy as numpy
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [30]:
# Writes a single empty line to the cell output and nothing else.
# NOTE(review): looks like a leftover scratch cell — confirm whether it can be removed.
print()




In [42]:
def load_xls(file_name:str, year: int):
    """Collect monthly totals for four alcohol categories from a yearly workbook.

    For each of the twelve months the sheet named ``"<Month>kuu <year>"`` is
    read with the first three rows skipped and only spreadsheet column I kept.
    The values found at row positions 3, 14, 21 and 34 of that column are
    retained (per the original author's note these are the rows holding the
    total consumption of each alcohol type) and labelled ``Beer``, ``Wine``,
    ``Strong Wine`` and ``Spririts``.

    Parameters
    ----------
    file_name : str
        Path of the Excel workbook to read.
    year : int
        Year used both in the sheet names (``"Tammikuu 2020"``) and in the
        resulting column names (``"Tammikuu2020"``).

    Returns
    -------
    pandas.DataFrame
        One column per month, named ``"<Month>kuu<year>"``, indexed by the
        category labels. A category whose cell is empty in a given month is
        dropped from that month's column before the months are joined.
    """
    months = ["Tammi", "Helmi", "Maalis", "Huhti", "Touko", "Kesä", "Heinä", "Elo", "Syys", "Loka", "Marras", "Joulu"]
    # Row position inside the parsed sheet -> label used in the result index.
    # NOTE(review): "Spririts" looks like a misspelling of "Spirits"; it is kept
    # unchanged because code outside this function may look rows up by it.
    row_labels = {3: "Beer", 14: "Wine", 21: "Strong Wine", 34: "Spririts"}

    dataframes = []
    # The context manager closes the workbook even when a sheet is missing or
    # parsing fails; previously the file handle was never released.
    with pd.ExcelFile(file_name) as xls:
        for month in months:
            sheet = pd.read_excel(
                xls,
                sheet_name=f"{month}kuu {year}",
                skiprows=[0, 1, 2],
                usecols="I",
            )
            # One-row slices do not raise when a position lies past the end of
            # the sheet, so a short sheet simply contributes fewer rows.
            picked = pd.concat([sheet.iloc[pos:pos + 1] for pos in row_labels])
            # Add the month column, e.g. "Tammikuu2020". Assignment aligns on the
            # row index, so every row except the picked ones becomes NaN.
            sheet[f"{month}kuu{year}"] = picked
            # Dropping NaN rows leaves only the picked rows.
            sheet = sheet.dropna()
            # Discard the first column (the raw spreadsheet column); only the
            # month column remains.
            sheet = sheet.iloc[:, 1:]
            sheet = sheet.rename(index=row_labels)
            dataframes.append(sheet)

    # Place the twelve month columns side by side, aligned on the category labels.
    return pd.concat(dataframes, axis=1)

# Read the monthly sales workbook of each year (2017-2020) into its own table.
alc_data2020 = load_xls("Alkoholimyyntitilasto_tammi_joulukuu_2020.xlsx", year=2020)
alc_data2019 = load_xls("Alkoholimyyntitilasto_tammi_joulukuu_2019.xlsx", year=2019)
alc_data2018 = load_xls("Alkoholimyyntitilasto_tammi_joulukuu_2018.xlsx", year=2018)
alc_data2017 = load_xls("Alkoholimyyntitilasto_tammi_joulukuu_2017.xlsx", year=2017)

# Print the four tables, newest first. The 2020 table is followed directly by
# the 2019 table; each of the remaining tables ends with one empty line.
print(alc_data2020)
print(alc_data2019, end="\n\n")
print(alc_data2018, end="\n\n")
print(alc_data2017, end="\n\n")

             Tammikuu2020 Helmikuu2020 Maaliskuu2020 Huhtikuu2020  \
Beer              24824.0        24832         28384        32975   
Wine               5675.0         6277          7240         9430   
Strong Wine         146.0          147           152          170   
Spririts           3978.0         4361          4955         5887   

            Toukokuu2020 Kesäkuu2020 Heinäkuu2020 Elokuu2020 Syyskuu2020  \
Beer               32232       43291        36599      32270       29856   
Wine                8331       11718        10699       8431        7311   
Strong Wine          149         162          145         95         147   
Spririts            5483        8410         7608       6377        5416   

            Lokakuu2020 Marraskuu2020 Joulukuu2020  
Beer              28856         26986        36284  
Wine               7867          7202        10834  
Strong Wine         206           281          549  
Spririts           5859          4724         6387  
        

In [8]:
# WEATHER DATA
def load_csv(file_name:str):
    """Load a weather CSV and reduce it to monthly means of six columns.

    Steps:
      1. Read the CSV.
      2. Replace snow depth values of -1 in "Lumensyvyys (cm)" with 0.
      3. Average every numeric column per value of "Kk" (the month column).
      4. Drop the first two averaged columns (expected to be year and day —
         the selection is positional, so it depends on the CSV column order).
      5. Keep the columns at positions 0, 1, 2, 5, 6 and 11 of what remains.

    Parameters
    ----------
    file_name : str
        Path of the weather CSV file.

    Returns
    -------
    pandas.DataFrame
        Monthly means indexed by "Kk".
    """
    df_weather = pd.read_csv(file_name)

    # Replace negative snow depth values. The result is assigned back to the
    # frame: calling replace(..., inplace=True) on the selected column is a
    # chained in-place update, which does not modify df_weather under pandas
    # Copy-on-Write and would leave the -1 markers in the means.
    snow_col = "Lumensyvyys (cm)"
    df_weather[snow_col] = df_weather[snow_col].replace({-1: 0})

    # Monthly mean of each column. numeric_only=True skips text columns, which
    # pandas >= 2.0 would otherwise reject with a TypeError.
    df_mean = df_weather.groupby("Kk").mean(numeric_only=True)

    # Drop the first two columns (expected: year and day).
    df_mean = df_mean.iloc[:, 2:]

    # Limit the result to the columns used by the analysis.
    col = [0, 1, 2, 5, 6, 11]
    df_mean = df_mean.iloc[:, col]

    return df_mean

# SUN DATA
def load_sun_csv(file_name:str):
    """Return the per-month mean of a sunshine CSV.

    The comma-separated file is read, the time zone, year, day and time-of-day
    columns are removed, and the remaining columns are averaged for each value
    of "Kk" (month), which becomes the index of the returned DataFrame.
    """
    unused_columns = ["Aikavyöhyke", "Vuosi", "Pv", "Klo"]
    return (
        pd.read_csv(file_name, sep=",")
        .drop(columns=unused_columns)
        .groupby("Kk")
        .mean()
    )


    
# For every station and year, the monthly weather means are joined with the
# monthly sunshine means on the month column "Kk".

# Joensuu - Airport
weatherdata_JOE_2020 = load_csv("weather2020-JOE.csv").merge(
    load_sun_csv("sun2020-JOE.csv"), on="Kk"
)
weatherdata_JOE_2019 = load_csv("weather2019-JOE.csv").merge(
    load_sun_csv("sun2019-JOE.csv"), on="Kk"
)
weatherdata_JOE_2018 = load_csv("weather2018-JOE.csv").merge(
    load_sun_csv("sun2018-JOE.csv"), on="Kk"
)
weatherdata_JOE_2017 = load_csv("weather2017-JOE.csv").merge(
    load_sun_csv("sun2017-JOE.csv"), on="Kk"
)

# Helsinki - Kumpula
weatherdata_HEL_2020 = load_csv("weather2020-HEL.csv").merge(
    load_sun_csv("sun2020-HEL.csv"), on="Kk"
)
weatherdata_HEL_2019 = load_csv("weather2019-HEL.csv").merge(
    load_sun_csv("sun2019-HEL.csv"), on="Kk"
)
weatherdata_HEL_2018 = load_csv("weather2018-HEL.csv").merge(
    load_sun_csv("sun2018-HEL.csv"), on="Kk"
)
weatherdata_HEL_2017 = load_csv("weather2017-HEL.csv").merge(
    load_sun_csv("sun2017-HEL.csv"), on="Kk"
)

# Oulu - station not specified. The loads below are disabled.
# TODO: enable once the Oulu station and its data files are settled.
#weatherdata_OULU_2020 = load_csv("weather2020-OULU.csv")
#weatherdata_OULU_2020 = weatherdata_OULU_2020.merge(load_sun_csv("sun2020-OULU.csv"), on="Kk")
#weatherdata_OULU_2019 = load_csv("weather2019-OULU.csv")
#weatherdata_OULU_2019 = weatherdata_OULU_2019.merge(load_sun_csv("sun2019-OULU.csv"), on="Kk")
#weatherdata_OULU_2018 = load_csv("weather2018-OULU.csv")
#weatherdata_OULU_2018 = weatherdata_OULU_2018.merge(load_sun_csv("sun2018-OULU.csv"), on="Kk")
#weatherdata_OULU_2017 = load_csv("weather2017-OULU.csv")
#weatherdata_OULU_2017 = weatherdata_OULU_2017.merge(load_sun_csv("sun2017-OULU.csv"), on="Kk")

# Preview the first rows of the merged Helsinki 2020 table.
weatherdata_HEL_2020.head()

Unnamed: 0_level_0,Pilvien määrä (1/8),Ilmanpaine (msl) (hPa),Sademäärä (mm),Lumensyvyys (cm),Ilman lämpötila (degC),Tuulen nopeus (m/s),Paisteaika (s)
Kk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,5.279195,1006.211544,0.101611,0.095302,2.464564,6.067651,3.004032
2,5.307471,996.947701,0.165468,0.051873,0.950287,5.952374,6.400862
3,4.40457,1012.706048,0.091129,0.031081,2.072581,5.260162,11.807796
4,3.668056,1009.423611,0.064534,0.066667,4.94375,4.715417,18.159722
5,2.829071,1013.596501,0.065949,0.0,9.641992,4.185175,23.71467
