**1. IMPORTS**

In [1]:
#imports
import pandas as pd
import numpy as np
# ????? import matplotlib.pyplot as plt

#import data file, skip first row and adjust na-values
data = pd.read_csv("data/2315676.txt", delim_whitespace=True, skiprows = [1], na_values = ["-9999"])

#check data
data

Unnamed: 0,STATION,STATION_NAME,DATE,TAVG,TMAX,TMIN
0,GHCND:FI000007501,SODANKYLA-AWS-FI,19080101,,2.0,-37.0
1,GHCND:FI000007501,SODANKYLA-AWS-FI,19080102,,6.0,-26.0
2,GHCND:FI000007501,SODANKYLA-AWS-FI,19080103,,7.0,-27.0
3,GHCND:FI000007501,SODANKYLA-AWS-FI,19080104,,-3.0,-27.0
4,GHCND:FI000007501,SODANKYLA-AWS-FI,19080105,,4.0,-36.0
...,...,...,...,...,...,...
41060,GHCND:FI000007501,SODANKYLA-AWS-FI,20201003,47.0,51.0,
41061,GHCND:FI000007501,SODANKYLA-AWS-FI,20201004,43.0,47.0,37.0
41062,GHCND:FI000007501,SODANKYLA-AWS-FI,20201005,42.0,,37.0
41063,GHCND:FI000007501,SODANKYLA-AWS-FI,20201006,45.0,46.0,43.0


**2. CALCULATE AVERAGE AND REPLACE NaN VALUES IN TAVG**

In [2]:
#create new column and fill it with mean calculations
data["temp_avg"] = (data["TMAX"] + data["TMIN"]) / 2
#replace NaN in TAVG with temp_avg and drop still remaining NaN
data.TAVG.fillna(data.temp_avg, inplace = True)
data.dropna(subset=["TAVG"], inplace = True)

#check data
data

Unnamed: 0,STATION,STATION_NAME,DATE,TAVG,TMAX,TMIN,temp_avg
0,GHCND:FI000007501,SODANKYLA-AWS-FI,19080101,-17.5,2.0,-37.0,-17.5
1,GHCND:FI000007501,SODANKYLA-AWS-FI,19080102,-10.0,6.0,-26.0,-10.0
2,GHCND:FI000007501,SODANKYLA-AWS-FI,19080103,-10.0,7.0,-27.0,-10.0
3,GHCND:FI000007501,SODANKYLA-AWS-FI,19080104,-15.0,-3.0,-27.0,-15.0
4,GHCND:FI000007501,SODANKYLA-AWS-FI,19080105,-16.0,4.0,-36.0,-16.0
...,...,...,...,...,...,...,...
41060,GHCND:FI000007501,SODANKYLA-AWS-FI,20201003,47.0,51.0,,
41061,GHCND:FI000007501,SODANKYLA-AWS-FI,20201004,43.0,47.0,37.0,42.0
41062,GHCND:FI000007501,SODANKYLA-AWS-FI,20201005,42.0,,37.0,
41063,GHCND:FI000007501,SODANKYLA-AWS-FI,20201006,45.0,46.0,43.0,44.5


**3. CONVERT TEMPERATURES FROM FAHRENHEIT TO CELSIUS WITH A FUNCTION**

In [3]:
#create function to convert temps
def fahr_to_celsius(temp_fahr):
    """
    Function to calculate degree Farenheit to degree Celsius
    temp_fahr is number of degree Farenheit that should be calculated
    returns this number in degree Celsius
    """
    temp_cels = (temp_fahr - 32) / 1.8
    return temp_cels

#apply function to TAVG and rename column
data["TAVG"] = data["TAVG"].apply(fahr_to_celsius)
data.rename(columns = {"TAVG": "TAVG_C"}, inplace = True)

#check data
data

Unnamed: 0,STATION,STATION_NAME,DATE,TAVG_C,TMAX,TMIN,temp_avg
0,GHCND:FI000007501,SODANKYLA-AWS-FI,19080101,-27.500000,2.0,-37.0,-17.5
1,GHCND:FI000007501,SODANKYLA-AWS-FI,19080102,-23.333333,6.0,-26.0,-10.0
2,GHCND:FI000007501,SODANKYLA-AWS-FI,19080103,-23.333333,7.0,-27.0,-10.0
3,GHCND:FI000007501,SODANKYLA-AWS-FI,19080104,-26.111111,-3.0,-27.0,-15.0
4,GHCND:FI000007501,SODANKYLA-AWS-FI,19080105,-26.666667,4.0,-36.0,-16.0
...,...,...,...,...,...,...,...
41060,GHCND:FI000007501,SODANKYLA-AWS-FI,20201003,8.333333,51.0,,
41061,GHCND:FI000007501,SODANKYLA-AWS-FI,20201004,6.111111,47.0,37.0,42.0
41062,GHCND:FI000007501,SODANKYLA-AWS-FI,20201005,5.555556,,37.0,
41063,GHCND:FI000007501,SODANKYLA-AWS-FI,20201006,7.222222,46.0,43.0,44.5


**4. CALCULATE SEASONAL AVERAGE TEMPERATURES**

In [4]:
#slice date to get seperate columns with month and year
data["DATE_STR"] = data["DATE"].astype(str)
data["month"] = data["DATE_STR"].str.slice(start = 4, stop = 6)
data["year"] = data["DATE_STR"].str.slice(start = 0, stop = 4)

#create function to assign season to rows
def assign_season(row):
    if (row["month"] == "12") or (row["month"] == "01") or (row["month"] == "02"):
        return "winter"
    if (row["month"] == "03") or (row["month"] == "04") or (row["month"] == "05"):
        return "spring"
    if (row["month"] == "06") or (row["month"] == "07") or (row["month"] == "08"):
        return "summer"
    else:
        return "autumn"
#create season column and add data with applying function
data["season"] = data.apply(lambda row: assign_season(row), axis=1)

#group season and year to get avg for individual years
data["season_year"] = data["season"] + data["year"]
season_grouped = data.groupby("season_year")

#create empty data frame for season avg
season_avg = pd.DataFrame()
# loop to caculate mean and add to season_avg
mean = ["TAVG_C"]
#iterating over all groups
for key, group in season_grouped:
    #calculate mean
    mean_value = group[mean].mean()
    #add key
    mean_value["season_year"] = key
    #add mean values to data frame
    row = mean_value.to_frame().transpose()
    season_avg = pd.concat([season_avg, row], ignore_index = True)

#rename column TAVG_C
season_avg.rename(columns = {"TAVG_C": "temp_avg"}, inplace = True)

#check data
season_avg

Unnamed: 0,temp_avg,season_year
0,-1.859217,autumn1908
1,-0.155827,autumn1909
2,-1.742063,autumn1910
3,-1.27915,autumn1911
4,-2.073413,autumn1912
...,...,...
447,-11.190476,winter2016
448,-10.111111,winter2017
449,-11.555556,winter2018
450,-11.395062,winter2019


**5. CALCULATE SEASONAL AVERAGE TEMPERATURE FOR REFERENCE PERIOD 1951-1980**


In [11]:
#take only season specific data 
winter_data = data.loc[data["season"] == "winter"]
spring_data = data.loc[data["season"] == "spring"]
summer_data = data.loc[data["season"] == "summer"]
autumn_data = data.loc[data["season"] == "autumn"]

#filter season specific data according to reference time
winter_ref = winter_data.loc[(data["year"] >= "1951") & (data["year"] < "1980")]
spring_ref = spring_data.loc[(data["year"] >= "1951") & (data["year"] < "1980")]
summer_ref = summer_data.loc[(data["year"] >= "1951") & (data["year"] < "1980")]
autumn_ref = autumn_data.loc[(data["year"] >= "1951") & (data["year"] < "1980")]

#calculate and print average for each season
winter_avg = winter_ref["TAVG_C"].mean()
print(f"The average temperature for winter is", winter_avg)
spring_avg = spring_ref["TAVG_C"].mean()
print(f"The average temperature for spring is",spring_avg)
summer_avg = summer_ref["TAVG_C"].mean()
print(f"The average temperature for summer is",summer_avg)
autumn_avg = autumn_ref["TAVG_C"].mean()
print(f"The average temperature for autumn is",autumn_avg)

The average temperature for winter is -13.447013119347853
The average temperature for spring is -2.343561221513977
The average temperature for summer is 12.370689655172415
The average temperature for autumn is -0.747199056524303


**6. CALCULATE SEASONAL TEMPERATURE ANOMALIES**

In [27]:
#add column season
season_avg["season"] = season_avg["season_year"].str.slice(stop = -4)

def assign_refvalue(row):
    if row["season"] == "winter":
        return winter_avg
    if row["season"] == "spring":
        return spring_avg
    if row["season"] == "summer":
        return summer_avg
    else:
        return autumn_avg


#create function to assign season to rows
def assign_season(row):
    if (row["month"] == "12") or (row["month"] == "01") or (row["month"] == "02"):
        return "winter"
    if (row["month"] == "03") or (row["month"] == "04") or (row["month"] == "05"):
        return "spring"
    if (row["month"] == "06") or (row["month"] == "07") or (row["month"] == "08"):
        return "summer"
    else:
        return "autumn"
#create season column and add data with applying function
data["season"] = data.apply(lambda row: assign_season(row), axis=1)

#season_avg["season_ref"] = 0.0
#for i in season_avg["season"]:
 #   if i == "winter":
  #      season_avg["season_ref".append(winter[i])],
   # elif i == "spring":
    #    season_avg["season_ref".append(spring[i])]
                   

KeyError: 'winter_avg'

In [None]:


#plot like example