In [10]:
# Import needed module
import pandas as pd

# File paths of the input CSV files. Raw strings keep the Windows backslashes
# literal instead of relying on unrecognised escape sequences such as "\." or
# "\s", which newer Python versions warn about.
fp_summer_data = r"C:\...\summer_data.csv"
fp_winter_data = r"C:\...\winter_data.csv"
fp_districts = r"C:\...\zip.csv"

# Load the summer and winter tables (comma-separated files), summer first
summer_data, winter_data = (
    pd.read_csv(path, sep=",") for path in (fp_summer_data, fp_winter_data)
)

# Load the table of Helsinki major districts and zipcode areas
# (this file is semicolon-separated)
districts = pd.read_csv(fp_districts, sep=";")

# Telia grid ID-numbers which intersect with the layers of the central
# Helsinki districts Kamppi and Kluuvi
centre_list = [9014, 9016, 9038, 9054, 9055, 9056, 9057, 9058, 9060, 9072, 9073, 9076]

# Reduce the dataframes to contain only the grids of central Helsinki
summer_centre_data = summer_data.loc[summer_data['grid_id'].isin(centre_list)]
winter_centre_data = winter_data.loc[winter_data['grid_id'].isin(centre_list)]

# Keep only the rows of people whose home municipality is Helsinki (ID 91).
# The boolean mask is built from the frame that is being filtered, so the
# selection does not depend on pandas re-aligning a mask that was computed on
# the larger, unfiltered frame.
summer_centre_data_hki = summer_centre_data.loc[summer_centre_data['home_mun_ID'].isin([91])]
winter_centre_data_hki = winter_centre_data.loc[winter_centre_data['home_mun_ID'].isin([91])]

# Remove the columns that are not used in the later steps; the same set of
# columns is removed from both season dataframes
unneeded_columns = ['weekday', 'suuralue_ID', 'suuralue_name', 'hour', 'home_mun_ID', "home_mun_name", "home_cnty_name"]
summer_centre_data_hki = summer_centre_data_hki.drop(columns=unneeded_columns)
winter_centre_data_hki = winter_centre_data_hki.drop(columns=unneeded_columns)

# Rename the "posno" column so that it carries the same name as the join key
# of the season dataframes
districts = districts.rename(columns={"posno": "home_grid_ID"})

# Index the district table by the join key once, then attach it to each
# season dataframe with a left join on that index
districts_by_home_grid = districts.set_index("home_grid_ID")
joined_summer = summer_centre_data_hki.set_index("home_grid_ID").join(districts_by_home_grid, how="left")
joined_winter = winter_centre_data_hki.set_index("home_grid_ID").join(districts_by_home_grid, how="left")

# Row labels removed from the grouped tables; intended to be the Southern and
# Östersundom major districts.
# NOTE(review): these are row labels of the grouped result, so they only match
# those two districts if the groups sort into the expected order and no
# district is missing from a season -- confirm against the data.
rows_to_drop = [5, 7]

# Sum the presence counts per home major district, then drop the rows above
joined_summer_grouped = (
    joined_summer.groupby("Major districts", as_index=False)["presence_cnt"]
    .sum()
    .drop(index=rows_to_drop)
)
joined_winter_grouped = (
    joined_winter.groupby("Major districts", as_index=False)["presence_cnt"]
    .sum()
    .drop(index=rows_to_drop)
)

# Season totals of "presence_cnt" over the remaining major districts
total_summer = joined_summer_grouped["presence_cnt"].sum()
total_winter = joined_winter_grouped["presence_cnt"].sum()

# For each major district, express its presence count as a percentage of the
# season total (two decimals), then keep only that percentage under a
# season-specific column name
joined_summer_grouped = (
    joined_summer_grouped
    .assign(percent=lambda df: (df["presence_cnt"] / total_summer * 100).round(2))
    .drop(columns=["presence_cnt"])
    .rename(columns={"percent": "Summer (%)"})
)
joined_winter_grouped = (
    joined_winter_grouped
    .assign(percent=lambda df: (df["presence_cnt"] / total_winter * 100).round(2))
    .drop(columns=["presence_cnt"])
    .rename(columns={"percent": "Winter (%)"})
)

# Combine the winter and summer percentages into one table, keeping the major
# districts that appear in both (inner merge, winter columns first)
join = joined_winter_grouped.merge(joined_summer_grouped, how="inner", on="Major districts")

# Write the data for figure 3 to a csv-file, without the row index
join.to_csv("helsinki_home_locations.csv", index=False)