# In [25]:
# Import needed module
import pandas as pd

# File paths of the summer and winter input CSV files.
# Raw strings keep the Windows backslashes literal, so sequences such as
# "\." , "\s" and "\w" are not parsed as (invalid) escape sequences.
fp_summer_data = r"C:\...\summer_data.csv"
fp_winter_data = r"C:\...\winter_data.csv"

# Load both comma-separated input files into dataframes (summer first, then winter)
summer_data, winter_data = (
    pd.read_csv(csv_path, sep=",")
    for csv_path in (fp_summer_data, fp_winter_data)
)

# Telia grid ID-numbers which intersect with the layers of the central
# Helsinki districts Kamppi and Kluuvi
centre_list = [
    9014, 9016, 9038, 9054, 9055, 9056,
    9057, 9058, 9060, 9072, 9073, 9076,
]

# Keep only the rows whose "grid_id" is one of the central Helsinki grids
summer_centre_data = summer_data[summer_data["grid_id"].isin(centre_list)]
winter_centre_data = winter_data[winter_data["grid_id"].isin(centre_list)]

# Sum "presence_cnt" per home region; as_index=False keeps "home_cnty_name"
# as a regular column instead of turning it into the index
summer_centre_data_grouped = (
    summer_centre_data
    .groupby(by="home_cnty_name", as_index=False)["presence_cnt"]
    .sum()
)
winter_centre_data_grouped = (
    winter_centre_data
    .groupby(by="home_cnty_name", as_index=False)["presence_cnt"]
    .sum()
)

# Remove the rows whose "home_cnty_name" column has the value "Uusimaa".
# A boolean mask filters all rows in one vectorised step and avoids dropping
# rows from a dataframe while it is being iterated with iterrows().
winter_centre_data_grouped = winter_centre_data_grouped[
    winter_centre_data_grouped["home_cnty_name"] != "Uusimaa"
]
summer_centre_data_grouped = summer_centre_data_grouped[
    summer_centre_data_grouped["home_cnty_name"] != "Uusimaa"
]

# Order both dataframes from the largest to the smallest "presence_cnt"
# and renumber the rows from 0
winter_centre_data_grouped = (
    winter_centre_data_grouped
    .sort_values("presence_cnt", ascending=False)
    .reset_index(drop=True)
)
summer_centre_data_grouped = (
    summer_centre_data_grouped
    .sort_values("presence_cnt", ascending=False)
    .reset_index(drop=True)
)

# Keep only the first four rows (the four biggest "presence_cnt" values, given
# that the dataframes are sorted in descending order) and sum the
# "presence_cnt" of all remaining rows. Positional slicing replaces dropping
# rows one by one while iterating with iterrows().
others_winter = winter_centre_data_grouped.iloc[4:]["presence_cnt"].sum()
winter_centre_data_grouped = winter_centre_data_grouped.iloc[:4]
others_summer = summer_centre_data_grouped.iloc[4:]["presence_cnt"].sum()
summer_centre_data_grouped = summer_centre_data_grouped.iloc[:4]

# Create one-row dataframes of the combined "Others" values
df_winter = pd.DataFrame({"home_cnty_name": "Others", "presence_cnt": others_winter}, index=[0])
df_summer = pd.DataFrame({"home_cnty_name": "Others", "presence_cnt": others_summer}, index=[0])

# Add the "Others" rows below the top-4 rows. pd.concat is used because
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
winter_centre_data_grouped = pd.concat([winter_centre_data_grouped, df_winter], ignore_index=True)
summer_centre_data_grouped = pd.concat([summer_centre_data_grouped, df_summer], ignore_index=True)

# Seasonal totals of "presence_cnt", used as denominators for the percentages
total_summer = summer_centre_data_grouped["presence_cnt"].sum()
total_winter = winter_centre_data_grouped["presence_cnt"].sum()

# Share of each row in its seasonal total, as a percentage rounded to two
# decimals, stored in a new "percent" column
summer_centre_data_grouped["percent"] = (
    summer_centre_data_grouped["presence_cnt"].div(total_summer).mul(100).round(2)
)
winter_centre_data_grouped["percent"] = (
    winter_centre_data_grouped["presence_cnt"].div(total_winter).mul(100).round(2)
)

# Remove the raw "presence_cnt" column and give the remaining columns their
# display names (season-specific name for the percentage column)
summer_centre_data_grouped = (
    summer_centre_data_grouped
    .drop(columns="presence_cnt")
    .rename(columns={"home_cnty_name": "Home region", "percent": "Summer (%)"})
)
winter_centre_data_grouped = (
    winter_centre_data_grouped
    .drop(columns="presence_cnt")
    .rename(columns={"home_cnty_name": "Home region", "percent": "Winter (%)"})
)
                                                                        
# Combine the two seasons on "Home region": winter column first, then summer.
# NOTE(review): merge defaults to an inner join, so a region that appears in
# only one of the two dataframes is left out — confirm this is intended.
join = winter_centre_data_grouped.merge(summer_centre_data_grouped, on="Home region")

# Write the joined table (the data for figure 2) to a csv-file without the index
join.to_csv("centre_regions.csv", index=False)

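# NOTE(review): the two indented lines below look like captured notebook output
# (source lines echoed by a warning), not statements to run. They are kept as
# comments so the file stays valid Python — confirm before removing.
#   winter_centre_data_grouped = winter_centre_data_grouped.append(df_winter, ignore_index = True)
#   summer_centre_data_grouped = summer_centre_data_grouped.append(df_summer, ignore_index = True)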
