In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

sns.set_style('darkgrid')
sns.set(font_scale=1.3)

In [2]:
#Load the 2 datasets
mhdf = pd.read_csv('mental_data_ccgs_dec2019_to_feb2022.csv')
wdf = pd.read_csv('merged_weather_temp_and_daylight.csv')

In [3]:
#Merge the two
df = mhdf.merge(wdf, right_on='Date', left_on='data month')

In [4]:
df.head()

Unnamed: 0,status,primary level description,measure value,data month,period start,period end,period diff,Date,daylight_hours,Average Temperature
0,Final,NHS WEST LONDON CCG,4930.0,Dec-19,2019-12-01,2019-12-31,30,Dec-19,7.23,6.79
1,Final,NHS CENTRAL LONDON (WESTMINSTER) CCG,4440.0,Dec-19,2019-12-01,2019-12-31,30,Dec-19,7.23,6.79
2,Final,NHS WEST LONDON CCG,4995.0,Jan-20,2020-01-01,2020-01-31,30,Jan-20,7.94,7.33
3,Final,NHS CENTRAL LONDON (WESTMINSTER) CCG,4790.0,Jan-20,2020-01-01,2020-01-31,30,Jan-20,7.94,7.33
4,Final,NHS WEST LONDON CCG,5095.0,Feb-20,2020-02-01,2020-02-29,28,Feb-20,11.95,12.363636


In [5]:
#confirm that the date and data month columns are the same
(df['data month'] == df['Date']).all()

True

In [6]:
#cleanup. Rename and drop unwanted columns
df = df.rename(columns={'data month':'month'}).drop('Date',axis=1)
df = df.drop(['status','period start','period end','period diff'], axis=1).rename(columns={
    'daylight_hours':'daylight hours','Average Temperature':'average temperature'})

In [7]:
df['primary level description'].value_counts()

NHS NORTH CENTRAL LONDON CCG            21
NHS SOUTH WEST LONDON CCG               21
NHS SOUTH EAST LONDON CCG               21
NHS CENTRAL LONDON (WESTMINSTER) CCG    16
NHS WEST LONDON CCG                     16
NHS NORTH EAST LONDON CCG                9
NHS NORTH WEST LONDON CCG                9
Name: primary level description, dtype: int64

In [8]:
df['month'].nunique()

25

In [9]:
#Separate the regions because different months are available for different regions
se = df[df['primary level description'] == 'NHS SOUTH EAST LONDON CCG']
sw = df[df['primary level description'] == 'NHS SOUTH WEST LONDON CCG']
nc = df[df['primary level description'] == 'NHS NORTH CENTRAL LONDON CCG']
w = df[df['primary level description'] == 'NHS WEST LONDON CCG']
c = df[df['primary level description'] == 'NHS CENTRAL LONDON (WESTMINSTER) CCG']
nw = df[df['primary level description'] == 'NHS NORTH WEST LONDON CCG']
ne = df[df['primary level description'] == 'NHS NORTH EAST LONDON CCG']

In [10]:
#merge regions: Northern, southern and central. 
#The merged regions are available on the same months

se_sw = pd.concat([se,sw]).sort_values('month')
ne_nw = pd.concat([ne,nw]).sort_values('month')
c_w = pd.concat([c,w]).sort_values('month')

In [11]:
se_sw.head()

Unnamed: 0,primary level description,measure value,month,daylight hours,average temperature
11,NHS SOUTH EAST LONDON CCG,38225.0,Apr-20,13.43,11.64
10,NHS SOUTH WEST LONDON CCG,27510.0,Apr-20,13.43,11.64
68,NHS SOUTH WEST LONDON CCG,30625.0,Apr-21,13.43,7.23
69,NHS SOUTH EAST LONDON CCG,42835.0,Apr-21,13.43,7.23
31,NHS SOUTH EAST LONDON CCG,40885.0,Aug-20,14.0,19.35


In [12]:
#Aggregate the data so that every row represents a unique month
sesw = se_sw.groupby('month').agg({'measure value':'sum','daylight hours':'mean','average temperature':'mean'}).dropna()
nenw = ne_nw.groupby('month').agg({'measure value':'sum','daylight hours':'mean','average temperature':'mean'}).dropna()
cw = c_w.groupby('month').agg({'measure value':'sum','daylight hours':'mean','average temperature':'mean'}).dropna()
nc1 = nc.drop('primary level description', axis=1).reset_index(drop=True)

In [13]:
nc1.head()

Unnamed: 0,measure value,month,daylight hours,average temperature
0,33435.0,Apr-20,13.43,11.64
1,33435.0,May-20,15.19,14.33
2,35345.0,Jun-20,16.0,16.6
3,35580.0,Jul-20,15.61,17.56
4,35485.0,Aug-20,14.0,19.35


In [14]:
sesw.head()

Unnamed: 0_level_0,measure value,daylight hours,average temperature
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Apr-20,65735.0,13.43,11.64
Apr-21,73460.0,13.43,7.23
Aug-20,69815.0,14.0,19.35
Aug-21,74760.0,14.03,16.81
Dec-20,72380.0,7.23,5.87


In [15]:
#Save the regional data to csv
nenw.reset_index().to_csv('region_north_e_and_w_London_apr21_to_dec21.csv', index=False)
cw.reset_index().to_csv('region_central_and_west_london_dec19_to_march21.csv',index=False)
sesw.reset_index().to_csv('region_south_e_and_w_london_apr20_to_dec21.csv',index=False)
nc1.to_csv('region_north_central_london_apr20_to_dec21.csv', index=False)