In [1]:
# Dependencies
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np

In [2]:
# File to Load
file = "../Physicians/Physicians.csv"
population_data = "../Physicians/population_data.csv"

In [3]:
# read physicians File and store into Pandas DataFrames
df = pd.read_csv(file)

df.head()

Unnamed: 0,County Name,Census division name,Family Medicine,Specialist,Grand Total
0,Algoma,"Algoma, Unorganized, North Part",1,,1
1,Algoma,Blind River,8,1.0,9
2,Algoma,Elliot Lake,16,2.0,18
3,Algoma,Garden River 14,1,,1
4,Algoma,Hornepayne,3,,3


In [4]:
#fill nan with value
clean_data = df.fillna(0)
clean_data.head()

Unnamed: 0,County Name,Census division name,Family Medicine,Specialist,Grand Total
0,Algoma,"Algoma, Unorganized, North Part",1,0.0,1
1,Algoma,Blind River,8,1.0,9
2,Algoma,Elliot Lake,16,2.0,18
3,Algoma,Garden River 14,1,0.0,1
4,Algoma,Hornepayne,3,0.0,3


In [5]:
#number of county in daataset
County =len(clean_data['County Name'].value_counts())
County

49

In [6]:
#extracting county name and csd_name columns 
organized_df = clean_data[['County Name', 'Census division name']]
organized_df.head()

Unnamed: 0,County Name,Census division name
0,Algoma,"Algoma, Unorganized, North Part"
1,Algoma,Blind River
2,Algoma,Elliot Lake
3,Algoma,Garden River 14
4,Algoma,Hornepayne


In [7]:
#group by County Name
county_dataframe = clean_data.set_index('Census division name').groupby(['County Name'])
county_dataframe

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001ACC99EF108>

In [8]:
#total number of family_medicine per county
family_medicine_df = county_dataframe["Family Medicine"].sum()
family_medicine_df.head()

County Name
Algoma          140
Brant           149
Bruce            73
Chatham-Kent     86
Cochrane        106
Name: Family Medicine, dtype: int64

In [9]:
##total number of Specialist per county
Specialist_df = county_dataframe["Specialist"].sum()
Specialist_df.head()

County Name
Algoma           95.0
Brant           118.0
Bruce             2.0
Chatham-Kent     68.0
Cochrane         42.0
Name: Specialist, dtype: float64

In [10]:
#merge dataset to get total_physicians per county
total_physicians=pd.merge(Specialist_df,family_medicine_df,on=["County Name"])
total_physicians.head()

Unnamed: 0_level_0,Specialist,Family Medicine
County Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Algoma,95.0,140
Brant,118.0,149
Bruce,2.0,73
Chatham-Kent,68.0,86
Cochrane,42.0,106


In [11]:
csd_dataframe = clean_data.set_index('Census division name')
csd_dataframe.head()

Unnamed: 0_level_0,County Name,Family Medicine,Specialist,Grand Total
Census division name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Algoma, Unorganized, North Part",Algoma,1,0.0,1
Blind River,Algoma,8,1.0,9
Elliot Lake,Algoma,16,2.0,18
Garden River 14,Algoma,1,0.0,1
Hornepayne,Algoma,3,0.0,3


In [12]:
final_df = pd.merge(csd_dataframe,total_physicians,how="left",on=["County Name","County Name"])
final_df.rename(columns={'Family Medicine_x':'Family Medicine per census division','Specialist_x':'Specialist per census division','Specialist_y':'Total specialist per county','Family Medicine_y':'Total family medicine per county'}, inplace=True)
final_df.head()

Unnamed: 0,County Name,Family Medicine per census division,Specialist per census division,Grand Total,Total specialist per county,Total family medicine per county
0,Algoma,1,0.0,1,95.0,140
1,Algoma,8,1.0,9,95.0,140
2,Algoma,16,2.0,18,95.0,140
3,Algoma,1,0.0,1,95.0,140
4,Algoma,3,0.0,3,95.0,140


In [13]:
final_data = pd.merge(final_df,organized_df,how="left",on=["County Name","County Name"])
final_data.head()

Unnamed: 0,County Name,Family Medicine per census division,Specialist per census division,Grand Total,Total specialist per county,Total family medicine per county,Census division name
0,Algoma,1,0.0,1,95.0,140,"Algoma, Unorganized, North Part"
1,Algoma,1,0.0,1,95.0,140,Blind River
2,Algoma,1,0.0,1,95.0,140,Elliot Lake
3,Algoma,1,0.0,1,95.0,140,Garden River 14
4,Algoma,1,0.0,1,95.0,140,Hornepayne


In [14]:
complete_data = final_data.set_index('Census division name')
complete_data

Unnamed: 0_level_0,County Name,Family Medicine per census division,Specialist per census division,Grand Total,Total specialist per county,Total family medicine per county
Census division name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Algoma, Unorganized, North Part",Algoma,1,0.0,1,95.0,140
Blind River,Algoma,1,0.0,1,95.0,140
Elliot Lake,Algoma,1,0.0,1,95.0,140
Garden River 14,Algoma,1,0.0,1,95.0,140
Hornepayne,Algoma,1,0.0,1,95.0,140
...,...,...,...,...,...,...
Markham,York,24,3.0,27,1013.0,1064
Newmarket,York,24,3.0,27,1013.0,1064
Richmond Hill,York,24,3.0,27,1013.0,1064
Vaughan,York,24,3.0,27,1013.0,1064


In [15]:
population_data = "../Physicians/population.csv"

In [16]:
population = pd.read_csv(population_data)
population


Unnamed: 0.1,Unnamed: 0,Economic region code,Economic region name,Census division code,Census division name,Census subdivision code,Census subdivision name,Total Population,Total Private Dwellings,Population density (people/square km),...,Total owned and rented households >0 income,<30 percent of income spent on shelter,>=30 percent of income spent on shelter,30 - <100 percent of income spent on shelter,Total commuting duration,Commute <15 min,Commute 15-29 min,Commute 30-44 min,Commute 45-59 min,Commute >=60 min
0,0,3510.0,Ottawa,3501,"Stormont, Dundas and Glengarry, United countie...",3501005,"South Glengarry, Township (TP)",13330.0,5848.0,22.0,...,5160.0,4500.0,660.0,605.0,4945.0,1485.0,2030.0,825.0,250.0,360.0
1,1,3510.0,Ottawa,3501,"Stormont, Dundas and Glengarry, United countie...",3501007,"Akwesasne (Part) 59, Indian reserve (IRI)",,,,...,,,,,,,,,,
2,2,3510.0,Ottawa,3501,"Stormont, Dundas and Glengarry, United countie...",3501011,"South Stormont, Township (TP)",13570.0,5583.0,30.3,...,5295.0,4725.0,570.0,505.0,5510.0,1355.0,2660.0,775.0,275.0,435.0
3,3,3510.0,Ottawa,3501,"Stormont, Dundas and Glengarry, United countie...",3501012,"Cornwall, City (CY)",47845.0,22214.0,777.9,...,21315.0,16605.0,4705.0,4455.0,15700.0,9380.0,4145.0,910.0,345.0,925.0
4,4,3510.0,Ottawa,3501,"Stormont, Dundas and Glengarry, United countie...",3501020,"South Dundas, Municipality (MU)",11044.0,4821.0,21.2,...,4490.0,3860.0,625.0,580.0,3895.0,1170.0,970.0,760.0,465.0,530.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
572,572,3595.0,Northwest,3560,"Kenora, District (DIS)",3560096,"Kasabonika Lake, Indian reserve (IRI)",1060.0,252.0,10.4,...,0.0,0.0,0.0,0.0,280.0,260.0,15.0,10.0,0.0,0.0
573,573,3595.0,Northwest,3560,"Kenora, District (DIS)",3560097,"Muskrat Dam Lake, Indian reserve (IRI)",,,,...,,,,,,,,,,
574,574,3595.0,Northwest,3560,"Kenora, District (DIS)",3560098,"Kingfisher Lake 1, Indian reserve (IRI)",511.0,127.0,56.0,...,0.0,0.0,0.0,0.0,180.0,170.0,10.0,10.0,0.0,0.0
575,575,3595.0,Northwest,3560,"Kenora, District (DIS)",3560100,"Wawakapewin, Indian reserve (IRI)",16.0,14.0,0.3,...,,,,,,,,,,


In [19]:
population_df = population[['Census division name', 'Total Population']]
population_df.head()                       

Unnamed: 0,Census division name,Total Population
0,"Stormont, Dundas and Glengarry, United countie...",13330.0
1,"Stormont, Dundas and Glengarry, United countie...",
2,"Stormont, Dundas and Glengarry, United countie...",13570.0
3,"Stormont, Dundas and Glengarry, United countie...",47845.0
4,"Stormont, Dundas and Glengarry, United countie...",11044.0


In [21]:
final_data_with_population = pd.merge(complete_data,population_df,how="left",on=["Census division name","Census division name"])
final_data_with_population

Unnamed: 0,Census division name,County Name,Family Medicine per census division,Specialist per census division,Grand Total,Total specialist per county,Total family medicine per county,Total Population
0,"Algoma, Unorganized, North Part",Algoma,1,0.0,1,95.0,140,
1,Blind River,Algoma,1,0.0,1,95.0,140,
2,Elliot Lake,Algoma,1,0.0,1,95.0,140,
3,Garden River 14,Algoma,1,0.0,1,95.0,140,
4,Hornepayne,Algoma,1,0.0,1,95.0,140,
...,...,...,...,...,...,...,...,...
2131,Markham,York,24,3.0,27,1013.0,1064,
2132,Newmarket,York,24,3.0,27,1013.0,1064,
2133,Richmond Hill,York,24,3.0,27,1013.0,1064,
2134,Vaughan,York,24,3.0,27,1013.0,1064,
