In [1]:
# Create IMAM supervision data to upload into RapidPro

# Identify all supervision (State and LGA) and put in rows by site (name, phone, email)
# Merge State and LGA supervision into complete listing of personnel
# export as .xlsx file

# This is still very excel way of thinking. 
# We should just create the table in json and import to RapidPro

In [504]:
# To show plots in the notebook
%matplotlib inline  

import pandas as pd
import numpy as np
import pandas_highcharts.core
from sqlalchemy import create_engine
import psycopg2
import matplotlib.pyplot 

import os

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "IMAM.settings")

from django.conf import settings
from home.management.commands.load_data import assign_state_lga_num, rename_cols, generic_cleaning, merge_in_and_outpatients, add_program_reports_from_supervision

import django
django.setup()

from home.models import First_admin, Second_admin, Site, Registration

In [505]:
# For exporting excel files using XlsxWriter
from pandas import ExcelWriter
import xlsxwriter

In [506]:
# Suppress scientific notation
# pd.options.display.float_format = '{:20,.0f}'.format

In [507]:
engine = create_engine(
    'postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(**settings.DATABASES['default']))
df = pd.read_sql_query("select * from registration;", con=engine)

In [508]:
# If there is no data in PostGres
# df = pd.ExcelFile('/home/robert/Downloads/reg.xlsx').parse('Contacts')

In [509]:
df.head()

Unnamed: 0,contact_uuid,urn,name,groups,siteid,type,first_seen,last_seen,post,mail,lga_num,state_num
0,7903e497-8bfe-47a5-bfd6-78584b3cc09f,tel:+2348165422728,Munirat Makama.,,2003110030,OTP,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,Nurse/Midwife,,2003.0,20.0
1,069c45bd-3e28-4461-a3a2-da998681f501,tel:+2347064388816,DAYYABU MUSA.,,3510110005,OTP,2016-12-12 06:40:36.221166,2017-05-07 13:57:44.327906,Community Health Officer,,3510.0,35.0
2,a591122c-7319-4d2c-85e3-c28d5fdd44a0,tel:+2348034042391,Falmata Ali Mustapha.,,813110003,OTP,2016-10-14 10:38:05.292361,2017-05-05 14:22:03.666872,Community Health Officer,faltalimutapha202@gmail.com,813.0,8.0
3,e978059a-ffdd-44b8-84a7-8ce8c0aed7af,tel:+2348180200915,Abdulkadir Muhammad Yasore,,2003,Sup,2016-11-05 16:33:04.907624,2017-05-08 12:38:59.153229,,akyasore@gmail.com,2003.0,20.0
4,3130d94b-f3bd-4412-b7c9-acd8b5598e81,tel:+2348087760645,IBRAHIM LAMARA.,,504110010,OTP,2017-02-22 14:12:33.933315,2017-04-12 14:09:33.640518,In Charge Hospital/PHC,,504.0,5.0


In [510]:
# run rename columns function
rename_cols(df)

Unnamed: 0,contact_uuid,urn,name,groups,siteid,type,first_seen,last_seen,post,mail,lga_num,state_num
0,7903e497-8bfe-47a5-bfd6-78584b3cc09f,tel:+2348165422728,Munirat Makama.,,2003110030,OTP,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,Nurse/Midwife,,2003.0,20.0
1,069c45bd-3e28-4461-a3a2-da998681f501,tel:+2347064388816,DAYYABU MUSA.,,3510110005,OTP,2016-12-12 06:40:36.221166,2017-05-07 13:57:44.327906,Community Health Officer,,3510.0,35.0
2,a591122c-7319-4d2c-85e3-c28d5fdd44a0,tel:+2348034042391,Falmata Ali Mustapha.,,813110003,OTP,2016-10-14 10:38:05.292361,2017-05-05 14:22:03.666872,Community Health Officer,faltalimutapha202@gmail.com,813.0,8.0
3,e978059a-ffdd-44b8-84a7-8ce8c0aed7af,tel:+2348180200915,Abdulkadir Muhammad Yasore,,2003,Sup,2016-11-05 16:33:04.907624,2017-05-08 12:38:59.153229,,akyasore@gmail.com,2003.0,20.0
4,3130d94b-f3bd-4412-b7c9-acd8b5598e81,tel:+2348087760645,IBRAHIM LAMARA.,,504110010,OTP,2017-02-22 14:12:33.933315,2017-04-12 14:09:33.640518,In Charge Hospital/PHC,,504.0,5.0
5,227ec725-6868-4ee3-9589-a2860c72daa8,tel:+2349027660360,EKENTA FANNY NJIDEKA.,,807210007,SC,2017-04-28 10:03:59.903480,2017-04-28 14:41:27.358756,Nurse/Midwife,nekenta@internationalmedicalcorps.org,807.0,8.0
6,a02518c4-935b-46cf-a1ce-11d23ba6d38b,tel:+2347033361497,JOY B MBAYA.,,821210017,SC,2017-04-28 10:26:51.567596,2017-04-28 14:37:43.264403,Community Health Officer,,821.0,8.0
7,b0670cfa-c8ef-4e1d-8eb2-d3ab70efe2eb,tel:+2348086596462,Mannir Ibrahim.,,3605110063,OTP,2016-11-07 08:41:14.014344,2017-01-18 20:35:52.997439,Community Health Officer,,3605.0,36.0
8,dfae6793-c610-4f33-bd27-952f259c92b1,tel:+2348069613783,Sani Magaji Nahuce.,,3605110056,OTP,2016-11-08 08:13:54.081639,2017-01-18 20:35:46.335767,,,3605.0,36.0
9,2a82f74b-c2ef-465b-9f8f-f86dabce6ef1,tel:+2348080796839,Musa Abdullahi.,,3603110029,OTP,2016-11-06 10:03:24.573761,2017-01-18 20:35:31.345573,,,3603.0,36.0


In [511]:
# CHANGE URN TO NUM
df = df.rename(index=str, columns={"urn": "num"})

In [535]:
#df.state_num= df.state_num.astype(int)
# df.lga_num= df.lga_num.astype(int)
# Cannot convert NA to integer


In [536]:
# rename SiteID to siteid
# df=df.rename(columns = {'SiteID':'siteid'})

# Change the order (the index) of the columns
columnsTitles = ['siteid',                 
                 'name',
                 'num',
                 'mail',
                 'post',
                 'type',
                 'first_seen',
                 'last_seen',
                 'lga_num', 
                 'state_num' 
                 ]

df2 = df.reindex(columns=columnsTitles)
df2

Unnamed: 0,siteid,name,num,mail,post,type,first_seen,last_seen,lga_num,state_num
0,2003110030,Munirat Makama.,tel:+2348165422728,,Nurse/Midwife,OTP,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,2003.0,20.0
1,3510110005,DAYYABU MUSA.,tel:+2347064388816,,Community Health Officer,OTP,2016-12-12 06:40:36.221166,2017-05-07 13:57:44.327906,3510.0,35.0
2,813110003,Falmata Ali Mustapha.,tel:+2348034042391,faltalimutapha202@gmail.com,Community Health Officer,OTP,2016-10-14 10:38:05.292361,2017-05-05 14:22:03.666872,813.0,8.0
3,2003,Abdulkadir Muhammad Yasore,tel:+2348180200915,akyasore@gmail.com,,Sup,2016-11-05 16:33:04.907624,2017-05-08 12:38:59.153229,2003.0,20.0
4,504110010,IBRAHIM LAMARA.,tel:+2348087760645,,In Charge Hospital/PHC,OTP,2017-02-22 14:12:33.933315,2017-04-12 14:09:33.640518,504.0,5.0
5,807210007,EKENTA FANNY NJIDEKA.,tel:+2349027660360,nekenta@internationalmedicalcorps.org,Nurse/Midwife,SC,2017-04-28 10:03:59.903480,2017-04-28 14:41:27.358756,807.0,8.0
6,821210017,JOY B MBAYA.,tel:+2347033361497,,Community Health Officer,SC,2017-04-28 10:26:51.567596,2017-04-28 14:37:43.264403,821.0,8.0
7,3605110063,Mannir Ibrahim.,tel:+2348086596462,,Community Health Officer,OTP,2016-11-07 08:41:14.014344,2017-01-18 20:35:52.997439,3605.0,36.0
8,3605110056,Sani Magaji Nahuce.,tel:+2348069613783,,,OTP,2016-11-08 08:13:54.081639,2017-01-18 20:35:46.335767,3605.0,36.0
9,3603110029,Musa Abdullahi.,tel:+2348080796839,,,OTP,2016-11-06 10:03:24.573761,2017-01-18 20:35:31.345573,3603.0,36.0


In [537]:
df2.post.value_counts()

# Correct in Registration
# Community Health Worker        1
# hw                             1
# HC                             1

Community Health Officer    1166
In Charge Hospital/PHC       891
Coordinator                  203
Technical Assistance          97
Nurse/Midwife                 75
Volunteer                     64
LabTech-Pharm                 60
Stocks Manager                35
Database Manager              29
Doctor                        17
Observer                      14
STOCKS MANAGER                 2
DATABASE MANAGER               2
In Charge Hospital/Phc         1
Name: post, dtype: int64

In [538]:
# Post ranking
df2['post_rank'] = df2.post.str.lower()

# Coordinator                    1
# Stocks Manager                 2
# Database Manager               3

# In Charge Hospital/PHC         4
# Doctor                         5
# Nurse/Midwife                  6
# LabTech-Pharm                  7
# Community Health Officer       8
# Volunteer                      9
# Technical Assistance          10
# Observer                      11

# Use replace with dictionary
df2['post_rank'].replace({'coordinator': 1,
                        'stocks manager': 2,
                        'database manager': 3,
                        'in charge hospital/phc': 4,
                        'doctor': 5,
                        'nurse/midwife': 6,
                        'labtech-pharm': 7,
                        'community health officer': 8,
                        'volunteer': 9,
                        'technical assistance': 10,
                        'observer': 11}, 
                         inplace=True)

# df2['post_rank'] = df2.post.str.lower().replace({
#                         'coordinator': 1,
#                         'stocks manager': 2,
#                         'database manager': 3,
#                         'in charge hospital/phc': 4,
#                         'doctor': 5,
#                         'nurse/midwife': 6,
#                         'labtech-pharm': 7,
#                         'community health officer': 8,
#                         'volunteer': 9,
#                         'technical assistance': 10,
#                         'observer': 11}, 
#                          inplace=True)


In [539]:
df2['post_rank'].value_counts()
# Used to sort by rank later

8.0     1166
4.0      892
1.0      203
10.0      97
6.0       75
9.0       64
7.0       60
2.0       37
3.0       31
5.0       17
11.0      14
Name: post_rank, dtype: int64

In [540]:
# df2.type
# change type in supervision cadres to sup
df2['type'] = np.where((df2['siteid']) < 3699, "Sup", df2['type'])

In [541]:
# REMEMBER With queries, the evaluation phrase must be a string in single or double quotes
# df2.query('type == ","')
# removed one row with , as type

In [542]:
df2.type.value_counts()


OTP    2187
Sup     356
SC      177
Name: type, dtype: int64

In [543]:
df2['siteid'].count()

2774

In [544]:
df2['type'].unique()
# At least 54 registrations with None as type

array([u'OTP', 'Sup', u'SC', None], dtype=object)

In [545]:
# Assign state lga num is not necessary as it is done already in the import_contacts.py
# df2 = assign_state_lga_num(df2)
# corrected error with LGA Num in load_data.py

In [546]:
df2 = df2.query('siteid>1').query('siteid!=99')
# Remove national level registrations
# siteid == 99 is a number to use for missing or no longer involved with IMAM program
# Remove erroneous registrations

In [547]:
df2 = df2.sort_values(by=['siteid','post_rank', 'name'])

In [548]:
del supervision_df

In [549]:
# create db with only supervision staff
supervision_df = df2[df2['siteid'] <= 3699]
# supervision siteids range from 1 to 3699

In [550]:
# to pivot data, create counts of each case by siteid
# Remember that siteid == state_num or LGA_num if on supervision level. 
# It is not necessary to run this command below on the state_num or lga_num. 

# copy of a slice from a DataFrame - ERROR
# supervision_df['count'] = supervision_df.groupby('siteid').cumcount() + 1
#supervision_df.loc['count'] = supervision_df.groupby('siteid').cumcount() + 1
# df.loc[:, :] is supposedly equivalent to all rows and columns in dataframe

supervision_df.loc[: , 'count'] = supervision_df.groupby('siteid').cumcount() + 1

In [551]:
# supervision_df = supervision_df.drop('type', axis=1)

In [552]:
supervision_df
# Supervision long dataframe

# num labelled as num

Unnamed: 0,siteid,name,num,mail,post,type,first_seen,last_seen,lga_num,state_num,post_rank,count
693,2,Hauwa Zoakah .,tel:+2348020938959,hauwabata@yahoo.com,Coordinator,Sup,2016-10-26 09:04:19.462503,2017-05-08 08:22:44.912015,,2.0,1.0,1
2301,2,Hauwa Zoakah.,tel:+2347035678763,hauwabata@yahoo.com,Coordinator,Sup,2016-06-01 12:17:48.149140,2017-05-08 08:22:44.164408,,2.0,1.0,2
2300,2,Ijagila Mark .,tel:+2348100001300,ijagilamark@gmail.com,Coordinator,Sup,2016-10-24 09:36:56.209869,2017-05-08 08:22:44.541012,,2.0,1.0,3
2302,2,Wullanga Alfred,tel:+2347032853473,wulangaalfred@gmail.com,Coordinator,Sup,2016-10-26 09:06:02.356386,2017-05-08 08:22:43.794723,,2.0,1.0,4
694,2,Reuben Aidaticha.,tel:+2348088481006,aidaticha@gmail.com,Database Manager,Sup,2016-10-24 10:38:14.041832,2017-05-08 08:22:42.640776,,2.0,3.0,5
2304,2,Reuben Aidaticha.,tel:+2348038213859,aidaticha@gmail.com,Database Manager,Sup,2016-10-24 09:35:36.024432,2017-05-08 08:22:43.006681,,2.0,3.0,6
2305,2,Wullanga Alfred.,tel:+2348029864318,wullangaalfred@gmail.com,Database Manager,Sup,2016-10-24 09:35:46.229729,2017-05-08 08:22:42.272719,,2.0,3.0,7
2303,2,Olawumi Monica Ajayi.,tel:+2348036173901,wumi.ajayi@yahoo.com,Technical Assistance,Sup,2016-10-24 09:38:19.635591,2017-05-08 08:22:43.385150,,2.0,10.0,8
246,5,Charity Evans Nysalamke.,tel:+2348036581237,nyalamkecharityevans@yahoo.com,Coordinator,Sup,2016-09-02 10:41:18.971823,2017-02-27 11:33:29.021348,,5.0,1.0,1
613,5,Hamza Yakubu Sade .,tel:+2348037716795,,Coordinator,Sup,2016-09-02 10:41:35.895429,2017-02-27 11:33:31.265269,,5.0,1.0,2


In [553]:
# The data points to include in IMAM Supervision database for each supervision SiteID
# Name, phone number, email
# can remove state_num and lga_num

columnsTitles = ['siteid',                 
                 'name',
                 'num',
                 'mail',
                 'state_num',
                 'lga_num',
                 'count',
                 ]

supervision_df = supervision_df.reindex(columns=columnsTitles)

In [529]:
supervision_df

Unnamed: 0,siteid,name,num,mail,state_num,lga_num,count
693,2,Hauwa Zoakah .,tel:+2348020938959,hauwabata@yahoo.com,2.0,,1
2301,2,Hauwa Zoakah.,tel:+2347035678763,hauwabata@yahoo.com,2.0,,2
2300,2,Ijagila Mark .,tel:+2348100001300,ijagilamark@gmail.com,2.0,,3
2302,2,Wullanga Alfred,tel:+2347032853473,wulangaalfred@gmail.com,2.0,,4
694,2,Reuben Aidaticha.,tel:+2348088481006,aidaticha@gmail.com,2.0,,5
2304,2,Reuben Aidaticha.,tel:+2348038213859,aidaticha@gmail.com,2.0,,6
2305,2,Wullanga Alfred.,tel:+2348029864318,wullangaalfred@gmail.com,2.0,,7
2303,2,Olawumi Monica Ajayi.,tel:+2348036173901,wumi.ajayi@yahoo.com,2.0,,8
246,5,Charity Evans Nysalamke.,tel:+2348036581237,nyalamkecharityevans@yahoo.com,5.0,,1
613,5,Hamza Yakubu Sade .,tel:+2348037716795,,5.0,,2


In [485]:
# create db with only State Level supervision staff
first_admin_wide = supervision_df[supervision_df['siteid'] <= 39]


In [486]:
first_admin_wide

Unnamed: 0,siteid,name,num,mail,count
693,2,Hauwa Zoakah .,tel:+2348020938959,hauwabata@yahoo.com,1
2301,2,Hauwa Zoakah.,tel:+2347035678763,hauwabata@yahoo.com,2
2300,2,Ijagila Mark .,tel:+2348100001300,ijagilamark@gmail.com,3
2302,2,Wullanga Alfred,tel:+2347032853473,wulangaalfred@gmail.com,4
694,2,Reuben Aidaticha.,tel:+2348088481006,aidaticha@gmail.com,5
2304,2,Reuben Aidaticha.,tel:+2348038213859,aidaticha@gmail.com,6
2305,2,Wullanga Alfred.,tel:+2348029864318,wullangaalfred@gmail.com,7
2303,2,Olawumi Monica Ajayi.,tel:+2348036173901,wumi.ajayi@yahoo.com,8
246,5,Charity Evans Nysalamke.,tel:+2348036581237,nyalamkecharityevans@yahoo.com,1
613,5,Hamza Yakubu Sade .,tel:+2348037716795,,2


In [487]:
# convert vertical to horizontal database. 
# will None in the cells cause us to send excessive number of warning SMS? 
first_admin_wide= first_admin_wide.pivot(index='siteid', columns='count')

In [489]:
first_admin_wide

Unnamed: 0_level_0,name,name,name,name,name,name,name,name,name,name,...,mail,mail,mail,mail,mail,mail,mail,mail,mail,mail
count,1,2,3,4,5,6,7,8,9,10,...,9,10,11,12,13,14,15,16,17,18
siteid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,Hauwa Zoakah .,Hauwa Zoakah.,Ijagila Mark .,Wullanga Alfred,Reuben Aidaticha.,Reuben Aidaticha.,Wullanga Alfred.,Olawumi Monica Ajayi.,,,...,,,,,,,,,,
5,Charity Evans Nysalamke.,Hamza Yakubu Sade .,Sama'ila Usman Maikan.,Yakubu Baba.,Ali Shehu Kobi .,Habu Abdulmalik Dauda.,Jackson Ladu Martins.,,,,...,,,,,,,,,,
8,Hassana Suleiman Jibrin.,Abdullahi Alhaji Madi.,"Amarachi, Clementina Chukwuma.",Aminu Usman Danzomo.,Daniel James .,Magret Ayuba.,,,,,...,,,,,,,,,,
16,Olufunmilayo Adepoju-adebambo.,Ronas Amos Amusa .,Suleiman Mamman.,Usman Baraya.,Ibrahim Inuwa Lano.,Ahmed Audu Saddana.,Selamawit Negash.,Rukayya Lawal.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Saidu Umar Adamu.,Olatomiwa Olabisi.,Temidayo Esther Ajala.,,,,,,,...,,,,,,,,,,
18,Hauwa Usman .,Jane Gwani.,Saratu Aduwak.,Maryam Yusuf .,,,,,,,...,,,,,,,,,,
19,Ado Jibrin Sanda.,Ado Mustapha.,Murtala M Inuwa.,Murtala Sani Abdullahi.,Akilu Sani.,Sabo Wada.,Sabo Wada.,"Abolarin, Samuel Sesan.",Deborah Abi Nyako.,Dorcas Heinmen Gauji.,...,nyakoabi@yahoo.com,dgauji@yahoo.com,ffekura@gmail.com,williamnyako22@gmail.com,lekrunmon@yahoo.com,anyam@unicef.org,akilumfashi@gmail.com,ayodejiosunkentan@yahoo.com,mikekush70@yahoo.com,
20,Rabia Mohammed Sno .,Hamisu Idris K/bai.,Abdulmumin Lawan.,Ibrahim Maje Sayyadi.,Yusufahmadu Gangara.,Zayyana Isyaku Sule.,Abdulhadi Abdulkadir.,Ado Ibrahim Abdulrahman.,Musbahu Hamisu.,Sim Karla Sakyenu.,...,hmusbahu@yahoo.com,simkarla@live.com,,,,,,,,
21,Abdulmalik Muhammad Illo .,Abdulmalik Muhammad Illo.,Aliyu Galadima Libata.,Beatrice Kwere.,Shamsu Muhammed.,Abimbola Abosede Sobo.,Abisola Mary Atoyebi.,Aliyu Galadima Libata .,Sunday Norbert.,Suwaiba M Bello.,...,sundayknorbert@yahoo.com,muheebat18@gmail.com,,,,,,,,
33,Abdallah Ladan.,Buharismalami.,Muhammad Ali Hamza.,Nura Muazu.,Hassan Muhammad Galadanci.,Hassanmuhammadgaladanci.,Kamaru Muhammad.,Nura Shehu .,Sany Mohd Sokoto.,,...,,,,,,,,,,


In [490]:
# Create new column name
# and correct multiIndex
first_admin_wide.columns = ["sno" + str(first_admin_wide.columns[i][1]) + (first_admin_wide.columns[i][0]) for i in range(len(first_admin_wide.columns))]


In [491]:
first_admin_wide = first_admin_wide.reset_index()
first_admin_wide
# Note state_num does not exist, but is same as siteid

Unnamed: 0,siteid,sno1name,sno2name,sno3name,sno4name,sno5name,sno6name,sno7name,sno8name,sno9name,...,sno9mail,sno10mail,sno11mail,sno12mail,sno13mail,sno14mail,sno15mail,sno16mail,sno17mail,sno18mail
0,2,Hauwa Zoakah .,Hauwa Zoakah.,Ijagila Mark .,Wullanga Alfred,Reuben Aidaticha.,Reuben Aidaticha.,Wullanga Alfred.,Olawumi Monica Ajayi.,,...,,,,,,,,,,
1,5,Charity Evans Nysalamke.,Hamza Yakubu Sade .,Sama'ila Usman Maikan.,Yakubu Baba.,Ali Shehu Kobi .,Habu Abdulmalik Dauda.,Jackson Ladu Martins.,,,...,,,,,,,,,,
2,8,Hassana Suleiman Jibrin.,Abdullahi Alhaji Madi.,"Amarachi, Clementina Chukwuma.",Aminu Usman Danzomo.,Daniel James .,Magret Ayuba.,,,,...,,,,,,,,,,
3,16,Olufunmilayo Adepoju-adebambo.,Ronas Amos Amusa .,Suleiman Mamman.,Usman Baraya.,Ibrahim Inuwa Lano.,Ahmed Audu Saddana.,Selamawit Negash.,Rukayya Lawal.,,...,,,,,,,,,,
4,17,MUSA MOHAMMED HADEJIA .,Saidu Umar Adamu.,Olatomiwa Olabisi.,Temidayo Esther Ajala.,,,,,,...,,,,,,,,,,
5,18,Hauwa Usman .,Jane Gwani.,Saratu Aduwak.,Maryam Yusuf .,,,,,,...,,,,,,,,,,
6,19,Ado Jibrin Sanda.,Ado Mustapha.,Murtala M Inuwa.,Murtala Sani Abdullahi.,Akilu Sani.,Sabo Wada.,Sabo Wada.,"Abolarin, Samuel Sesan.",Deborah Abi Nyako.,...,nyakoabi@yahoo.com,dgauji@yahoo.com,ffekura@gmail.com,williamnyako22@gmail.com,lekrunmon@yahoo.com,anyam@unicef.org,akilumfashi@gmail.com,ayodejiosunkentan@yahoo.com,mikekush70@yahoo.com,
7,20,Rabia Mohammed Sno .,Hamisu Idris K/bai.,Abdulmumin Lawan.,Ibrahim Maje Sayyadi.,Yusufahmadu Gangara.,Zayyana Isyaku Sule.,Abdulhadi Abdulkadir.,Ado Ibrahim Abdulrahman.,Musbahu Hamisu.,...,hmusbahu@yahoo.com,simkarla@live.com,,,,,,,,
8,21,Abdulmalik Muhammad Illo .,Abdulmalik Muhammad Illo.,Aliyu Galadima Libata.,Beatrice Kwere.,Shamsu Muhammed.,Abimbola Abosede Sobo.,Abisola Mary Atoyebi.,Aliyu Galadima Libata .,Sunday Norbert.,...,sundayknorbert@yahoo.com,muheebat18@gmail.com,,,,,,,,
9,33,Abdallah Ladan.,Buharismalami.,Muhammad Ali Hamza.,Nura Muazu.,Hassan Muhammad Galadanci.,Hassanmuhammadgaladanci.,Kamaru Muhammad.,Nura Shehu .,Sany Mohd Sokoto.,...,,,,,,,,,,


In [296]:
# Prepared state level list of supervisors

In [492]:
# set index to be state_num - same as siteid for state
first_admin_wide = first_admin_wide.set_index('siteid')
# remove name of index
first_admin_wide.index.name = None


In [493]:
first_admin_wide

Unnamed: 0,sno1name,sno2name,sno3name,sno4name,sno5name,sno6name,sno7name,sno8name,sno9name,sno10name,...,sno9mail,sno10mail,sno11mail,sno12mail,sno13mail,sno14mail,sno15mail,sno16mail,sno17mail,sno18mail
2,Hauwa Zoakah .,Hauwa Zoakah.,Ijagila Mark .,Wullanga Alfred,Reuben Aidaticha.,Reuben Aidaticha.,Wullanga Alfred.,Olawumi Monica Ajayi.,,,...,,,,,,,,,,
5,Charity Evans Nysalamke.,Hamza Yakubu Sade .,Sama'ila Usman Maikan.,Yakubu Baba.,Ali Shehu Kobi .,Habu Abdulmalik Dauda.,Jackson Ladu Martins.,,,,...,,,,,,,,,,
8,Hassana Suleiman Jibrin.,Abdullahi Alhaji Madi.,"Amarachi, Clementina Chukwuma.",Aminu Usman Danzomo.,Daniel James .,Magret Ayuba.,,,,,...,,,,,,,,,,
16,Olufunmilayo Adepoju-adebambo.,Ronas Amos Amusa .,Suleiman Mamman.,Usman Baraya.,Ibrahim Inuwa Lano.,Ahmed Audu Saddana.,Selamawit Negash.,Rukayya Lawal.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Saidu Umar Adamu.,Olatomiwa Olabisi.,Temidayo Esther Ajala.,,,,,,,...,,,,,,,,,,
18,Hauwa Usman .,Jane Gwani.,Saratu Aduwak.,Maryam Yusuf .,,,,,,,...,,,,,,,,,,
19,Ado Jibrin Sanda.,Ado Mustapha.,Murtala M Inuwa.,Murtala Sani Abdullahi.,Akilu Sani.,Sabo Wada.,Sabo Wada.,"Abolarin, Samuel Sesan.",Deborah Abi Nyako.,Dorcas Heinmen Gauji.,...,nyakoabi@yahoo.com,dgauji@yahoo.com,ffekura@gmail.com,williamnyako22@gmail.com,lekrunmon@yahoo.com,anyam@unicef.org,akilumfashi@gmail.com,ayodejiosunkentan@yahoo.com,mikekush70@yahoo.com,
20,Rabia Mohammed Sno .,Hamisu Idris K/bai.,Abdulmumin Lawan.,Ibrahim Maje Sayyadi.,Yusufahmadu Gangara.,Zayyana Isyaku Sule.,Abdulhadi Abdulkadir.,Ado Ibrahim Abdulrahman.,Musbahu Hamisu.,Sim Karla Sakyenu.,...,hmusbahu@yahoo.com,simkarla@live.com,,,,,,,,
21,Abdulmalik Muhammad Illo .,Abdulmalik Muhammad Illo.,Aliyu Galadima Libata.,Beatrice Kwere.,Shamsu Muhammed.,Abimbola Abosede Sobo.,Abisola Mary Atoyebi.,Aliyu Galadima Libata .,Sunday Norbert.,Suwaiba M Bello.,...,sundayknorbert@yahoo.com,muheebat18@gmail.com,,,,,,,,
33,Abdallah Ladan.,Buharismalami.,Muhammad Ali Hamza.,Nura Muazu.,Hassan Muhammad Galadanci.,Hassanmuhammadgaladanci.,Kamaru Muhammad.,Nura Shehu .,Sany Mohd Sokoto.,,...,,,,,,,,,,


In [496]:
# Create same list for LGA
# create db with only LGA Level supervision staff
second_admin_wide = supervision_df.query('siteid>=101').query('siteid<=3699')
#lga_df = lga_df[lga_df['siteid'] <= 3699]


In [497]:
second_admin_wide

Unnamed: 0,siteid,name,num,mail,count
1444,201,Edina Richard.,tel:+2348131816143,,1
2281,202,Monday Leasado.,tel:+2348038056048,,1
2282,202,Monday Leasado.,tel:+2348086625852,,2
2283,202,Monday Leasado.,tel:+2348065356507,,3
2277,203,Naaticha Waziri .,tel:+2348037675670,naatiwaziri69@gmail.com,1
1397,204,Rose Zidon .,tel:+2347065739356,,1
1432,204,Rose Mbamuno Zidon,tel:+2349086559594,,2
2275,205,Esther Danjuma.,tel:+2347030635580,,1
2276,205,Esther Danjuma.,tel:+2348025568649,,2
2274,206,Hannatu B Usman.,tel:+2348064811859,hannatubbu@gmail.com,1


In [430]:
# convert vertical to horizontal database. 
# will None in the cells cause us to send excessive number of warning SMS? 
second_admin_wide = second_admin_wide.pivot(index='siteid', columns='count')

In [431]:
second_admin_wide

Unnamed: 0_level_0,name,name,name,name,name,num,num,num,num,num,mail,mail,mail,mail,mail
count,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5
siteid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
201,Edina Richard.,,,,,tel:+2348131816143,,,,,,,,,
202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348038056048,tel:+2348086625852,tel:+2348065356507,,,,,,,
203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,,naatiwaziri69@gmail.com,,,,
204,Rose Zidon .,Rose Mbamuno Zidon,,,,tel:+2347065739356,tel:+2349086559594,,,,,,,,
205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2347030635580,tel:+2348025568649,,,,,,,,
206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,,hannatubbu@gmail.com,,,,
207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348022645708,tel:+2348034154949,,,,,,,,
208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,,,apollospola@gmail,,,
209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,,,,,,
210,Adama Abubakar.,Ularamai Raphael .,,,,tel:+2349078543500,tel:+2347087576976,,,,,,,,


In [432]:
# Rename columns and correct multiIndex
second_admin_wide.columns = ["lga" + str(second_admin_wide.columns[i][1]) + (second_admin_wide.columns[i][0]) for i in range(len(second_admin_wide.columns))]

In [304]:
second_admin_wide = second_admin_wide.reset_index()

In [169]:
second_admin_wide

Unnamed: 0,siteid,lga1name,lga2name,lga3name,lga4name,lga5name,lga1urn,lga2urn,lga3urn,lga4urn,lga5urn,lga1mail,lga2mail,lga3mail,lga4mail,lga5mail
0,201,Edina Richard.,,,,,tel:+2348131816143,,,,,,,,,
1,202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348038056048,tel:+2348086625852,tel:+2348065356507,,,,,,,
2,203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,,naatiwaziri69@gmail.com,,,,
3,204,Rose Mbamuno Zidon,Rose Zidon .,,,,tel:+2349086559594,tel:+2347065739356,,,,,,,,
4,205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2347030635580,tel:+2348025568649,,,,,,,,
5,206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,,hannatubbu@gmail.com,,,,
6,207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348034154949,tel:+2348022645708,,,,,,,,
7,208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,,,apollospola@gmail,,,
8,209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,,,,,,
9,210,Adama Abubakar.,Ularamai Raphael .,,,,tel:+2349078543500,tel:+2347087576976,,,,,,,,


In [305]:
# Change None in cells to blank

#for cell in lga:
#   lga[cell].fillna(value='', inplace=True)

# lga = lga.fillna
# changes df format and leaves None in place. 

In [306]:
second_admin_wide['state_num'] = 0

In [307]:
# Add state_num to LGA df
second_admin_wide['state_num'] = np.where(second_admin_wide['siteid'] <999, second_admin_wide['siteid'].astype(str).str[:1], second_admin_wide['state_num'])
second_admin_wide['state_num'] = np.where(second_admin_wide['siteid'] <3699, second_admin_wide['siteid'].astype(str).str[:2], second_admin_wide['state_num'])

# Do not forget to cast state_num to INT or merge will not work
lga['state_num'] = lga['state_num'].astype(int)
lga['state_num'].value_counts()

8     21
2     21
35    17
21    15
20    15
17    12
33     9
36     7
19     6
16     3
5      3
18     2
Name: state_num, dtype: int64

In [308]:
second_admin_wide.dtypes

siteid        int64
lga1name     object
lga2name     object
lga3name     object
lga4name     object
lga5name     object
lga1urn      object
lga2urn      object
lga3urn      object
lga4urn      object
lga5urn      object
lga1mail     object
lga2mail     object
lga3mail     object
lga4mail     object
lga5mail     object
state_num     int64
dtype: object

In [409]:
# Prepared LGA level supervision list


In [309]:
first_admin_wide

Unnamed: 0,sno1name,sno2name,sno3name,sno4name,sno5name,sno6name,sno7name,sno8name,sno9name,sno10name,...,sno9mail,sno10mail,sno11mail,sno12mail,sno13mail,sno14mail,sno15mail,sno16mail,sno17mail,sno18mail
2,Wullanga Alfred.,Reuben Aidaticha.,Olawumi Monica Ajayi.,Wullanga Alfred,Hauwa Zoakah.,Ijagila Mark .,Hauwa Zoakah .,Reuben Aidaticha.,,,...,,,,,,,,,,
5,Jackson Ladu Martins.,Habu Abdulmalik Dauda.,Charity Evans Nysalamke.,Yakubu Baba.,Ali Shehu Kobi .,Hamza Yakubu Sade .,Sama'ila Usman Maikan.,,,,...,,,,,,,,,,
8,Magret Ayuba.,Hassana Suleiman Jibrin.,"Amarachi, Clementina Chukwuma.",Daniel James .,Abdullahi Alhaji Madi.,Aminu Usman Danzomo.,,,,,...,,,,,,,,,,
16,Usman Baraya.,Selamawit Negash.,Rukayya Lawal.,Ronas Amos Amusa .,Ahmed Audu Saddana.,Olufunmilayo Adepoju-adebambo.,Ibrahim Inuwa Lano.,Suleiman Mamman.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Temidayo Esther Ajala.,Olatomiwa Olabisi.,Saidu Umar Adamu.,,,,,,,...,,,,,,,,,,
18,Saratu Aduwak.,Maryam Yusuf .,Jane Gwani.,Hauwa Usman .,,,,,,,...,,,,,,,,,,
19,Nyako William Azi.,Ado Jibrin Sanda.,Abigail Ishaya Nyam .,Akilu Sani.,Deborah Abi Nyako.,Sabo Wada.,Sabo Wada.,Ado Mustapha.,Murtala M Inuwa.,Fumen Fuset Emmanuel.,...,murtalamuhd33@gmail.com,ffekura@gmail.com,dgauji@yahoo.com,msaniabdullahi@gmail.com,,abolarin.samuelsesan@gmail.com,mikekush70@yahoo.com,ayodejiosunkentan@yahoo.com,lekrunmon@yahoo.com,akilumfashi@gmail.com
20,Musbahu Hamisu.,Abdulhadi Abdulkadir.,Abdulmumin Lawan.,Zayyana Isyaku Sule.,Sim Karla Sakyenu.,Rabia Mohammed Sno .,Ibrahim Maje Sayyadi.,Hamisu Idris K/bai.,Yusufahmadu Gangara.,Ado Ibrahim Abdulrahman.,...,yusufahmadu5@gmail.com,ibrado30@gmail.com,,,,,,,,
21,Abdulmalik Muhammad Illo .,Abisola Mary Atoyebi.,Sunday Norbert.,Suwaiba M Bello.,Aliyu Galadima Libata .,Beatrice Kwere.,Abdulmalik Muhammad Illo.,Aliyu Galadima Libata.,Shamsu Muhammed.,Abimbola Abosede Sobo.,...,,soboabimbola@yahoo.com,,,,,,,,
33,Hassan Muhammad Galadanci.,Hassanmuhammadgaladanci.,Nura Shehu .,Buharismalami.,Nura Muazu.,Abdallah Ladan.,Sany Mohd Sokoto.,Kamaru Muhammad.,Muhammad Ali Hamza.,,...,ahamzask@yahoo.com,,,,,,,,,


In [310]:
first_admin_wide.dtypes

sno1name     object
sno2name     object
sno3name     object
sno4name     object
sno5name     object
sno6name     object
sno7name     object
sno8name     object
sno9name     object
sno10name    object
sno11name    object
sno12name    object
sno13name    object
sno14name    object
sno15name    object
sno16name    object
sno17name    object
sno18name    object
sno1urn      object
sno2urn      object
sno3urn      object
sno4urn      object
sno5urn      object
sno6urn      object
sno7urn      object
sno8urn      object
sno9urn      object
sno10urn     object
sno11urn     object
sno12urn     object
sno13urn     object
sno14urn     object
sno15urn     object
sno16urn     object
sno17urn     object
sno18urn     object
sno1mail     object
sno2mail     object
sno3mail     object
sno4mail     object
sno5mail     object
sno6mail     object
sno7mail     object
sno8mail     object
sno9mail     object
sno10mail    object
sno11mail    object
sno12mail    object
sno13mail    object
sno14mail    object


In [311]:
# merge state df to lga df

# imam_sup = pd.merge(lga,state, on='state_num', suffixes=['_lga','_st'])

# imam_sup = pd.merge(lga, state, on='state_num')


# imam_sup = lga.join(state, how='inner')

# imam_sup = lga.merge(state)

# imam_sup = lga.merge(state, on=['state_num'])

# imam_sup = lga.join(state, on=['state_num'])

first_second = pd.merge(lga, state, left_on='state_num', right_index=True, how='left', sort=False)

# imam_sup = lga.join(state, on='state_num')

first_second

Unnamed: 0,siteid,lga1name,lga2name,lga3name,lga4name,lga5name,lga1urn,lga2urn,lga3urn,lga4urn,...,sno9mail,sno10mail,sno11mail,sno12mail,sno13mail,sno14mail,sno15mail,sno16mail,sno17mail,sno18mail
0,201,Edina Richard.,,,,,tel:+2348131816143,,,,...,,,,,,,,,,
1,202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348038056048,tel:+2348086625852,tel:+2348065356507,,...,,,,,,,,,,
2,203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,...,,,,,,,,,,
3,204,Rose Zidon .,Rose Mbamuno Zidon,,,,tel:+2347065739356,tel:+2349086559594,,,...,,,,,,,,,,
4,205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2347030635580,tel:+2348025568649,,,...,,,,,,,,,,
5,206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,...,,,,,,,,,,
6,207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348022645708,tel:+2348034154949,,,...,,,,,,,,,,
7,208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,...,,,,,,,,,,
8,209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,...,,,,,,,,,,
9,210,Adama Abubakar.,Ularamai Raphael .,,,,tel:+2349078543500,tel:+2347087576976,,,...,,,,,,,,,,


In [313]:
# is Statename# null ? 
first_second.sno1name.value_counts()

Wullanga Alfred.              21
Magret Ayuba.                 21
Adrienne Colwell Klein.       17
Musbahu Hamisu.               15
Abdulmalik Muhammad Illo .    15
MUSA  MOHAMMED  HADEJIA .     12
Hassan Muhammad Galadanci.     9
Bello Umar Jabaka.             7
Nyako William Azi.             6
Usman Baraya.                  3
Jackson Ladu Martins.          3
Saratu Aduwak.                 2
Name: sno1name, dtype: int64

In [314]:
# # Test Export as Excel 
# filename = "IMAM_state_lga_test.xlsx"
# writer = pd.ExcelWriter(filename, engine='xlsxwriter')
# imam_sup.to_excel(writer,'Sheet1')
# writer.save()
# writer.close()

In [315]:
# SiteID is now NaN for state. 
# can convert this to SiteID later.
first_second['lga_num'] = first_second['siteid'] 

# imam_sup.state_num.value_counts()

In [325]:
# merge to implementation staff df
site = df2.query('siteid > 3699')
site.siteid.value_counts()

3512110004    8
3503210022    8
3603110029    7
2106110004    7
2106110020    7
3511110005    7
3513110005    6
821110057     6
1714110008    6
3305110027    6
3306110017    6
1703110012    6
2118110015    6
806110012     6
1727110010    5
2034110018    5
821110052     5
2011120005    5
1702110010    5
2118110014    5
1717110001    5
1940110023    5
2109210028    5
3301110011    5
2011110029    5
3321110020    5
2104110014    5
2112110009    5
821110048     5
1719110024    5
             ..
816110037     1
2003210012    1
2104110008    1
2104110006    1
3508110020    1
2004110035    1
811110016     1
2108110005    1
821220023     1
815110008     1
207110015     1
808110002     1
3321310007    1
207110089     1
2026110022    1
43100110      1
2034110016    1
1805110047    1
813110044     1
813110040     1
813110034     1
2034110030    1
1805110031    1
3502110034    1
823210001     1
1805110023    1
3514110005    1
3501110014    1
2010110006    1
3501110031    1
Name: siteid, dtype: int

In [320]:
# Change the order (the index) of the columns
columnsTitles = ['name',
                 'num',
                 'siteid', 
                 'lga_num', 
                 'state_num' 
                 ]

site = site.reindex(columns=columnsTitles)

In [327]:
site

Unnamed: 0,siteid,name,urn,mail,post,type,first_seen,last_seen,lga_num,state_num,post_rank
1140,351707,Baba gaji lawan.,tel:+2348065992888,,,,2017-01-14 14:35:48.137461,2017-02-13 14:32:11.069752,,,
1676,17002312,Lawan Hassan .,tel:+2347080867122,,,,2016-09-13 15:55:11.776705,2017-01-16 09:01:49.528369,,,
1675,43100110,Jamil Mohammad.,tel:+2347088790682,,,,2016-11-03 21:28:34.716529,2017-01-16 09:01:50.017487,,,
718,101110001,Assay T Bulti.,tel:+2348035351744,,In Charge Hospital/PHC,OTP,2016-08-22 08:31:51.601275,2017-05-03 09:07:20.055704,101.0,1.0,4.0
1371,101110001,Elfriede Mamie Kormawa.,tel:+2347064019648,ekormawa@unicef.org,Doctor,OTP,2016-06-24 08:04:14.291228,2017-05-02 09:57:59.441663,101.0,1.0,5.0
1106,101110001,Aisha Kaka Bello.,tel:+2348035865826,bello.a.kaka@gmail.com,Technical Assistance,OTP,2016-10-15 11:03:06.595684,2017-01-16 09:01:50.505299,101.0,1.0,10.0
2089,201110007,Assurance David.,tel:+2349021245510,,Community Health Officer,OTP,2016-10-25 09:05:47.222224,2017-05-08 08:17:29.557400,201.0,2.0,8.0
662,201110007,Gracegidado.,tel:+2348134107585,,Community Health Officer,OTP,2016-10-25 09:10:31.153335,2017-05-08 08:17:29.665633,201.0,2.0,8.0
1671,201110007,Phcc Dong.,tel:+2349028026299,,,,2016-11-05 11:12:25.103702,2017-01-18 20:05:18.505335,201.0,2.0,
1161,201110009,Jasper Gift Jonathan.,tel:+2348160018409,,Community Health Officer,OTP,2016-10-25 09:07:25.708751,2017-05-08 08:17:29.878409,201.0,2.0,8.0


In [95]:
# del imam_supervision

In [328]:
# merge supervision df to implementation df

# imam_supervision = imam_imp.join(imam_sup, suffixes=('_x', '_y'), on='lga_num')

imam_supervision = pd.merge(first_second, site, left_on='lga_num', right_on='lga_num', how='left', 
                            suffixes=('_x', '_y'), sort=False)


In [329]:
# Name, Phone, SiteID, SiteName, State, LGA, LGA name phone email, State name phone email
imam_supervision

Unnamed: 0,siteid_x,name,urn,mail,post,type,first_seen,last_seen,lga_num,state_num_x,...,sno9mail,sno10mail,sno11mail,sno12mail,sno13mail,sno14mail,sno15mail,sno16mail,sno17mail,sno18mail
0,351707,Baba gaji lawan.,tel:+2348065992888,,,,2017-01-14 14:35:48.137461,2017-02-13 14:32:11.069752,,,...,,,,,,,,,,
1,17002312,Lawan Hassan .,tel:+2347080867122,,,,2016-09-13 15:55:11.776705,2017-01-16 09:01:49.528369,,,...,,,,,,,,,,
2,43100110,Jamil Mohammad.,tel:+2347088790682,,,,2016-11-03 21:28:34.716529,2017-01-16 09:01:50.017487,,,...,,,,,,,,,,
3,101110001,Assay T Bulti.,tel:+2348035351744,,In Charge Hospital/PHC,OTP,2016-08-22 08:31:51.601275,2017-05-03 09:07:20.055704,101.0,1.0,...,,,,,,,,,,
4,101110001,Elfriede Mamie Kormawa.,tel:+2347064019648,ekormawa@unicef.org,Doctor,OTP,2016-06-24 08:04:14.291228,2017-05-02 09:57:59.441663,101.0,1.0,...,,,,,,,,,,
5,101110001,Aisha Kaka Bello.,tel:+2348035865826,bello.a.kaka@gmail.com,Technical Assistance,OTP,2016-10-15 11:03:06.595684,2017-01-16 09:01:50.505299,101.0,1.0,...,,,,,,,,,,
6,201110007,Assurance David.,tel:+2349021245510,,Community Health Officer,OTP,2016-10-25 09:05:47.222224,2017-05-08 08:17:29.557400,201.0,2.0,...,,,,,,,,,,
7,201110007,Gracegidado.,tel:+2348134107585,,Community Health Officer,OTP,2016-10-25 09:10:31.153335,2017-05-08 08:17:29.665633,201.0,2.0,...,,,,,,,,,,
8,201110007,Phcc Dong.,tel:+2349028026299,,,,2016-11-05 11:12:25.103702,2017-01-18 20:05:18.505335,201.0,2.0,...,,,,,,,,,,
9,201110009,Jasper Gift Jonathan.,tel:+2348160018409,,Community Health Officer,OTP,2016-10-25 09:07:25.708751,2017-05-08 08:17:29.878409,201.0,2.0,...,,,,,,,,,,


In [341]:
# Test for correct merge
imam_supervision.sno1num.value_counts()
# urn used instead of num

tel:+2348130221420    494
tel:+2348120268361    412
tel:+2347039678456    271
tel:+2348029864318    271
tel:+2348060647427    258
tel:+2348065921417    182
tel:+2348069015170    148
tel:+2348105261540    124
tel:+2348060751100     86
tel:+2348035350951     46
tel:+2348035599416     37
tel:+2349077197415     32
Name: sno1urn, dtype: int64

In [343]:
imam_supervision.lga1num.value_counts()

tel:+2348027411737    110
tel:+2348103514343     73
tel:+2347085888487     56
tel:+2348029437389     45
tel:+2348030636860     40
tel:+2348038537117     38
tel:+2348036408362     35
tel:+2348098271887     34
tel:+2347030100575     33
tel:+2348031914291     33
tel:+2347039632824     33
tel:+2348063318743     31
tel:+2348065871088     30
tel:+2347085656081     30
tel:+2349082509065     29
tel:+2348091841242     29
tel:+2348032801270     28
tel:+2348060802656     26
tel:+2348147962972     26
tel:+2349071833453     25
tel:+2348029493607     25
tel:+2348085380707     24
tel:+2348062941060     23
tel:+2347067974171     22
tel:+2348092090642     22
tel:+2348065279440     22
tel:+2347039077472     21
tel:+2349023862306     21
tel:+2348030842504     21
tel:+2348126471046     20
                     ... 
tel:+2348130521995     12
tel:+2347038454253     12
tel:+2347036119732     12
tel:+2348069393239     11
tel:+2347064427428     11
tel:+2348065754427     11
tel:+2348165916091     11
tel:+2348061

In [None]:
# Check for blanks instead of None in cells

# for cell in imam_supervision:
#    state[cell].fillna(value='', inplace=True)

In [None]:
# Ensure that we have state and LGA level included individually and merged to site level
# Append

In [344]:
#convert to xls
# and/or export as JSON
# Test Export as Excel 
filename = "IMAM_supervision.xlsx"
writer = pd.ExcelWriter(filename, engine='xlsxwriter')
imam_sup.to_excel(writer,'Sheet1')
writer.save()
writer.close()

In [90]:
imam_supervision['sitename'] = imam_supervision['siteid_x'].map(lambda x: Site.objects.get(siteid=x).sitename.strip() if Site.objects.filter(siteid=x) else "")

Unnamed: 0,name,urn,siteid_x,mail,lga_num,state_num_x,siteid_y,lganame1,lganame2,lganame3,...,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18,sitename
0,Munirat Makama.,tel:+2348165422728,2003110030,,2003.0,20.0,2003.0,Assama'u Tukur.,Abdulkadir Muhammad Yasore.,,...,zayyanais@gmailcom,,,,,,,,,KANDAWA
1,Kwatam Dandy.,tel:+2348063455263,821110032,,821.0,8.0,821.0,Hadiza Shettima.,,,...,,,,,,,,,,CBN IDP Camp
2,Amina Musa T.,tel:+2348160109849,811110033,,811.0,8.0,811.0,Mairo musa.,,,...,,,,,,,,,,PHC Gwoza
3,HARUNA ABDULLAHI.,tel:+2348021010530,805110012,,805.0,8.0,805.0,Aishatu Abubakar.,Aishatu Abubakar.,,...,,,,,,,,,,GUR
4,Halima Muhammed.,tel:+2349076655644,821110031,,821.0,8.0,821.0,Hadiza Shettima.,,,...,,,,,,,,,,Teachers Village IDP Camp


In [None]:
imam_supervision.head()

In [124]:
Site.objects.all()

<QuerySet [<Site: Site Bagega PHC>, <Site: Site Kasumka Comm Disp>, <Site: Site Kawaye Disp>, <Site: Site Makakari Disp>, <Site: Site Tungar Daji Disp (Anka)>, <Site: Site Tungar Kudaku >, <Site: Site Barayar Zaki Primary HC>, <Site: Site Bardi Disp>, <Site: Site Dutsin Dan Ajiya Disp>, <Site: Site Rafin Gero PHC>, <Site: Site Anka WCW Clinic>, <Site: Site Anka Psychiatric Hosp.>, <Site: Site Gargam Dispensary>, <Site: Site Galadunci Desp.>, <Site: Site inwala Disp>, <Site: Site Kadadabba Disp>, <Site: Site Abare Disp>, <Site: Site Anka GH>, <Site: Site Anka orphans and less previlaged Clinic>, <Site: Site Dareta Dispensary>, '...(remaining elements truncated)...']>

In [None]:
# are any SiteIDs NaN ?

# print df.siteid.str.isdigit()
# df['siteid'].str.contains('^\d+$')

# SiteIDs in PostGres are BigInt thus strings are not allowed. 

# In load contact data for the SiteID, 
# we strip all following text after 9 or 10 digit number
# we convert all letter Os to zeros
# we delete all entries that have text before the siteID. 

In [None]:
# are any SiteIDs NaN ?
df.query('siteid!=siteid')

In [None]:
df2['siteid'] = pd.to_numeric(df2['siteid'], errors='coerce')
# No change

# Remove NaN from siteid

# If you don't specify a variable this will drop any rows including a NaN
df2 = df2.dropna(subset=['siteid'])
# No change

df2 = df2.query('siteid<3699990999')
# No change

In [None]:
df2['siteid'] = df2['siteid'].astype(int)
# No change

In [None]:
# What is the type of siteid
print type(df2['siteid'])
# This does not tell us anything about the variable

In [None]:
df2.dtypes

In [None]:
# do any SiteIDs include the letter o instead of number zero? 
foo = pd.DataFrame({'a' : [1,2,3,4], 'b' : ['hi', 'ooo', 'fat', 'cat']})
foo[foo['b'].str.contains('ooo')]  

# Regex & Data Cleaning
# https://trendct.org/2016/08/05/real-world-data-cleanup-with-python-and-pandas/

# df[df['siteid'].str.contains('ooo')]

# can only use .str accessor with string
#.str.contains('\D')

In [None]:
# To replace
# data['result'].replace(regex=True,inplace=True,to_replace=r'\D',value=r'')


In [None]:
# How many registrations in supervision ? 
len(supervision_df['siteid'])

# Many supervision staff did not record their post - should recode them all as supervisors
supervision_df['post'].str.upper().value_counts()
# str.upper() does not make permanent change. 

# I think there was a data entry error with Post because most people entered their post correctly
# also there should not be any HC, HW in the supervision cadres

In [None]:
pd.unique(state_df.name.ravel())
# Ravel - Return the flattened underlying data as an ndarray