In [23]:
# Create IMAM supervision data to upload into RapidPro

# Identify all supervision (State and LGA) and put in rows by site (name, phone, email)
# Merge State and LGA supervision into complete listing of personnel
# export as .xlsx file

# This is still very excel way of thinking. 
# We should just create the table in json and import to RapidPro

In [436]:
# To show plots in the notebook
%matplotlib inline  

import pandas as pd
import numpy as np
import pandas_highcharts.core
from sqlalchemy import create_engine
import psycopg2
import matplotlib.pyplot 

import os

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "IMAM.settings")

from django.conf import settings
from home.management.commands.load_data import assign_state_lga_num, rename_cols, generic_cleaning, merge_in_and_outpatients, add_program_reports_from_supervision

import django
django.setup()

from home.models import First_admin, Second_admin, Site, Registration

In [437]:
# For exporting excel files using XlsxWriter
from pandas import ExcelWriter
import xlsxwriter

In [438]:
# Suppress scientific notation
# pd.options.display.float_format = '{:20,.0f}'.format

In [439]:
engine = create_engine(
    'postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(**settings.DATABASES['default']))
df = pd.read_sql_query("select * from registration;", con=engine)

In [440]:
# If there is no data in PostGres
# df = pd.ExcelFile('/home/robert/Downloads/reg.xlsx').parse('Contacts')

In [441]:
df.head()

Unnamed: 0,contact_uuid,urn,name,groups,siteid,type,first_seen,last_seen,post,mail,lga_num,state_num
0,0ca2a249-d30c-4374-b10e-6e5fb7fb9fab,tel:+2348036519538,Murtala M Inuwa.,,19,,2016-11-02 09:52:21.325302,2017-05-01 07:12:31.695006,Coordinator,murtalamuhd33@gmail.com,,19
1,7903e497-8bfe-47a5-bfd6-78584b3cc09f,tel:+2348165422728,Munirat Makama.,,2003110030,OTP,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,Nurse/Midwife,,2003.0,20
2,982228b2-1c75-4462-93e8-659d4f32caaa,tel:+2348098248879,Fumen Fuset Emmanuel.,,19,,2016-10-31 10:14:32.202894,2017-05-01 07:12:30.564476,Database Manager,ffekura@gmail.com,,19
3,472dea24-f143-4e35-a542-a6f01996ec12,tel:+2348063455263,Kwatam Dandy.,,821110032,OTP,2016-10-18 10:13:17.924586,2017-05-01 07:05:21.348423,Community Health Officer,,821.0,8
4,ed674a59-ff07-408a-88c5-7a4f0967f5b4,tel:+2348160109849,Amina Musa T.,,811110033,OTP,2017-04-28 14:28:51.700810,2017-05-02 15:19:18.310892,Community Health Officer,,811.0,8


In [442]:
# run rename columns function
rename_cols(df)

Unnamed: 0,contact_uuid,urn,name,groups,siteid,type,first_seen,last_seen,post,mail,lga_num,state_num
0,0ca2a249-d30c-4374-b10e-6e5fb7fb9fab,tel:+2348036519538,Murtala M Inuwa.,,19,,2016-11-02 09:52:21.325302,2017-05-01 07:12:31.695006,Coordinator,murtalamuhd33@gmail.com,,19
1,7903e497-8bfe-47a5-bfd6-78584b3cc09f,tel:+2348165422728,Munirat Makama.,,2003110030,OTP,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,Nurse/Midwife,,2003,20
2,982228b2-1c75-4462-93e8-659d4f32caaa,tel:+2348098248879,Fumen Fuset Emmanuel.,,19,,2016-10-31 10:14:32.202894,2017-05-01 07:12:30.564476,Database Manager,ffekura@gmail.com,,19
3,472dea24-f143-4e35-a542-a6f01996ec12,tel:+2348063455263,Kwatam Dandy.,,821110032,OTP,2016-10-18 10:13:17.924586,2017-05-01 07:05:21.348423,Community Health Officer,,821,8
4,ed674a59-ff07-408a-88c5-7a4f0967f5b4,tel:+2348160109849,Amina Musa T.,,811110033,OTP,2017-04-28 14:28:51.700810,2017-05-02 15:19:18.310892,Community Health Officer,,811,8
5,3b01093a-271c-413e-a10a-8e5423de7448,tel:+2348021010530,HARUNA ABDULLAHI.,,805110012,OTP,2017-05-02 11:48:29.502721,2017-05-03 15:51:51.870141,Volunteer,,805,8
6,9048eab9-50f3-4d57-8b11-52c9860a7d95,tel:+2349076655644,Halima Muhammed.,,821110031,OTP,2016-10-18 10:13:43.931902,2017-05-01 07:05:19.854107,Community Health Officer,,821,8
7,30e48f2a-1dd8-4d49-9945-da17cfa114a9,tel:+2348126752275,Umar Abubakar.,,211110047,OTP,2016-10-26 09:10:39.762480,2017-05-01 06:58:56.645605,In Charge Hospital/PHC,,211,2
8,fa08f0c2-80d8-4650-acbf-e46d98225b49,tel:+2348080488082,Umar Abubakar.,,211110047,OTP,2016-10-26 10:48:14.152486,2017-05-01 06:58:56.184455,In Charge Hospital/PHC,,211,2
9,3130d94b-f3bd-4412-b7c9-acd8b5598e81,tel:+2348087760645,IBRAHIM LAMARA.,,504110010,OTP,2017-02-22 14:12:33.933315,2017-04-12 14:09:33.640518,In Charge Hospital/PHC,,504,5


In [443]:
# rename SiteID to siteid
# df=df.rename(columns = {'SiteID':'siteid'})

# Change the order (the index) of the columns
columnsTitles = ['siteid',                 
                 'name',
                 'urn',
                 'mail',
                 'post',
                 'type',
                 'groups',
                 'first_seen',
                 'last_seen',
                 'lga_num', 
                 'state_num' 
                 ]

df2 = df.reindex(columns=columnsTitles)
df2

Unnamed: 0,siteid,name,urn,mail,post,type,groups,first_seen,last_seen,lga_num,state_num
0,19,Murtala M Inuwa.,tel:+2348036519538,murtalamuhd33@gmail.com,Coordinator,,,2016-11-02 09:52:21.325302,2017-05-01 07:12:31.695006,,19
1,2003110030,Munirat Makama.,tel:+2348165422728,,Nurse/Midwife,OTP,,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,2003,20
2,19,Fumen Fuset Emmanuel.,tel:+2348098248879,ffekura@gmail.com,Database Manager,,,2016-10-31 10:14:32.202894,2017-05-01 07:12:30.564476,,19
3,821110032,Kwatam Dandy.,tel:+2348063455263,,Community Health Officer,OTP,,2016-10-18 10:13:17.924586,2017-05-01 07:05:21.348423,821,8
4,811110033,Amina Musa T.,tel:+2348160109849,,Community Health Officer,OTP,,2017-04-28 14:28:51.700810,2017-05-02 15:19:18.310892,811,8
5,805110012,HARUNA ABDULLAHI.,tel:+2348021010530,,Volunteer,OTP,,2017-05-02 11:48:29.502721,2017-05-03 15:51:51.870141,805,8
6,821110031,Halima Muhammed.,tel:+2349076655644,,Community Health Officer,OTP,,2016-10-18 10:13:43.931902,2017-05-01 07:05:19.854107,821,8
7,211110047,Umar Abubakar.,tel:+2348126752275,,In Charge Hospital/PHC,OTP,,2016-10-26 09:10:39.762480,2017-05-01 06:58:56.645605,211,2
8,211110047,Umar Abubakar.,tel:+2348080488082,,In Charge Hospital/PHC,OTP,,2016-10-26 10:48:14.152486,2017-05-01 06:58:56.184455,211,2
9,504110010,IBRAHIM LAMARA.,tel:+2348087760645,,In Charge Hospital/PHC,OTP,,2017-02-22 14:12:33.933315,2017-04-12 14:09:33.640518,504,5


In [444]:
# Assign state lga num is not necessary as it is done already in the import_contacts.py
# df2 = assign_state_lga_num(df2)
# error with LGA Num in load_data.py

In [445]:
# Create back-up
restore = df2

In [446]:
# Restore
df2 = restore

In [447]:
df2['siteid'].count()

2763

In [448]:
df2 = df2.query('siteid>1')
# Remove national level registrations

In [449]:
df2 = df2.query('siteid!=99')
# siteid == 99 is a number to use for missing or no longer involved with IMAM program
# Remove erroneous registrations

In [450]:
df2.state_num.value_counts()

 8    530
35    455
21    318
 2    311
20    309
17    211
33    179
36    142
19    118
18     65
 5     56
16     50
 1      3
Name: state_num, dtype: int64

In [451]:
df2.query('state_num==1')
# These are test registrations below 

Unnamed: 0,siteid,name,urn,mail,post,type,groups,first_seen,last_seen,lga_num,state_num
917,101110001,Assay T Bulti.,tel:+2348035351744,,In Charge Hospital/PHC,OTP,,2016-08-22 08:31:51.601275,2017-05-03 09:07:20.055704,101,1
1382,101110001,Aisha Kaka Bello.,tel:+2348035865826,bello.a.kaka@gmail.com,Technical Assistance,OTP,,2016-10-15 11:03:06.595684,2017-01-16 09:01:50.505299,101,1
1717,101110001,Elfriede Mamie Kormawa.,tel:+2347064019648,ekormawa@unicef.org,Doctor,OTP,,2016-06-24 08:04:14.291228,2017-05-02 09:57:59.441663,101,1


In [452]:
# create db with only supervision staff
supervision_df = df2[df2['siteid'] <= 3699]
# supervision siteids range from 1 to 3699

In [453]:
# Sort data
supervision_df = supervision_df.sort_values(by='siteid')
supervision_df

Unnamed: 0,siteid,name,urn,mail,post,type,groups,first_seen,last_seen,lga_num,state_num
852,2,Reuben Aidaticha.,tel:+2348088481006,aidaticha@gmail.com,Database Manager,,,2016-10-24 10:38:14.041832,2017-05-01 07:12:32.919732,,2
2369,2,Hauwa Zoakah.,tel:+2347035678763,hauwabata@yahoo.com,Coordinator,,,2016-06-01 12:17:48.149140,2017-05-01 07:12:33.249846,,2
2365,2,Wullanga Alfred,tel:+2347032853473,wulangaalfred@gmail.com,Coordinator,,,2016-10-26 09:06:02.356386,2017-05-02 11:46:46.575273,,2
2345,2,Reuben Aidaticha.,tel:+2348038213859,aidaticha@gmail.com,Database Manager,,,2016-10-24 09:35:36.024432,2017-05-01 07:12:33.466924,,2
2348,2,Wullanga Alfred.,tel:+2348029864318,wullangaalfred@gmail.com,Database Manager,,,2016-10-24 09:35:46.229729,2017-05-01 07:12:33.358664,,2
854,2,Hauwa Zoakah .,tel:+2348020938959,hauwabata@yahoo.com,Coordinator,,,2016-10-26 09:04:19.462503,2017-05-01 07:12:32.809703,,2
2180,2,Olawumi Monica Ajayi.,tel:+2348036173901,wumi.ajayi@yahoo.com,Technical Assistance,,,2016-10-24 09:38:19.635591,2017-05-01 07:12:33.141239,,2
2181,2,Ijagila Mark .,tel:+2348100001300,ijagilamark@gmail.com,Coordinator,,,2016-10-24 09:36:56.209869,2017-05-01 07:12:32.701764,,2
771,5,Ali Shehu Kobi .,tel:+2347032683737,,Stocks Manager,,,2016-09-02 10:40:33.174405,2017-02-27 11:33:30.512228,,5
1790,5,Habu Abdulmalik Dauda.,tel:+2348180448144,dhadboolagajei72@gmail.com,Database Manager,,,2016-09-02 14:26:26.565091,2017-02-27 11:33:30.122641,,5


In [454]:
# to pivot data, create counts of each case by siteid
# Remember that siteid == state_num or LGA_num if on supervision level. 
# It is not necessary to run this command below on the state_num or lga_num. 

# df['count'] = df.groupby('col').cumcount() + 1
supervision_df['count'] = supervision_df.groupby('siteid').cumcount() + 1

In [455]:
supervision_df = supervision_df.drop('type', axis=1)
supervision_df = supervision_df.drop('groups', axis=1)

In [456]:
supervision_df

Unnamed: 0,siteid,name,urn,mail,post,first_seen,last_seen,lga_num,state_num,count
852,2,Reuben Aidaticha.,tel:+2348088481006,aidaticha@gmail.com,Database Manager,2016-10-24 10:38:14.041832,2017-05-01 07:12:32.919732,,2,1
2369,2,Hauwa Zoakah.,tel:+2347035678763,hauwabata@yahoo.com,Coordinator,2016-06-01 12:17:48.149140,2017-05-01 07:12:33.249846,,2,2
2365,2,Wullanga Alfred,tel:+2347032853473,wulangaalfred@gmail.com,Coordinator,2016-10-26 09:06:02.356386,2017-05-02 11:46:46.575273,,2,3
2345,2,Reuben Aidaticha.,tel:+2348038213859,aidaticha@gmail.com,Database Manager,2016-10-24 09:35:36.024432,2017-05-01 07:12:33.466924,,2,4
2348,2,Wullanga Alfred.,tel:+2348029864318,wullangaalfred@gmail.com,Database Manager,2016-10-24 09:35:46.229729,2017-05-01 07:12:33.358664,,2,5
854,2,Hauwa Zoakah .,tel:+2348020938959,hauwabata@yahoo.com,Coordinator,2016-10-26 09:04:19.462503,2017-05-01 07:12:32.809703,,2,6
2180,2,Olawumi Monica Ajayi.,tel:+2348036173901,wumi.ajayi@yahoo.com,Technical Assistance,2016-10-24 09:38:19.635591,2017-05-01 07:12:33.141239,,2,7
2181,2,Ijagila Mark .,tel:+2348100001300,ijagilamark@gmail.com,Coordinator,2016-10-24 09:36:56.209869,2017-05-01 07:12:32.701764,,2,8
771,5,Ali Shehu Kobi .,tel:+2347032683737,,Stocks Manager,2016-09-02 10:40:33.174405,2017-02-27 11:33:30.512228,,5,1
1790,5,Habu Abdulmalik Dauda.,tel:+2348180448144,dhadboolagajei72@gmail.com,Database Manager,2016-09-02 14:26:26.565091,2017-02-27 11:33:30.122641,,5,2


In [457]:
# Export as Excel 
filename = "IMAM_state_supervision.xlsx"
writer = pd.ExcelWriter(filename, engine='xlsxwriter')
supervision_df.to_excel(writer,'Sheet1')
writer.save()
writer.close()

In [458]:
# The data points to include in IMAM Supervision database for each supervision SiteID
# Name, Phone Number (URN), email
# can remove state_num and lga_num

columnsTitles = ['siteid',                 
                 'name',
                 'urn',
                 'mail',
                 'count',
                 ]

supervision_df = supervision_df.reindex(columns=columnsTitles)

In [459]:
# create db with only State Level supervision staff
state_df = supervision_df[supervision_df['siteid'] <= 39]


In [460]:
state_df

Unnamed: 0,siteid,name,urn,mail,count
852,2,Reuben Aidaticha.,tel:+2348088481006,aidaticha@gmail.com,1
2369,2,Hauwa Zoakah.,tel:+2347035678763,hauwabata@yahoo.com,2
2365,2,Wullanga Alfred,tel:+2347032853473,wulangaalfred@gmail.com,3
2345,2,Reuben Aidaticha.,tel:+2348038213859,aidaticha@gmail.com,4
2348,2,Wullanga Alfred.,tel:+2348029864318,wullangaalfred@gmail.com,5
854,2,Hauwa Zoakah .,tel:+2348020938959,hauwabata@yahoo.com,6
2180,2,Olawumi Monica Ajayi.,tel:+2348036173901,wumi.ajayi@yahoo.com,7
2181,2,Ijagila Mark .,tel:+2348100001300,ijagilamark@gmail.com,8
771,5,Ali Shehu Kobi .,tel:+2347032683737,,1
1790,5,Habu Abdulmalik Dauda.,tel:+2348180448144,dhadboolagajei72@gmail.com,2


In [461]:
# convert vertical to horizontal database. 
# will None in the cells cause us to send excessive number of warning SMS? 
state_wide = state_df.pivot(index='siteid', columns='count')

In [462]:
state_wide.head()

Unnamed: 0_level_0,name,name,name,name,name,name,name,name,name,name,...,mail,mail,mail,mail,mail,mail,mail,mail,mail,mail
count,1,2,3,4,5,6,7,8,9,10,...,9,10,11,12,13,14,15,16,17,18
siteid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,Reuben Aidaticha.,Hauwa Zoakah.,Wullanga Alfred,Reuben Aidaticha.,Wullanga Alfred.,Hauwa Zoakah .,Olawumi Monica Ajayi.,Ijagila Mark .,,,...,,,,,,,,,,
5,Ali Shehu Kobi .,Habu Abdulmalik Dauda.,Hamza Yakubu Sade .,Jackson Ladu Martins.,Charity Evans Nysalamke.,Sama'ila Usman Maikan.,Yakubu Baba.,,,,...,,,,,,,,,,
8,Magret Ayuba.,"Amarachi, Clementina Chukwuma.",Abdullahi Alhaji Madi.,Hassana Suleiman Jibrin.,Daniel James .,Aminu Usman Danzomo.,,,,,...,,,,,,,,,,
16,Rukayya Lawal.,Usman Baraya.,Ibrahim Inuwa Lano.,Olufunmilayo Adepoju-adebambo.,Ahmed Audu Saddana.,Selamawit Negash.,Ronas Amos Amusa .,Suleiman Mamman.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Olatomiwa Olabisi.,Temidayo Esther Ajala.,Saidu Umar Adamu.,,,,,,,...,,,,,,,,,,


In [463]:
# Create new column name
# and correct multiIndex
state_wide.columns = ["sno" + (state_wide.columns[i][0]) + str(state_wide.columns[i][1]) for i in range(len(state_wide.columns))]

In [472]:
state = state_wide.reset_index()
state
# Note state_num does not exist, but is same as siteid

Unnamed: 0,siteid,snoname1,snoname2,snoname3,snoname4,snoname5,snoname6,snoname7,snoname8,snoname9,...,snomail9,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18
0,2,Reuben Aidaticha.,Hauwa Zoakah.,Wullanga Alfred,Reuben Aidaticha.,Wullanga Alfred.,Hauwa Zoakah .,Olawumi Monica Ajayi.,Ijagila Mark .,,...,,,,,,,,,,
1,5,Ali Shehu Kobi .,Habu Abdulmalik Dauda.,Hamza Yakubu Sade .,Jackson Ladu Martins.,Charity Evans Nysalamke.,Sama'ila Usman Maikan.,Yakubu Baba.,,,...,,,,,,,,,,
2,8,Magret Ayuba.,"Amarachi, Clementina Chukwuma.",Abdullahi Alhaji Madi.,Hassana Suleiman Jibrin.,Daniel James .,Aminu Usman Danzomo.,,,,...,,,,,,,,,,
3,16,Rukayya Lawal.,Usman Baraya.,Ibrahim Inuwa Lano.,Olufunmilayo Adepoju-adebambo.,Ahmed Audu Saddana.,Selamawit Negash.,Ronas Amos Amusa .,Suleiman Mamman.,,...,,,,,,,,,,
4,17,MUSA MOHAMMED HADEJIA .,Olatomiwa Olabisi.,Temidayo Esther Ajala.,Saidu Umar Adamu.,,,,,,...,,,,,,,,,,
5,18,Jane Gwani.,Maryam Yusuf .,Hauwa Usman .,Saratu Aduwak.,,,,,,...,,,,,,,,,,
6,19,Abigail Ishaya Nyam .,Sabo Wada.,Murtala M Inuwa.,Ado Jibrin Sanda.,Fumen Fuset Emmanuel.,Idowu Kusemi.,Nyako William Azi.,Ayodeji Osunkentan.,Sabo Wada.,...,sabowada57@gmail.com,,msaniabdullahi@gmail.com,akilumfashi@gmail.com,lekrunmon@yahoo.com,akilumfashi@gmail.com,abolarin.samuelsesan@gmail.com,adomustaphabichi1@gmail.com,dgauji@yahoo.com,nyakoabi@yahoo.com
7,20,Ibrahim Maje Sayyadi.,Rabia Mohammed Sno .,Musbahu Hamisu.,Zayyana Isyaku Sule.,Ado Ibrahim Abdulrahman.,Yusufahmadu Gangara.,Abdulhadi Abdulkadir.,Hamisu Idris K/bai.,Sim Karla Sakyenu.,...,simkarla@live.com,abdulmuminlawan@gmail.com,,,,,,,,
8,21,Beatrice Kwere.,Abdulmalik Muhammad Illo .,Shamsu Muhammed.,Abisola Mary Atoyebi.,Suwaiba M Bello.,Sunday Norbert.,Aliyu Galadima Libata .,Abimbola Abosede Sobo.,Aliyu Galadima Libata.,...,kabanjali@yahoo.com.hk,,,,,,,,,
9,33,Hassan Muhammad Galadanci.,Nura Muazu.,Nura Shehu .,Abdallah Ladan.,Sany Mohd Sokoto.,Kamaru Muhammad.,Muhammad Ali Hamza.,Buharismalami.,Hassanmuhammadgaladanci.,...,,,,,,,,,,


In [473]:
# Change None in cells to blank
for cell in state:
   state[cell].fillna(value='', inplace=True)

In [474]:
# Prepared state level list of supervisors

In [475]:
state = state.set_index('siteid')

In [476]:
state.index.name = None
state

Unnamed: 0,snoname1,snoname2,snoname3,snoname4,snoname5,snoname6,snoname7,snoname8,snoname9,snoname10,...,snomail9,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18
2,Reuben Aidaticha.,Hauwa Zoakah.,Wullanga Alfred,Reuben Aidaticha.,Wullanga Alfred.,Hauwa Zoakah .,Olawumi Monica Ajayi.,Ijagila Mark .,,,...,,,,,,,,,,
5,Ali Shehu Kobi .,Habu Abdulmalik Dauda.,Hamza Yakubu Sade .,Jackson Ladu Martins.,Charity Evans Nysalamke.,Sama'ila Usman Maikan.,Yakubu Baba.,,,,...,,,,,,,,,,
8,Magret Ayuba.,"Amarachi, Clementina Chukwuma.",Abdullahi Alhaji Madi.,Hassana Suleiman Jibrin.,Daniel James .,Aminu Usman Danzomo.,,,,,...,,,,,,,,,,
16,Rukayya Lawal.,Usman Baraya.,Ibrahim Inuwa Lano.,Olufunmilayo Adepoju-adebambo.,Ahmed Audu Saddana.,Selamawit Negash.,Ronas Amos Amusa .,Suleiman Mamman.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Olatomiwa Olabisi.,Temidayo Esther Ajala.,Saidu Umar Adamu.,,,,,,,...,,,,,,,,,,
18,Jane Gwani.,Maryam Yusuf .,Hauwa Usman .,Saratu Aduwak.,,,,,,,...,,,,,,,,,,
19,Abigail Ishaya Nyam .,Sabo Wada.,Murtala M Inuwa.,Ado Jibrin Sanda.,Fumen Fuset Emmanuel.,Idowu Kusemi.,Nyako William Azi.,Ayodeji Osunkentan.,Sabo Wada.,Auwalu Ibrahim.,...,sabowada57@gmail.com,,msaniabdullahi@gmail.com,akilumfashi@gmail.com,lekrunmon@yahoo.com,akilumfashi@gmail.com,abolarin.samuelsesan@gmail.com,adomustaphabichi1@gmail.com,dgauji@yahoo.com,nyakoabi@yahoo.com
20,Ibrahim Maje Sayyadi.,Rabia Mohammed Sno .,Musbahu Hamisu.,Zayyana Isyaku Sule.,Ado Ibrahim Abdulrahman.,Yusufahmadu Gangara.,Abdulhadi Abdulkadir.,Hamisu Idris K/bai.,Sim Karla Sakyenu.,Abdulmumin Lawan.,...,simkarla@live.com,abdulmuminlawan@gmail.com,,,,,,,,
21,Beatrice Kwere.,Abdulmalik Muhammad Illo .,Shamsu Muhammed.,Abisola Mary Atoyebi.,Suwaiba M Bello.,Sunday Norbert.,Aliyu Galadima Libata .,Abimbola Abosede Sobo.,Aliyu Galadima Libata.,Abdulmalik Muhammad Illo.,...,kabanjali@yahoo.com.hk,,,,,,,,,
33,Hassan Muhammad Galadanci.,Nura Muazu.,Nura Shehu .,Abdallah Ladan.,Sany Mohd Sokoto.,Kamaru Muhammad.,Muhammad Ali Hamza.,Buharismalami.,Hassanmuhammadgaladanci.,,...,,,,,,,,,,


In [477]:
# Create same list for LGA
# create db with only LGA Level supervision staff
lga_df = supervision_df[supervision_df.siteid >= 101]
lga_df = lga_df[lga_df['siteid'] <= 3799]


In [478]:
lga_df

Unnamed: 0,siteid,name,urn,mail,count
1789,201,Edina Richard.,tel:+2348131816143,,1
1787,202,Monday Leasado.,tel:+2348038056048,,1
1788,202,Monday Leasado.,tel:+2348086625852,,2
1370,202,Monday Leasado.,tel:+2348065356507,,3
2031,203,Naaticha Waziri .,tel:+2348037675670,naatiwaziri69@gmail.com,1
1755,204,Rose Zidon .,tel:+2347065739356,,1
1785,204,Rose Mbamuno Zidon,tel:+2349086559594,,2
2303,205,Esther Danjuma.,tel:+2347030635580,,1
2056,205,Esther Danjuma.,tel:+2348025568649,,2
1979,206,Hannatu B Usman.,tel:+2348064811859,hannatubbu@gmail.com,1


In [479]:
# convert vertical to horizontal database. 
# will None in the cells cause us to send excessive number of warning SMS? 
lga_wide = lga_df.pivot(index='siteid', columns='count')

In [480]:
lga_wide

Unnamed: 0_level_0,name,name,name,name,name,urn,urn,urn,urn,urn,mail,mail,mail,mail,mail
count,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5
siteid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
201,Edina Richard.,,,,,tel:+2348131816143,,,,,,,,,
202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348038056048,tel:+2348086625852,tel:+2348065356507,,,,,,,
203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,,naatiwaziri69@gmail.com,,,,
204,Rose Zidon .,Rose Mbamuno Zidon,,,,tel:+2347065739356,tel:+2349086559594,,,,,,,,
205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2347030635580,tel:+2348025568649,,,,,,,,
206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,,hannatubbu@gmail.com,,,,
207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348034154949,tel:+2348022645708,,,,,,,,
208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,,,apollospola@gmail,,,
209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,,,,,,
210,Adama Abubakar.,Ularamai Raphael .,,,,tel:+2349078543500,tel:+2347087576976,,,,,,,,


In [481]:
# Rename columns and correct multiIndex
lga_wide.columns = ["lga" + (lga_wide.columns[i][0]) + str(lga_wide.columns[i][1]) for i in range(len(lga_wide.columns))]

In [482]:
lga = lga_wide.reset_index()

In [494]:
lga

Unnamed: 0,siteid,lganame1,lganame2,lganame3,lganame4,lganame5,lgaurn1,lgaurn2,lgaurn3,lgaurn4,lgaurn5,lgamail1,lgamail2,lgamail3,lgamail4,lgamail5,state_num
0,201,Edina Richard.,,,,,tel:+2348131816143,,,,,,,,,,2
1,202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348038056048,tel:+2348086625852,tel:+2348065356507,,,,,,,,2
2,203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,,naatiwaziri69@gmail.com,,,,,2
3,204,Rose Zidon .,Rose Mbamuno Zidon,,,,tel:+2347065739356,tel:+2349086559594,,,,,,,,,2
4,205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2347030635580,tel:+2348025568649,,,,,,,,,2
5,206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,,hannatubbu@gmail.com,,,,,2
6,207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348034154949,tel:+2348022645708,,,,,,,,,2
7,208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,,,apollospola@gmail,,,,2
8,209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,,,,,,,2
9,210,Adama Abubakar.,Ularamai Raphael .,,,,tel:+2349078543500,tel:+2347087576976,,,,,,,,,2


In [485]:
# Change None in cells to blank
for cell in lga:
   lga[cell].fillna(value='', inplace=True)

In [496]:
lga['state_num'] = 0

In [497]:
# Add state_num to LGA df
lga['siteid_lgt'] = lga['siteid'].astype(str).str.len()

In [498]:
lga['state_num'] = np.where(lga['siteid_lgt']==3, lga['siteid'].astype(str).str[:1], lga['state_num'])
lga['state_num'] = np.where(lga['siteid_lgt']==4, lga['siteid'].astype(str).str[:2], lga['state_num'])
lga = lga.drop('siteid_lgt', axis=1)
lga['state_num'].value_counts()

8     21
2     21
35    17
20    15
21    15
17    12
33     9
36     7
19     6
16     3
5      3
18     2
Name: state_num, dtype: int64

In [412]:
lga.dtypes

siteid        int64
lganame1     object
lganame2     object
lgaurn1      object
lgaurn2      object
lgamail1     object
lgamail2     object
state_num    object
dtype: object

In [409]:
# Prepared LGA level supervision list


In [502]:
state

Unnamed: 0,snoname1,snoname2,snoname3,snoname4,snoname5,snoname6,snoname7,snoname8,snoname9,snoname10,...,snomail9,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18
2,Reuben Aidaticha.,Hauwa Zoakah.,Wullanga Alfred,Reuben Aidaticha.,Wullanga Alfred.,Hauwa Zoakah .,Olawumi Monica Ajayi.,Ijagila Mark .,,,...,,,,,,,,,,
5,Ali Shehu Kobi .,Habu Abdulmalik Dauda.,Hamza Yakubu Sade .,Jackson Ladu Martins.,Charity Evans Nysalamke.,Sama'ila Usman Maikan.,Yakubu Baba.,,,,...,,,,,,,,,,
8,Magret Ayuba.,"Amarachi, Clementina Chukwuma.",Abdullahi Alhaji Madi.,Hassana Suleiman Jibrin.,Daniel James .,Aminu Usman Danzomo.,,,,,...,,,,,,,,,,
16,Rukayya Lawal.,Usman Baraya.,Ibrahim Inuwa Lano.,Olufunmilayo Adepoju-adebambo.,Ahmed Audu Saddana.,Selamawit Negash.,Ronas Amos Amusa .,Suleiman Mamman.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Olatomiwa Olabisi.,Temidayo Esther Ajala.,Saidu Umar Adamu.,,,,,,,...,,,,,,,,,,
18,Jane Gwani.,Maryam Yusuf .,Hauwa Usman .,Saratu Aduwak.,,,,,,,...,,,,,,,,,,
19,Abigail Ishaya Nyam .,Sabo Wada.,Murtala M Inuwa.,Ado Jibrin Sanda.,Fumen Fuset Emmanuel.,Idowu Kusemi.,Nyako William Azi.,Ayodeji Osunkentan.,Sabo Wada.,Auwalu Ibrahim.,...,sabowada57@gmail.com,,msaniabdullahi@gmail.com,akilumfashi@gmail.com,lekrunmon@yahoo.com,akilumfashi@gmail.com,abolarin.samuelsesan@gmail.com,adomustaphabichi1@gmail.com,dgauji@yahoo.com,nyakoabi@yahoo.com
20,Ibrahim Maje Sayyadi.,Rabia Mohammed Sno .,Musbahu Hamisu.,Zayyana Isyaku Sule.,Ado Ibrahim Abdulrahman.,Yusufahmadu Gangara.,Abdulhadi Abdulkadir.,Hamisu Idris K/bai.,Sim Karla Sakyenu.,Abdulmumin Lawan.,...,simkarla@live.com,abdulmuminlawan@gmail.com,,,,,,,,
21,Beatrice Kwere.,Abdulmalik Muhammad Illo .,Shamsu Muhammed.,Abisola Mary Atoyebi.,Suwaiba M Bello.,Sunday Norbert.,Aliyu Galadima Libata .,Abimbola Abosede Sobo.,Aliyu Galadima Libata.,Abdulmalik Muhammad Illo.,...,kabanjali@yahoo.com.hk,,,,,,,,,
33,Hassan Muhammad Galadanci.,Nura Muazu.,Nura Shehu .,Abdallah Ladan.,Sany Mohd Sokoto.,Kamaru Muhammad.,Muhammad Ali Hamza.,Buharismalami.,Hassanmuhammadgaladanci.,,...,,,,,,,,,,


In [490]:
state.dtypes

snoname1     object
snoname2     object
snoname3     object
snoname4     object
snoname5     object
snoname6     object
snoname7     object
snoname8     object
snoname9     object
snoname10    object
snoname11    object
snoname12    object
snoname13    object
snoname14    object
snoname15    object
snoname16    object
snoname17    object
snoname18    object
snourn1      object
snourn2      object
snourn3      object
snourn4      object
snourn5      object
snourn6      object
snourn7      object
snourn8      object
snourn9      object
snourn10     object
snourn11     object
snourn12     object
snourn13     object
snourn14     object
snourn15     object
snourn16     object
snourn17     object
snourn18     object
snomail1     object
snomail2     object
snomail3     object
snomail4     object
snomail5     object
snomail6     object
snomail7     object
snomail8     object
snomail9     object
snomail10    object
snomail11    object
snomail12    object
snomail13    object
snomail14    object


In [501]:
# merge state df to lga df

# imam_sup = pd.merge(lga,state, on='state_num', suffixes=['_lga','_st'])

# imam_sup = pd.merge(lga, state, on='state_num')


# imam_sup = lga.join(state, how='inner')

# imam_sup = lga.merge(state)

# imam_sup = lga.merge(state, on=['state_num'])

# imam_sup = lga.join(state, on=['state_num'])

imam_sup = pd.merge(lga, state, left_on='state_num', right_index=True, how='left', sort=False)

# imam_sup = lga.join(state, on='state_num')

imam_sup

Unnamed: 0,siteid,lganame1,lganame2,lganame3,lganame4,lganame5,lgaurn1,lgaurn2,lgaurn3,lgaurn4,...,snomail9,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18
0,201,Edina Richard.,,,,,tel:+2348131816143,,,,...,,,,,,,,,,
1,202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348038056048,tel:+2348086625852,tel:+2348065356507,,...,,,,,,,,,,
2,203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,...,,,,,,,,,,
3,204,Rose Zidon .,Rose Mbamuno Zidon,,,,tel:+2347065739356,tel:+2349086559594,,,...,,,,,,,,,,
4,205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2347030635580,tel:+2348025568649,,,...,,,,,,,,,,
5,206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,...,,,,,,,,,,
6,207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348034154949,tel:+2348022645708,,,...,,,,,,,,,,
7,208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,...,,,,,,,,,,
8,209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,...,,,,,,,,,,
9,210,Adama Abubakar.,Ularamai Raphael .,,,,tel:+2349078543500,tel:+2347087576976,,,...,,,,,,,,,,


In [503]:
# is Statename# null ? 
imam_sup.snoname1.value_counts()

Series([], Name: snoname1, dtype: int64)

In [506]:
# SiteID is now NaN for state. 
# can convert this to SiteID later.
imam_sup['lga_num'] = imam_sup['siteid'] 

# imam_sup.state_num.value_counts()

In [507]:
# merge to implementation staff df
imam_imp = df2.query('siteid > 3699')
imam_imp.siteid.value_counts()

3512110004    8
3503210022    8
3511110005    7
3603110029    7
2106110004    7
2106110020    7
1714110008    6
806110012     6
2118110015    6
3513110005    6
1703110012    6
3306110017    6
3305110027    6
821110052     5
2001110059    5
2011110029    5
821110009     5
2021210016    5
3321110020    5
3317110023    5
3301110011    5
2011120005    5
821110057     5
1940110023    5
1717110001    5
2034110018    5
2109210028    5
1702110010    5
2112110009    5
2034110001    5
             ..
813110042     1
3502110014    1
813110044     1
43100110      1
207110089     1
3508110015    1
1930110002    1
3613210035    1
3516110004    1
808110002     1
809110008     1
821110060     1
816110040     1
809110004     1
3505110037    1
3605110051    1
1802110018    1
1805110031    1
821110053     1
3502110034    1
1720110003    1
208110016     1
821110059     1
821110061     1
208110012     1
1929110001    1
208110010     1
3512110033    1
813110034     1
1805110047    1
Name: siteid, dtype: int

In [508]:
# Change the order (the index) of the columns
columnsTitles = ['name',
                 'urn',
                 'siteid', 
                 'mail',
                 'lga_num', 
                 'state_num' 
                 ]

imam_imp = imam_imp.reindex(columns=columnsTitles)

In [509]:
len(imam_imp)


2413

In [533]:
imam_imp

Unnamed: 0,name,urn,siteid,mail,lga_num,state_num
1,Munirat Makama.,tel:+2348165422728,2003110030,,2003,20
3,Kwatam Dandy.,tel:+2348063455263,821110032,,821,8
4,Amina Musa T.,tel:+2348160109849,811110033,,811,8
5,HARUNA ABDULLAHI.,tel:+2348021010530,805110012,,805,8
6,Halima Muhammed.,tel:+2349076655644,821110031,,821,8
7,Umar Abubakar.,tel:+2348126752275,211110047,,211,2
8,Umar Abubakar.,tel:+2348080488082,211110047,,211,2
9,IBRAHIM LAMARA.,tel:+2348087760645,504110010,,504,5
10,Lami Isaac.,tel:+2348135016269,821110029,,821,8
11,Elizabeth Musa.,tel:+2348029266456,821110029,,821,8


In [518]:
# del imam_supervision

In [536]:
# merge supervision df to implementation df

# imam_supervision = imam_imp.join(imam_sup, on='lga_num')

imam_supervision = imam_imp.join(imam_sup, suffixes=('_x', '_y'), on='lga_num')

TypeError: join() got an unexpected keyword argument 'suffixes'

In [537]:
imam_supervision

NameError: name 'imam_supervision' is not defined

In [112]:
#convert to xls
# and/or export as JSON


In [None]:
# are any SiteIDs NaN ?

# print df.siteid.str.isdigit()
# df['siteid'].str.contains('^\d+$')

# SiteIDs in PostGres are BigInt thus strings are not allowed. 

# In load contact data for the SiteID, 
# we strip all following text after 9 or 10 digit number
# we convert all letter Os to zeros
# we delete all entries that have text before the siteID. 

In [None]:
# are any SiteIDs NaN ?
df.query('siteid!=siteid')

In [None]:
df2['siteid'] = pd.to_numeric(df2['siteid'], errors='coerce')
# No change

# Remove NaN from siteid

# If you don't specify a variable this will drop any rows including a NaN
df2 = df2.dropna(subset=['siteid'])
# No change

df2 = df2.query('siteid<3699990999')
# No change

In [None]:
df2['siteid'] = df2['siteid'].astype(int)
# No change

In [None]:
# What is the type of siteid
print type(df2['siteid'])
# This does not tell us anything about the variable

In [None]:
df2.dtypes

In [None]:
# do any SiteIDs include the letter o instead of number zero? 
foo = pd.DataFrame({'a' : [1,2,3,4], 'b' : ['hi', 'ooo', 'fat', 'cat']})
foo[foo['b'].str.contains('ooo')]  

# Regex & Data Cleaning
# https://trendct.org/2016/08/05/real-world-data-cleanup-with-python-and-pandas/

# df[df['siteid'].str.contains('ooo')]

# can only use .str accessor with string
#.str.contains('\D')

In [None]:
# To replace
# data['result'].replace(regex=True,inplace=True,to_replace=r'\D',value=r'')


In [None]:
# How many registrations in supervision ? 
len(supervision_df['siteid'])

# Many supervision staff did not record their post - should recode them all as supervisors
supervision_df['post'].str.upper().value_counts()
# str.upper() does not make permanent change. 

# I think there was a data entry error with Post because most people entered their post correctly
# also there should not be any HC, HW in the supervision cadres

In [None]:
pd.unique(state_df.name.ravel())
# Ravel - Return the flattened underlying data as an ndarray