In [23]:
# Create IMAM supervision data to upload into RapidPro

# Identify all supervision (State and LGA) and put in rows by site (name, phone, email)
# Merge State and LGA supervision into complete listing of personnel
# export as .xlsx file

# This is still a very Excel-centric way of thinking.
# We should just build the table as JSON and import it into RapidPro.

In [1]:
# To show plots in the notebook
%matplotlib inline  

import pandas as pd
import numpy as np
import pandas_highcharts.core
from sqlalchemy import create_engine
import psycopg2
import matplotlib.pyplot 

import os

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "IMAM.settings")

from django.conf import settings
from home.management.commands.load_data import assign_state_lga_num, rename_cols, generic_cleaning, merge_in_and_outpatients, add_program_reports_from_supervision

import django
django.setup()

from home.models import First_admin, Second_admin, Site, Registration

In [2]:
# For exporting excel files using XlsxWriter
from pandas import ExcelWriter
import xlsxwriter

In [3]:
# Suppress scientific notation
# pd.options.display.float_format = '{:20,.0f}'.format

In [7]:
# Build the PostgreSQL connection URL from the Django default database
# settings, then load the whole registration table into a DataFrame.
db_url = 'postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(
    **settings.DATABASES['default'])
engine = create_engine(db_url)
df = pd.read_sql_query("select * from registration;", con=engine)

In [8]:
# If there is no data in PostGres
# df = pd.ExcelFile('/home/robert/Downloads/reg.xlsx').parse('Contacts')

In [9]:
# Preview the first rows of the registration table read from the database.
df.head()

Unnamed: 0,contact_uuid,urn,name,groups,siteid,type,first_seen,last_seen,post,mail,lga_num,state_num
0,0ca2a249-d30c-4374-b10e-6e5fb7fb9fab,tel:+2348036519538,Murtala M Inuwa.,,19,,2016-11-02 09:52:21.325302,2017-05-01 07:12:31.695006,Coordinator,murtalamuhd33@gmail.com,,19.0
1,7903e497-8bfe-47a5-bfd6-78584b3cc09f,tel:+2348165422728,Munirat Makama.,,2003110030,OTP,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,Nurse/Midwife,,2003.0,20.0
2,982228b2-1c75-4462-93e8-659d4f32caaa,tel:+2348098248879,Fumen Fuset Emmanuel.,,19,,2016-10-31 10:14:32.202894,2017-05-01 07:12:30.564476,Database Manager,ffekura@gmail.com,,19.0
3,472dea24-f143-4e35-a542-a6f01996ec12,tel:+2348063455263,Kwatam Dandy.,,821110032,OTP,2016-10-18 10:13:17.924586,2017-05-01 07:05:21.348423,Community Health Officer,,821.0,8.0
4,ed674a59-ff07-408a-88c5-7a4f0967f5b4,tel:+2348160109849,Amina Musa T.,,811110033,OTP,2017-04-28 14:28:51.700810,2017-05-02 15:19:18.310892,Community Health Officer,,811.0,8.0


In [10]:
# run rename columns function
# NOTE(review): the return value is only displayed, never assigned back to df;
# this relies on rename_cols (imported from load_data) changing df in place — confirm.
rename_cols(df)

Unnamed: 0,contact_uuid,urn,name,groups,siteid,type,first_seen,last_seen,post,mail,lga_num,state_num
0,0ca2a249-d30c-4374-b10e-6e5fb7fb9fab,tel:+2348036519538,Murtala M Inuwa.,,19,,2016-11-02 09:52:21.325302,2017-05-01 07:12:31.695006,Coordinator,murtalamuhd33@gmail.com,,19.0
1,7903e497-8bfe-47a5-bfd6-78584b3cc09f,tel:+2348165422728,Munirat Makama.,,2003110030,OTP,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,Nurse/Midwife,,2003.0,20.0
2,982228b2-1c75-4462-93e8-659d4f32caaa,tel:+2348098248879,Fumen Fuset Emmanuel.,,19,,2016-10-31 10:14:32.202894,2017-05-01 07:12:30.564476,Database Manager,ffekura@gmail.com,,19.0
3,472dea24-f143-4e35-a542-a6f01996ec12,tel:+2348063455263,Kwatam Dandy.,,821110032,OTP,2016-10-18 10:13:17.924586,2017-05-01 07:05:21.348423,Community Health Officer,,821.0,8.0
4,ed674a59-ff07-408a-88c5-7a4f0967f5b4,tel:+2348160109849,Amina Musa T.,,811110033,OTP,2017-04-28 14:28:51.700810,2017-05-02 15:19:18.310892,Community Health Officer,,811.0,8.0
5,3b01093a-271c-413e-a10a-8e5423de7448,tel:+2348021010530,HARUNA ABDULLAHI.,,805110012,OTP,2017-05-02 11:48:29.502721,2017-05-03 15:51:51.870141,Volunteer,,805.0,8.0
6,9048eab9-50f3-4d57-8b11-52c9860a7d95,tel:+2349076655644,Halima Muhammed.,,821110031,OTP,2016-10-18 10:13:43.931902,2017-05-01 07:05:19.854107,Community Health Officer,,821.0,8.0
7,30e48f2a-1dd8-4d49-9945-da17cfa114a9,tel:+2348126752275,Umar Abubakar.,,211110047,OTP,2016-10-26 09:10:39.762480,2017-05-01 06:58:56.645605,In Charge Hospital/PHC,,211.0,2.0
8,fa08f0c2-80d8-4650-acbf-e46d98225b49,tel:+2348080488082,Umar Abubakar.,,211110047,OTP,2016-10-26 10:48:14.152486,2017-05-01 06:58:56.184455,In Charge Hospital/PHC,,211.0,2.0
9,3130d94b-f3bd-4412-b7c9-acd8b5598e81,tel:+2348087760645,IBRAHIM LAMARA.,,504110010,OTP,2017-02-22 14:12:33.933315,2017-04-12 14:09:33.640518,In Charge Hospital/PHC,,504.0,5.0


In [11]:
# Reorder the registration columns: site id and contact details first, then
# role/type information, the timestamps and finally the LGA/state numbers.
# (Renaming 'SiteID' to 'siteid' is left disabled in this cell.)
# CHANGE URN TO NUM (not done in this cell)
columnsTitles = [
    'siteid', 'name', 'urn', 'mail', 'post', 'type', 'groups',
    'first_seen', 'last_seen', 'lga_num', 'state_num',
]

# reindex keeps exactly the listed columns, in the listed order
df2 = df.reindex(columns=columnsTitles)
df2

Unnamed: 0,siteid,name,urn,mail,post,type,groups,first_seen,last_seen,lga_num,state_num
0,19,Murtala M Inuwa.,tel:+2348036519538,murtalamuhd33@gmail.com,Coordinator,,,2016-11-02 09:52:21.325302,2017-05-01 07:12:31.695006,,19.0
1,2003110030,Munirat Makama.,tel:+2348165422728,,Nurse/Midwife,OTP,,2016-10-31 10:54:37.155412,2017-04-24 08:10:10.429518,2003.0,20.0
2,19,Fumen Fuset Emmanuel.,tel:+2348098248879,ffekura@gmail.com,Database Manager,,,2016-10-31 10:14:32.202894,2017-05-01 07:12:30.564476,,19.0
3,821110032,Kwatam Dandy.,tel:+2348063455263,,Community Health Officer,OTP,,2016-10-18 10:13:17.924586,2017-05-01 07:05:21.348423,821.0,8.0
4,811110033,Amina Musa T.,tel:+2348160109849,,Community Health Officer,OTP,,2017-04-28 14:28:51.700810,2017-05-02 15:19:18.310892,811.0,8.0
5,805110012,HARUNA ABDULLAHI.,tel:+2348021010530,,Volunteer,OTP,,2017-05-02 11:48:29.502721,2017-05-03 15:51:51.870141,805.0,8.0
6,821110031,Halima Muhammed.,tel:+2349076655644,,Community Health Officer,OTP,,2016-10-18 10:13:43.931902,2017-05-01 07:05:19.854107,821.0,8.0
7,211110047,Umar Abubakar.,tel:+2348126752275,,In Charge Hospital/PHC,OTP,,2016-10-26 09:10:39.762480,2017-05-01 06:58:56.645605,211.0,2.0
8,211110047,Umar Abubakar.,tel:+2348080488082,,In Charge Hospital/PHC,OTP,,2016-10-26 10:48:14.152486,2017-05-01 06:58:56.184455,211.0,2.0
9,504110010,IBRAHIM LAMARA.,tel:+2348087760645,,In Charge Hospital/PHC,OTP,,2017-02-22 14:12:33.933315,2017-04-12 14:09:33.640518,504.0,5.0


In [12]:
# Assign state lga num is not necessary as it is done already in the import_contacts.py
# df2 = assign_state_lga_num(df2)
# error with LGA Num in load_data.py

In [13]:
# Create back-up.
# Take an independent copy: a plain `restore = df2` only binds a second name
# to the same object, so any in-place change to df2 would also change the
# "back-up".
restore = df2.copy()

In [14]:
# Restore df2 from the back-up taken in the previous cell
# (re-run this cell to undo the siteid filters applied in the cells below).
df2 = restore

In [15]:
# Number of registrations with a non-null siteid
df2['siteid'].count()

2765

In [16]:
# Remove national level registrations: keep only rows whose siteid is above 1
df2 = df2[df2['siteid'] > 1]

In [17]:
# Remove erroneous registrations: siteid == 99 is the number used for contacts
# that are missing or no longer involved with the IMAM program.
df2 = df2[df2['siteid'] != 99]

In [18]:
# Registrations per state number after the siteid filters
df2.state_num.value_counts()

8.0     530
35.0    455
21.0    318
2.0     311
20.0    309
17.0    213
33.0    179
36.0    142
19.0    118
18.0     65
5.0      56
16.0     50
1.0       3
Name: state_num, dtype: int64

In [19]:
# Inspect the rows registered under state_num 1.
df2.query('state_num==1')
# These are test registrations (shown in the output below)

Unnamed: 0,siteid,name,urn,mail,post,type,groups,first_seen,last_seen,lga_num,state_num
901,101110001,Assay T Bulti.,tel:+2348035351744,,In Charge Hospital/PHC,OTP,,2016-08-22 08:31:51.601275,2017-05-03 09:07:20.055704,101.0,1.0
1360,101110001,Aisha Kaka Bello.,tel:+2348035865826,bello.a.kaka@gmail.com,Technical Assistance,OTP,,2016-10-15 11:03:06.595684,2017-01-16 09:01:50.505299,101.0,1.0
1693,101110001,Elfriede Mamie Kormawa.,tel:+2347064019648,ekormawa@unicef.org,Doctor,OTP,,2016-06-24 08:04:14.291228,2017-05-02 09:57:59.441663,101.0,1.0


In [26]:
# Keep supervision staff only: supervision siteids range from 1 to 3699,
# anything above that is handled separately as an implementation site.
supervision_df = df2.loc[df2['siteid'] <= 3699]

In [27]:
# Order the supervision rows by site id so contacts of one site sit together.
# NOTE(review): rows sharing a siteid come out in whatever order the default
# sort algorithm leaves them; that order drives the per-site numbering below.
supervision_df = supervision_df.sort_values('siteid')
supervision_df

Unnamed: 0,siteid,name,urn,mail,post,type,groups,first_seen,last_seen,lga_num,state_num
2148,2,Ijagila Mark .,tel:+2348100001300,ijagilamark@gmail.com,Coordinator,,,2016-10-24 09:36:56.209869,2017-05-01 07:12:32.701764,,2.0
2309,2,Reuben Aidaticha.,tel:+2348038213859,aidaticha@gmail.com,Database Manager,,,2016-10-24 09:35:36.024432,2017-05-01 07:12:33.466924,,2.0
2311,2,Wullanga Alfred.,tel:+2348029864318,wullangaalfred@gmail.com,Database Manager,,,2016-10-24 09:35:46.229729,2017-05-01 07:12:33.358664,,2.0
2328,2,Wullanga Alfred,tel:+2347032853473,wulangaalfred@gmail.com,Coordinator,,,2016-10-26 09:06:02.356386,2017-05-02 11:46:46.575273,,2.0
2334,2,Hauwa Zoakah.,tel:+2347035678763,hauwabata@yahoo.com,Coordinator,,,2016-06-01 12:17:48.149140,2017-05-01 07:12:33.249846,,2.0
2147,2,Olawumi Monica Ajayi.,tel:+2348036173901,wumi.ajayi@yahoo.com,Technical Assistance,,,2016-10-24 09:38:19.635591,2017-05-01 07:12:33.141239,,2.0
839,2,Reuben Aidaticha.,tel:+2348088481006,aidaticha@gmail.com,Database Manager,,,2016-10-24 10:38:14.041832,2017-05-01 07:12:32.919732,,2.0
841,2,Hauwa Zoakah .,tel:+2348020938959,hauwabata@yahoo.com,Coordinator,,,2016-10-26 09:04:19.462503,2017-05-01 07:12:32.809703,,2.0
754,5,Sama'ila Usman Maikan.,tel:+2348038233464,,Coordinator,,,2016-09-02 10:42:56.346388,2017-02-27 11:33:31.642024,,5.0
756,5,Ali Shehu Kobi .,tel:+2347032683737,,Stocks Manager,,,2016-09-02 10:40:33.174405,2017-02-27 11:33:30.512228,,5.0


In [28]:
# Number the contacts within each site (1, 2, 3, ...) so that the long table
# can be pivoted to one row per site afterwards.
# On supervision level siteid equals state_num or lga_num, so grouping on
# siteid alone is enough; no separate pass over state_num / lga_num is needed.
contact_rank = supervision_df.groupby('siteid').cumcount()
supervision_df['count'] = contact_rank + 1

In [29]:
# 'type' and 'groups' are not needed for the supervision listing
supervision_df = supervision_df.drop(['type', 'groups'], axis=1)

In [31]:
# Supervision long dataframe: one row per contact, numbered per site in 'count'
supervision_df

Unnamed: 0,siteid,name,urn,mail,post,first_seen,last_seen,lga_num,state_num,count
2148,2,Ijagila Mark .,tel:+2348100001300,ijagilamark@gmail.com,Coordinator,2016-10-24 09:36:56.209869,2017-05-01 07:12:32.701764,,2.0,1
2309,2,Reuben Aidaticha.,tel:+2348038213859,aidaticha@gmail.com,Database Manager,2016-10-24 09:35:36.024432,2017-05-01 07:12:33.466924,,2.0,2
2311,2,Wullanga Alfred.,tel:+2348029864318,wullangaalfred@gmail.com,Database Manager,2016-10-24 09:35:46.229729,2017-05-01 07:12:33.358664,,2.0,3
2328,2,Wullanga Alfred,tel:+2347032853473,wulangaalfred@gmail.com,Coordinator,2016-10-26 09:06:02.356386,2017-05-02 11:46:46.575273,,2.0,4
2334,2,Hauwa Zoakah.,tel:+2347035678763,hauwabata@yahoo.com,Coordinator,2016-06-01 12:17:48.149140,2017-05-01 07:12:33.249846,,2.0,5
2147,2,Olawumi Monica Ajayi.,tel:+2348036173901,wumi.ajayi@yahoo.com,Technical Assistance,2016-10-24 09:38:19.635591,2017-05-01 07:12:33.141239,,2.0,6
839,2,Reuben Aidaticha.,tel:+2348088481006,aidaticha@gmail.com,Database Manager,2016-10-24 10:38:14.041832,2017-05-01 07:12:32.919732,,2.0,7
841,2,Hauwa Zoakah .,tel:+2348020938959,hauwabata@yahoo.com,Coordinator,2016-10-26 09:04:19.462503,2017-05-01 07:12:32.809703,,2.0,8
754,5,Sama'ila Usman Maikan.,tel:+2348038233464,,Coordinator,2016-09-02 10:42:56.346388,2017-02-27 11:33:31.642024,,5.0,1
756,5,Ali Shehu Kobi .,tel:+2347032683737,,Stocks Manager,2016-09-02 10:40:33.174405,2017-02-27 11:33:30.512228,,5.0,2


In [32]:
# Export the long supervision table as an Excel workbook.
# The context manager saves and closes the workbook exactly once. The former
# writer.save() + writer.close() pair wrote the file twice on older pandas and
# fails on pandas >= 2.0, where ExcelWriter.save() no longer exists.
filename = "IMAM_state_supervision.xlsx"
with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
    supervision_df.to_excel(writer, sheet_name='Sheet1')

In [138]:
# Data points kept in the IMAM Supervision database for each supervision
# SiteID: name, phone number (URN) and email, plus the per-site contact
# counter. state_num and lga_num are dropped here.
columnsTitles = ['siteid', 'name', 'urn', 'mail', 'count']

supervision_df = supervision_df.reindex(columns=columnsTitles)

In [139]:
# State level supervision staff only: rows whose siteid is at most 39
state_df = supervision_df.loc[supervision_df['siteid'] <= 39]


In [140]:
# State level contacts, still in long format (one row per contact)
state_df

Unnamed: 0,siteid,name,num,mail,count
2148,2,Ijagila Mark .,,ijagilamark@gmail.com,1
2309,2,Reuben Aidaticha.,,aidaticha@gmail.com,2
2311,2,Wullanga Alfred.,,wullangaalfred@gmail.com,3
2328,2,Wullanga Alfred,,wulangaalfred@gmail.com,4
2334,2,Hauwa Zoakah.,,hauwabata@yahoo.com,5
2147,2,Olawumi Monica Ajayi.,,wumi.ajayi@yahoo.com,6
839,2,Reuben Aidaticha.,,aidaticha@gmail.com,7
841,2,Hauwa Zoakah .,,hauwabata@yahoo.com,8
754,5,Sama'ila Usman Maikan.,,,1
756,5,Ali Shehu Kobi .,,,2


In [141]:
# Convert the vertical (long) table to a horizontal (wide) one: one row per
# siteid and, for each remaining field, one column per contact number.
# Open question: will None in the cells cause us to send an excessive number of warning SMS?
state_wide = state_df.pivot(index='siteid', columns='count')

In [142]:
# Check the pivoted frame; its columns are a (field, count) MultiIndex
state_wide.head()

Unnamed: 0_level_0,name,name,name,name,name,name,name,name,name,name,...,mail,mail,mail,mail,mail,mail,mail,mail,mail,mail
count,1,2,3,4,5,6,7,8,9,10,...,9,10,11,12,13,14,15,16,17,18
siteid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,Ijagila Mark .,Reuben Aidaticha.,Wullanga Alfred.,Wullanga Alfred,Hauwa Zoakah.,Olawumi Monica Ajayi.,Reuben Aidaticha.,Hauwa Zoakah .,,,...,,,,,,,,,,
5,Sama'ila Usman Maikan.,Ali Shehu Kobi .,Yakubu Baba.,Charity Evans Nysalamke.,Jackson Ladu Martins.,Hamza Yakubu Sade .,Habu Abdulmalik Dauda.,,,,...,,,,,,,,,,
8,Hassana Suleiman Jibrin.,Abdullahi Alhaji Madi.,"Amarachi, Clementina Chukwuma.",Daniel James .,Aminu Usman Danzomo.,Magret Ayuba.,,,,,...,,,,,,,,,,
16,Ronas Amos Amusa .,Rukayya Lawal.,Ibrahim Inuwa Lano.,Selamawit Negash.,Suleiman Mamman.,Ahmed Audu Saddana.,Usman Baraya.,Olufunmilayo Adepoju-adebambo.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Temidayo Esther Ajala.,Olatomiwa Olabisi.,Saidu Umar Adamu.,,,,,,,...,,,,,,,,,,


In [143]:
# Flatten the (field, count) MultiIndex left by the pivot into plain column
# names built as "sno" + count + field, e.g. "sno1name".
state_wide.columns = [
    "sno{}{}".format(contact_no, field)
    for field, contact_no in state_wide.columns
]

In [144]:
# Turn the siteid index back into an ordinary column.
state = state_wide.reset_index()
state
# Note: there is no state_num column here, but for state rows it is the same as siteid

Unnamed: 0,siteid,sno1name,sno2name,sno3name,sno4name,sno5name,sno6name,sno7name,sno8name,sno9name,...,sno9mail,sno10mail,sno11mail,sno12mail,sno13mail,sno14mail,sno15mail,sno16mail,sno17mail,sno18mail
0,2,Ijagila Mark .,Reuben Aidaticha.,Wullanga Alfred.,Wullanga Alfred,Hauwa Zoakah.,Olawumi Monica Ajayi.,Reuben Aidaticha.,Hauwa Zoakah .,,...,,,,,,,,,,
1,5,Sama'ila Usman Maikan.,Ali Shehu Kobi .,Yakubu Baba.,Charity Evans Nysalamke.,Jackson Ladu Martins.,Hamza Yakubu Sade .,Habu Abdulmalik Dauda.,,,...,,,,,,,,,,
2,8,Hassana Suleiman Jibrin.,Abdullahi Alhaji Madi.,"Amarachi, Clementina Chukwuma.",Daniel James .,Aminu Usman Danzomo.,Magret Ayuba.,,,,...,,,,,,,,,,
3,16,Ronas Amos Amusa .,Rukayya Lawal.,Ibrahim Inuwa Lano.,Selamawit Negash.,Suleiman Mamman.,Ahmed Audu Saddana.,Usman Baraya.,Olufunmilayo Adepoju-adebambo.,,...,,,,,,,,,,
4,17,MUSA MOHAMMED HADEJIA .,Temidayo Esther Ajala.,Olatomiwa Olabisi.,Saidu Umar Adamu.,,,,,,...,,,,,,,,,,
5,18,Hauwa Usman .,Maryam Yusuf .,Saratu Aduwak.,Jane Gwani.,,,,,,...,,,,,,,,,,
6,19,Abigail Ishaya Nyam .,Akilu Sani.,Sabo Wada.,Nyako William Azi.,Ayodeji Osunkentan.,Murtala M Inuwa.,"Abolarin, Samuel Sesan.",Ado Mustapha.,Ado Jibrin Sanda.,...,adosanda@gmail.com,lekrunmon@yahoo.com,nyakoabi@yahoo.com,dgauji@yahoo.com,msaniabdullahi@gmail.com,mikekush70@yahoo.com,sabowada57@gmail.com,,ffekura@gmail.com,akilumfashi@gmail.com
7,20,Rabia Mohammed Sno .,Yusufahmadu Gangara.,Musbahu Hamisu.,Abdulhadi Abdulkadir.,Ado Ibrahim Abdulrahman.,Hamisu Idris K/bai.,Abdulmumin Lawan.,Sim Karla Sakyenu.,Ibrahim Maje Sayyadi.,...,ibrahimmajes@hotmail.com,zayyanais@gmailcom,,,,,,,,
8,21,Abimbola Abosede Sobo.,Aliyu Galadima Libata.,Abisola Mary Atoyebi.,Shamsu Muhammed.,Abdulmalik Muhammad Illo.,Abdulmalik Muhammad Illo .,Aliyu Galadima Libata .,Beatrice Kwere.,Suwaiba M Bello.,...,muheebat18@gmail.com,sundayknorbert@yahoo.com,,,,,,,,
9,33,Abdallah Ladan.,Hassan Muhammad Galadanci.,Kamaru Muhammad.,Nura Muazu.,Buharismalami.,Muhammad Ali Hamza.,Hassanmuhammadgaladanci.,Nura Shehu .,Sany Mohd Sokoto.,...,,,,,,,,,,


In [145]:
# Prepared state level list of supervisors

In [146]:
# Index the state frame by siteid, which for state rows is the state number;
# the merge with the LGA frame matches lga.state_num against this index.
state = state.set_index('siteid')
# Remove the index name so the index carries no 'siteid' label
state.index.name = None


In [147]:
# Wide state level supervisor list, indexed by state number
state

Unnamed: 0,sno1name,sno2name,sno3name,sno4name,sno5name,sno6name,sno7name,sno8name,sno9name,sno10name,...,sno9mail,sno10mail,sno11mail,sno12mail,sno13mail,sno14mail,sno15mail,sno16mail,sno17mail,sno18mail
2,Ijagila Mark .,Reuben Aidaticha.,Wullanga Alfred.,Wullanga Alfred,Hauwa Zoakah.,Olawumi Monica Ajayi.,Reuben Aidaticha.,Hauwa Zoakah .,,,...,,,,,,,,,,
5,Sama'ila Usman Maikan.,Ali Shehu Kobi .,Yakubu Baba.,Charity Evans Nysalamke.,Jackson Ladu Martins.,Hamza Yakubu Sade .,Habu Abdulmalik Dauda.,,,,...,,,,,,,,,,
8,Hassana Suleiman Jibrin.,Abdullahi Alhaji Madi.,"Amarachi, Clementina Chukwuma.",Daniel James .,Aminu Usman Danzomo.,Magret Ayuba.,,,,,...,,,,,,,,,,
16,Ronas Amos Amusa .,Rukayya Lawal.,Ibrahim Inuwa Lano.,Selamawit Negash.,Suleiman Mamman.,Ahmed Audu Saddana.,Usman Baraya.,Olufunmilayo Adepoju-adebambo.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Temidayo Esther Ajala.,Olatomiwa Olabisi.,Saidu Umar Adamu.,,,,,,,...,,,,,,,,,,
18,Hauwa Usman .,Maryam Yusuf .,Saratu Aduwak.,Jane Gwani.,,,,,,,...,,,,,,,,,,
19,Abigail Ishaya Nyam .,Akilu Sani.,Sabo Wada.,Nyako William Azi.,Ayodeji Osunkentan.,Murtala M Inuwa.,"Abolarin, Samuel Sesan.",Ado Mustapha.,Ado Jibrin Sanda.,Olalekan Wahab Runmonkun .,...,adosanda@gmail.com,lekrunmon@yahoo.com,nyakoabi@yahoo.com,dgauji@yahoo.com,msaniabdullahi@gmail.com,mikekush70@yahoo.com,sabowada57@gmail.com,,ffekura@gmail.com,akilumfashi@gmail.com
20,Rabia Mohammed Sno .,Yusufahmadu Gangara.,Musbahu Hamisu.,Abdulhadi Abdulkadir.,Ado Ibrahim Abdulrahman.,Hamisu Idris K/bai.,Abdulmumin Lawan.,Sim Karla Sakyenu.,Ibrahim Maje Sayyadi.,Zayyana Isyaku Sule.,...,ibrahimmajes@hotmail.com,zayyanais@gmailcom,,,,,,,,
21,Abimbola Abosede Sobo.,Aliyu Galadima Libata.,Abisola Mary Atoyebi.,Shamsu Muhammed.,Abdulmalik Muhammad Illo.,Abdulmalik Muhammad Illo .,Aliyu Galadima Libata .,Beatrice Kwere.,Suwaiba M Bello.,Sunday Norbert.,...,muheebat18@gmail.com,sundayknorbert@yahoo.com,,,,,,,,
33,Abdallah Ladan.,Hassan Muhammad Galadanci.,Kamaru Muhammad.,Nura Muazu.,Buharismalami.,Muhammad Ali Hamza.,Hassanmuhammadgaladanci.,Nura Shehu .,Sany Mohd Sokoto.,,...,,,,,,,,,,


In [148]:
# Build the same listing for the LGA level: keep only supervision rows whose
# siteid lies between 101 and 3799 (both bounds inclusive).
# NOTE(review): supervision_df was already cut at siteid <= 3699 when it was
# created, so the 3799 bound is never reached — confirm which limit is intended.
lga_df = supervision_df[supervision_df['siteid'].between(101, 3799)]


In [149]:
# LGA level contacts, still in long format (one row per contact)
lga_df

Unnamed: 0,siteid,name,num,mail,count
1761,201,Edina Richard.,,,1
1341,202,Monday Leasado.,,,1
1759,202,Monday Leasado.,,,2
1760,202,Monday Leasado.,,,3
1998,203,Naaticha Waziri .,,naatiwaziri69@gmail.com,1
1728,204,Rose Zidon .,,,1
1757,204,Rose Mbamuno Zidon,,,2
2023,205,Esther Danjuma.,,,1
2268,205,Esther Danjuma.,,,2
1945,206,Hannatu B Usman.,,hannatubbu@gmail.com,1


In [136]:
# Convert the vertical (long) LGA table to a horizontal (wide) one: one row
# per siteid and, for each remaining field, one column per contact number.
# Open question: will None in the cells cause us to send an excessive number of warning SMS?
lga_wide = lga_df.pivot(index='siteid', columns='count')

In [137]:
# Pivoted LGA frame; its columns are a (field, count) MultiIndex
lga_wide

Unnamed: 0_level_0,name,name,name,name,name,urn,urn,urn,urn,urn,mail,mail,mail,mail,mail
count,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5
siteid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
201,Edina Richard.,,,,,tel:+2348131816143,,,,,,,,,
202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348065356507,tel:+2348038056048,tel:+2348086625852,,,,,,,
203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,,naatiwaziri69@gmail.com,,,,
204,Rose Zidon .,Rose Mbamuno Zidon,,,,tel:+2347065739356,tel:+2349086559594,,,,,,,,
205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2348025568649,tel:+2347030635580,,,,,,,,
206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,,hannatubbu@gmail.com,,,,
207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348022645708,tel:+2348034154949,,,,,,,,
208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,,,apollospola@gmail,,,
209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,,,,,,
210,Ularamai Raphael .,Adama Abubakar.,,,,tel:+2347087576976,tel:+2349078543500,,,,,,,,


In [101]:
# Flatten the (field, count) MultiIndex left by the pivot into plain column
# names built as "lga" + count + field, e.g. "lga1name".
lga_wide.columns = [
    "lga{}{}".format(contact_no, field)
    for field, contact_no in lga_wide.columns
]

In [102]:
# Turn the siteid index back into an ordinary column
lga = lga_wide.reset_index()

In [103]:
# Wide LGA level supervisor list, one row per LGA siteid
lga

Unnamed: 0,siteid,lgalg,lgalg.1,lgalg.2,lgalg.3,lgalg.4,lgalg.5,lgalg.6,lgalg.7,lgalg.8,lgalg.9,lgalg.10,lgalg.11,lgalg.12,lgalg.13,lgalg.14
0,201,Edina Richard.,,,,,tel:+2348131816143,,,,,,,,,
1,202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348065356507,tel:+2348038056048,tel:+2348086625852,,,,,,,
2,203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,,naatiwaziri69@gmail.com,,,,
3,204,Rose Zidon .,Rose Mbamuno Zidon,,,,tel:+2347065739356,tel:+2349086559594,,,,,,,,
4,205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2348025568649,tel:+2347030635580,,,,,,,,
5,206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,,hannatubbu@gmail.com,,,,
6,207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348022645708,tel:+2348034154949,,,,,,,,
7,208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,,,apollospola@gmail,,,
8,209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,,,,,,
9,210,Ularamai Raphael .,Adama Abubakar.,,,,tel:+2347087576976,tel:+2349078543500,,,,,,,,


In [69]:
# Change None/NaN in cells to blank.
# fillna is called on the whole frame and the result re-assigned: the former
# per-column `lga[cell].fillna(..., inplace=True)` loop modified a column
# selection in place, which depends on that selection being a view of the
# frame and has no effect once pandas Copy-on-Write is active.
lga = lga.fillna('')

In [70]:
# lga = lga.fillna
# changes df format and leaves None in place. 

In [71]:
# Initialise state_num with a placeholder of 0; the next cell fills in the
# real state number for 3- and 4-digit siteids.
lga['state_num'] = 0

In [72]:
# Add state_num to LGA df.
# The state number is the LGA siteid without its last two digits
# (201 -> 2, 2003 -> 20), i.e. integer division by 100. This replaces the
# earlier string-length / string-slice round trip, which mixed str and int
# values in np.where and needed a temporary 'siteid_lgt' column.
lga_site_num = lga['siteid'].astype(int)
lga_state_num = lga_site_num // 100

# Only 3- and 4-digit siteids carry a state prefix; anything else stays 0.
# state_num is kept as int, otherwise the merge with the state frame will not work.
lga['state_num'] = lga_state_num.where(lga_site_num.between(100, 9999), 0).astype(int)
lga['state_num'].value_counts()

8     21
2     21
35    17
21    15
20    15
17    12
33     9
36     7
19     6
16     3
5      3
18     2
Name: state_num, dtype: int64

In [73]:
# Check column types; state_num must be an integer for the merge with the state frame
lga.dtypes

siteid        int64
lganame1     object
lganame2     object
lganame3     object
lganame4     object
lganame5     object
lgaurn1      object
lgaurn2      object
lgaurn3      object
lgaurn4      object
lgaurn5      object
lgamail1     object
lgamail2     object
lgamail3     object
lgamail4     object
lgamail5     object
state_num     int64
dtype: object

In [409]:
# Prepared LGA level supervision list


In [74]:
# Look at the state level frame again before merging it onto the LGA frame
state

Unnamed: 0,snoname1,snoname2,snoname3,snoname4,snoname5,snoname6,snoname7,snoname8,snoname9,snoname10,...,snomail9,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18
2,Ijagila Mark .,Reuben Aidaticha.,Wullanga Alfred.,Wullanga Alfred,Hauwa Zoakah.,Olawumi Monica Ajayi.,Reuben Aidaticha.,Hauwa Zoakah .,,,...,,,,,,,,,,
5,Sama'ila Usman Maikan.,Ali Shehu Kobi .,Yakubu Baba.,Charity Evans Nysalamke.,Jackson Ladu Martins.,Hamza Yakubu Sade .,Habu Abdulmalik Dauda.,,,,...,,,,,,,,,,
8,Hassana Suleiman Jibrin.,Abdullahi Alhaji Madi.,"Amarachi, Clementina Chukwuma.",Daniel James .,Aminu Usman Danzomo.,Magret Ayuba.,,,,,...,,,,,,,,,,
16,Ronas Amos Amusa .,Rukayya Lawal.,Ibrahim Inuwa Lano.,Selamawit Negash.,Suleiman Mamman.,Ahmed Audu Saddana.,Usman Baraya.,Olufunmilayo Adepoju-adebambo.,,,...,,,,,,,,,,
17,MUSA MOHAMMED HADEJIA .,Temidayo Esther Ajala.,Olatomiwa Olabisi.,Saidu Umar Adamu.,,,,,,,...,,,,,,,,,,
18,Hauwa Usman .,Maryam Yusuf .,Saratu Aduwak.,Jane Gwani.,,,,,,,...,,,,,,,,,,
19,Abigail Ishaya Nyam .,Akilu Sani.,Sabo Wada.,Nyako William Azi.,Ayodeji Osunkentan.,Murtala M Inuwa.,"Abolarin, Samuel Sesan.",Ado Mustapha.,Ado Jibrin Sanda.,Olalekan Wahab Runmonkun .,...,adosanda@gmail.com,lekrunmon@yahoo.com,nyakoabi@yahoo.com,dgauji@yahoo.com,msaniabdullahi@gmail.com,mikekush70@yahoo.com,sabowada57@gmail.com,,ffekura@gmail.com,akilumfashi@gmail.com
20,Rabia Mohammed Sno .,Yusufahmadu Gangara.,Musbahu Hamisu.,Abdulhadi Abdulkadir.,Ado Ibrahim Abdulrahman.,Hamisu Idris K/bai.,Abdulmumin Lawan.,Sim Karla Sakyenu.,Ibrahim Maje Sayyadi.,Zayyana Isyaku Sule.,...,ibrahimmajes@hotmail.com,zayyanais@gmailcom,,,,,,,,
21,Abimbola Abosede Sobo.,Aliyu Galadima Libata.,Abisola Mary Atoyebi.,Shamsu Muhammed.,Abdulmalik Muhammad Illo.,Abdulmalik Muhammad Illo .,Aliyu Galadima Libata .,Beatrice Kwere.,Suwaiba M Bello.,Sunday Norbert.,...,muheebat18@gmail.com,sundayknorbert@yahoo.com,,,,,,,,
33,Abdallah Ladan.,Hassan Muhammad Galadanci.,Kamaru Muhammad.,Nura Muazu.,Buharismalami.,Muhammad Ali Hamza.,Hassanmuhammadgaladanci.,Nura Shehu .,Sany Mohd Sokoto.,,...,,,,,,,,,,


In [103]:
# Check the column types of the state level frame
state.dtypes

snoname1     object
snoname2     object
snoname3     object
snoname4     object
snoname5     object
snoname6     object
snoname7     object
snoname8     object
snoname9     object
snoname10    object
snoname11    object
snoname12    object
snoname13    object
snoname14    object
snoname15    object
snoname16    object
snoname17    object
snoname18    object
snourn1      object
snourn2      object
snourn3      object
snourn4      object
snourn5      object
snourn6      object
snourn7      object
snourn8      object
snourn9      object
snourn10     object
snourn11     object
snourn12     object
snourn13     object
snourn14     object
snourn15     object
snourn16     object
snourn17     object
snourn18     object
snomail1     object
snomail2     object
snomail3     object
snomail4     object
snomail5     object
snomail6     object
snomail7     object
snomail8     object
snomail9     object
snomail10    object
snomail11    object
snomail12    object
snomail13    object
snomail14    object


In [75]:
# Merge the state level supervisor columns onto every LGA row.
# The LGA frame's state_num column is matched against the index of `state`;
# how='left' keeps every LGA row even when no state row matches.
imam_sup = lga.merge(state, how='left', left_on='state_num',
                     right_index=True, sort=False)

imam_sup

Unnamed: 0,siteid,lganame1,lganame2,lganame3,lganame4,lganame5,lgaurn1,lgaurn2,lgaurn3,lgaurn4,...,snomail9,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18
0,201,Edina Richard.,,,,,tel:+2348131816143,,,,...,,,,,,,,,,
1,202,Monday Leasado.,Monday Leasado.,Monday Leasado.,,,tel:+2348065356507,tel:+2348038056048,tel:+2348086625852,,...,,,,,,,,,,
2,203,Naaticha Waziri .,,,,,tel:+2348037675670,,,,...,,,,,,,,,,
3,204,Rose Zidon .,Rose Mbamuno Zidon,,,,tel:+2347065739356,tel:+2349086559594,,,...,,,,,,,,,,
4,205,Esther Danjuma.,Esther Danjuma.,,,,tel:+2348025568649,tel:+2347030635580,,,...,,,,,,,,,,
5,206,Hannatu B Usman.,,,,,tel:+2348064811859,,,,...,,,,,,,,,,
6,207,Aishatu Samaila.,Aishatu Samaila.,,,,tel:+2348022645708,tel:+2348034154949,,,...,,,,,,,,,,
7,208,Wubbewasu David .,Apollos Elkanah.,,,,tel:+2348174118980,tel:+2348029839844,,,...,,,,,,,,,,
8,209,Patience Zomti Douglas.,,,,,tel:+2348065754427,,,,...,,,,,,,,,,
9,210,Ularamai Raphael .,Adama Abubakar.,,,,tel:+2347087576976,tel:+2349078543500,,,...,,,,,,,,,,


In [77]:
# Is the first state supervisor name populated for every LGA after the merge?
# The state rename cell builds column names as "sno" + count + field, so the
# first supervisor's name is in 'sno1name'; there is no 'snoname1' column and
# the former attribute access raised AttributeError on a fresh run.
imam_sup['sno1name'].value_counts()

Ijagila Mark .               21
Hassana Suleiman Jibrin.     21
Ahmed Bulama Musa.           17
Rabia Mohammed Sno .         15
Abimbola Abosede Sobo.       15
MUSA  MOHAMMED  HADEJIA .    12
Abdallah Ladan.               9
Aliyu Ibrahim.                7
Abigail Ishaya Nyam .         6
Sama'ila Usman Maikan.        3
Ronas Amos Amusa .            3
Hauwa Usman .                 2
Name: snoname1, dtype: int64

In [78]:
# Test export of the merged state + LGA supervision table as Excel.
# The context manager saves and closes the workbook exactly once. The former
# writer.save() + writer.close() pair wrote the file twice on older pandas and
# fails on pandas >= 2.0, where ExcelWriter.save() no longer exists.
filename = "IMAM_state_lga_test.xlsx"
with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
    imam_sup.to_excel(writer, sheet_name='Sheet1')

In [79]:
# Copy the LGA level siteid into a new 'lga_num' column; the merge with the
# implementation staff further down joins on 'lga_num'.
# (Original note: SiteID is now NaN for state; it can be converted to SiteID later.)
imam_sup['lga_num'] = imam_sup['siteid'] 

# imam_sup.state_num.value_counts()

In [91]:
# Implementation staff: every registration whose siteid lies above the
# supervision range (> 3699). The supervision table is merged onto this later.
imam_imp = df2[df2['siteid'] > 3699]
imam_imp['siteid'].value_counts()

3503210022    8
3512110004    8
2106110004    7
3511110005    7
3603110029    7
2106110020    7
806110012     6
1714110008    6
3513110005    6
2118110015    6
1703110012    6
3306110017    6
3305110027    6
1702110010    5
821110052     5
1717110001    5
3317110023    5
2001110059    5
2021210016    5
821110057     5
1940110023    5
821110009     5
2112110009    5
2118110014    5
3301110011    5
2011120005    5
2034110018    5
2104110014    5
2011110029    5
3321110020    5
             ..
813110044     1
207110089     1
1802110018    1
3502110014    1
2108210020    1
3613210035    1
1805110047    1
3516110004    1
2001110060    1
837110010     1
3514110005    1
2010110006    1
1930110002    1
813110034     1
1929110001    1
3505110037    1
1720110003    1
1805110031    1
809110004     1
821110053     1
3502110034    1
3605110051    1
208110016     1
821110059     1
821110061     1
208110012     1
809110008     1
208110010     1
3512110033    1
824110023     1
Name: siteid, dtype: int

In [92]:
# Keep only the contact's name and phone (urn) plus the three location keys,
# in this order.
columnsTitles = ['name', 'urn', 'siteid', 'lga_num', 'state_num']

imam_imp = imam_imp.reindex(columns=columnsTitles)

In [93]:
# Number of implementation level registrations
len(imam_imp)


2415

In [94]:
# Implementation staff with their site, LGA and state numbers
imam_imp

Unnamed: 0,name,urn,siteid,lga_num,state_num
1,Munirat Makama.,tel:+2348165422728,2003110030,2003.0,20.0
3,Kwatam Dandy.,tel:+2348063455263,821110032,821.0,8.0
4,Amina Musa T.,tel:+2348160109849,811110033,811.0,8.0
5,HARUNA ABDULLAHI.,tel:+2348021010530,805110012,805.0,8.0
6,Halima Muhammed.,tel:+2349076655644,821110031,821.0,8.0
7,Umar Abubakar.,tel:+2348126752275,211110047,211.0,2.0
8,Umar Abubakar.,tel:+2348080488082,211110047,211.0,2.0
9,IBRAHIM LAMARA.,tel:+2348087760645,504110010,504.0,5.0
10,Lami Isaac.,tel:+2348135016269,821110029,821.0,8.0
11,Elizabeth Musa.,tel:+2348029266456,821110029,821.0,8.0


In [95]:
# del imam_supervision

In [96]:
# Merge the supervision table onto the implementation staff via the LGA number.
# how='left' keeps every implementation contact; columns present in both
# frames receive the '_x' (implementation) and '_y' (supervision) suffixes.
imam_supervision = imam_imp.merge(
    imam_sup,
    how='left',
    left_on='lga_num',
    right_on='lga_num',
    suffixes=('_x', '_y'),
    sort=False
)


# imam_supervision = imam_imp.join(imam_sup, suffixes=('_x', '_y'), on='lga_num')

In [97]:
# Target layout per contact:
# Name, Phone, SiteID, SiteName, State, LGA, LGA name phone email, State name phone email
imam_supervision

Unnamed: 0,name,urn,siteid_x,lga_num,state_num_x,siteid_y,lganame1,lganame2,lganame3,lganame4,...,snomail9,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18
0,Munirat Makama.,tel:+2348165422728,2003110030,2003.0,20.0,2003.0,Assama'u Tukur.,Abdulkadir Muhammad Yasore.,,,...,ibrahimmajes@hotmail.com,zayyanais@gmailcom,,,,,,,,
1,Kwatam Dandy.,tel:+2348063455263,821110032,821.0,8.0,821.0,Hadiza Shettima.,,,,...,,,,,,,,,,
2,Amina Musa T.,tel:+2348160109849,811110033,811.0,8.0,811.0,Mairo musa.,,,,...,,,,,,,,,,
3,HARUNA ABDULLAHI.,tel:+2348021010530,805110012,805.0,8.0,805.0,Aishatu Abubakar.,Aishatu Abubakar.,,,...,,,,,,,,,,
4,Halima Muhammed.,tel:+2349076655644,821110031,821.0,8.0,821.0,Hadiza Shettima.,,,,...,,,,,,,,,,
5,Umar Abubakar.,tel:+2348126752275,211110047,211.0,2.0,211.0,Yakubu Suleiman.,Yakubu Suleiman.,,,...,,,,,,,,,,
6,Umar Abubakar.,tel:+2348080488082,211110047,211.0,2.0,211.0,Yakubu Suleiman.,Yakubu Suleiman.,,,...,,,,,,,,,,
7,IBRAHIM LAMARA.,tel:+2348087760645,504110010,504.0,5.0,504.0,Ahmed Idi Dagauda.,,,,...,,,,,,,,,,
8,Lami Isaac.,tel:+2348135016269,821110029,821.0,8.0,821.0,Hadiza Shettima.,,,,...,,,,,,,,,,
9,Elizabeth Musa.,tel:+2348029266456,821110029,821.0,8.0,821.0,Hadiza Shettima.,,,,...,,,,,,,,,,


In [112]:
#convert to xls
# and/or export as JSON


In [90]:
# Add the site name for every row of imam_supervision.
# All needed Site rows are fetched in ONE query and looked up from a dict,
# instead of running two ORM queries per row (an existence check plus a get()).
# SiteIDs with no matching Site, and sites without a name, map to "".
# NOTE(review): assumes siteid_x holds numeric SiteIDs comparable to Site.siteid - confirm.
site_ids = imam_supervision['siteid_x'].dropna().unique().tolist()
sitename_by_id = {site.siteid: site.sitename
                  for site in Site.objects.filter(siteid__in=site_ids)}
imam_supervision['sitename'] = imam_supervision['siteid_x'].map(
    lambda siteid: (sitename_by_id.get(siteid) or "").strip())

Unnamed: 0,name,urn,siteid_x,mail,lga_num,state_num_x,siteid_y,lganame1,lganame2,lganame3,...,snomail10,snomail11,snomail12,snomail13,snomail14,snomail15,snomail16,snomail17,snomail18,sitename
0,Munirat Makama.,tel:+2348165422728,2003110030,,2003.0,20.0,2003.0,Assama'u Tukur.,Abdulkadir Muhammad Yasore.,,...,zayyanais@gmailcom,,,,,,,,,KANDAWA
1,Kwatam Dandy.,tel:+2348063455263,821110032,,821.0,8.0,821.0,Hadiza Shettima.,,,...,,,,,,,,,,CBN IDP Camp
2,Amina Musa T.,tel:+2348160109849,811110033,,811.0,8.0,811.0,Mairo musa.,,,...,,,,,,,,,,PHC Gwoza
3,HARUNA ABDULLAHI.,tel:+2348021010530,805110012,,805.0,8.0,805.0,Aishatu Abubakar.,Aishatu Abubakar.,,...,,,,,,,,,,GUR
4,Halima Muhammed.,tel:+2349076655644,821110031,,821.0,8.0,821.0,Hadiza Shettima.,,,...,,,,,,,,,,Teachers Village IDP Camp


In [None]:
# Preview the first rows of imam_supervision instead of dumping the whole frame.
imam_supervision.head()

In [124]:
# List the Site records through the Django ORM; the QuerySet repr shown in the
# cell output lists only the first entries and truncates the rest.
Site.objects.all()

<QuerySet [<Site: Site Bagega PHC>, <Site: Site Kasumka Comm Disp>, <Site: Site Kawaye Disp>, <Site: Site Makakari Disp>, <Site: Site Tungar Daji Disp (Anka)>, <Site: Site Tungar Kudaku >, <Site: Site Barayar Zaki Primary HC>, <Site: Site Bardi Disp>, <Site: Site Dutsin Dan Ajiya Disp>, <Site: Site Rafin Gero PHC>, <Site: Site Anka WCW Clinic>, <Site: Site Anka Psychiatric Hosp.>, <Site: Site Gargam Dispensary>, <Site: Site Galadunci Desp.>, <Site: Site inwala Disp>, <Site: Site Kadadabba Disp>, <Site: Site Abare Disp>, <Site: Site Anka GH>, <Site: Site Anka orphans and less previlaged Clinic>, <Site: Site Dareta Dispensary>, '...(remaining elements truncated)...']>

In [None]:
# Replace missing values (None/NaN) with empty strings, one column at a time.
# The filled column is assigned back explicitly: calling fillna(inplace=True) on
# the object returned by state[column] may only modify a temporary copy and
# leave `state` itself unchanged.
for column in state:
    state[column] = state[column].fillna(value='')

In [None]:
# are any SiteIDs NaN ?

# print df.siteid.str.isdigit()
# df['siteid'].str.contains('^\d+$')

# SiteIDs are stored as BigInt in PostgreSQL, so string values are not allowed.

# When loading contact data, the SiteID is cleaned as follows:
# - we strip all text that follows the 9- or 10-digit number
# - we convert every letter O to a zero
# - we delete all entries that have text before the SiteID

In [None]:
# Are any SiteIDs missing?
# isnull() flags both NaN and None; a self-inequality test (siteid != siteid)
# only catches NaN and hides the intent.
df[df['siteid'].isnull()]

In [None]:
# Coerce SiteIDs to numbers; values that cannot be parsed become NaN.
# (Author's observation after running: no change.)
df2['siteid'] = pd.to_numeric(df2.siteid, errors='coerce')

# Drop rows whose siteid is NaN, then keep only SiteIDs below the cutoff.
# subset= limits the NaN check to siteid; without it, dropna() would discard
# every row that has a NaN in any column.
# NOTE(review): 3699990999 is presumably the upper bound for a valid SiteID - confirm.
# (Author's observation after each of the two steps: no change.)
df2 = (
    df2
    .dropna(subset=['siteid'])
    .query('siteid<3699990999')
)

In [None]:
# Store SiteIDs as explicit 64-bit integers. A bare `int` has a platform-dependent
# width in NumPy, and a 32-bit integer cannot hold every 10-digit SiteID.
df2['siteid'] = df2['siteid'].astype('int64')
# No change

In [None]:
# What is the type of siteid?
# type(df2['siteid']) only reports the container class (a pandas Series) and says
# nothing about the values; the element type is exposed by .dtype.
# print(...) with parentheses runs on both Python 2 and Python 3.
print(df2['siteid'].dtype)

In [None]:
# Show the dtype of every column in df2.
df2.dtypes

In [None]:
# Do any SiteIDs contain the letter o instead of the digit zero?
# Toy frame showing how .str.contains() selects the rows whose text matches.
foo = pd.DataFrame({
    'a': [1, 2, 3, 4],
    'b': ['hi', 'ooo', 'fat', 'cat'],
})
foo.loc[foo['b'].str.contains('ooo')]

# Regex & Data Cleaning
# https://trendct.org/2016/08/05/real-world-data-cleanup-with-python-and-pandas/

# df[df['siteid'].str.contains('ooo')]

# can only use .str accessor with string
#.str.contains('\D')

In [None]:
# To replace
# data['result'].replace(regex=True,inplace=True,to_replace=r'\D',value=r'')


In [None]:
# How many registrations in supervision?
# Printed explicitly: a notebook cell only displays its LAST expression, so a
# bare len(...) placed before another expression is never shown.
print(len(supervision_df['siteid']))

# Many supervision staff did not record their post - should recode them all as supervisors.
# str.upper() returns a new Series, so the stored 'post' values are not changed.
supervision_df['post'].str.upper().value_counts()

# I think there was a data entry error with Post because most people entered their post correctly.
# Also, there should not be any HC, HW in the supervision cadres.

In [None]:
# Distinct values of the 'name' column of state_df, in order of first appearance.
# Bracket access selects the column unambiguously, and Series.unique() works on
# the column directly, so the deprecated Series.ravel() flattening is not needed.
state_df['name'].unique()