# Boolean Algebra & first order logic

In [26]:
# decomment if pandas or faker not installed in your env
#!pip install pandas
#!pip install faker

## 1 Generating fake data (person details type data)


In [15]:
### Function to generate Fake dataframe
def gen_fake_data(fake_config, fake):
    data = []
    for i in range(fake_config['nrows']):
        fake_types = fake_config['fake_types']
        data.append(
            {
                el.get('column_name', None) or el['fake_type']:
                    getattr(fake,el['fake_type'])() if (el.get('kwargs') is None) else getattr(fake,el['fake_type'])(**el.get('kwargs')) 
                    for el in fake_types 
                    }
        )
    return data

In [16]:
### Config argument to generate the dataframe
fake_config = {
  "fake_types": [
    {
      "fake_type": "first_name",
    },
    {
      "fake_type": "last_name",
    },
    {
      "fake_type": "email",
    },
    {
      "fake_type": "country_code"
    },
    {
      "fake_type": "phone_number"
    },
    {
        "fake_type": "job",
    },
    {
        "fake_type": "currency_code",
    }
  ],
  "nrows": 8000
}


In [17]:
# Instantiate fake data
import pandas as pd
from faker import Faker
fake = Faker()
fake.seed(0)


ldic = gen_fake_data(fake_config, fake)
df = pd.DataFrame(ldic)

In [18]:
df.head()

Unnamed: 0,country_code,currency_code,email,first_name,job,last_name,phone_number
0,MU,BMD,williamsullivan@turner.org,Megan,Hospital pharmacist,Chang,194.892.4115
1,UZ,KGS,swoods@gmail.com,Ryan,Retail banker,Carr,(016)097-5351x39332
2,NG,SYP,crodriguez@hotmail.com,Christopher,Colour technologist,Flores,+1-839-894-7196x5934
3,YE,NOK,sheltondavid@johnson.biz,Antonio,Information systems manager,Zavala,+1-848-339-6947
4,SN,MXN,darrellmatthews@lee-smith.com,Isabella,Occupational hygienist,Haas,256-012-3098x9101


## 2 Define our operators and extractors

### 2.1 operators

In [19]:
# Define logical operators
from functools import reduce

def and_(*conditions):
    # the classical logical operator "and" is binary, this one is n-ary to facilitate mainuplation 
    # instead of doing and_(and_(c1, c2),c3) we do and_(c1, c2, c3)
    # when applied on every colum, it plays the role of the operator ∀(c)
    return reduce(lambda x, y : x & y, conditions)

def or_(*conditions):
    # the classical logical operator "or" is binary, this one is n-ary to facilitate mainuplation 
    # instead of doing or_(or_(c1, c2),c3) we do or_(c1, c2, c3)
    # when applied on every colum, it plays the role of the operator ∃(c)
    return reduce(lambda x, y: x | y, conditions)

def not_(condition):
    # plays the role of the classical "not" operator
    return ~condition

### 2.2 extractors 

In [42]:

def get_condition_on_df_col(df, column, lambda_func):
    # return a serie of truth valuation {True, False} of the condition lambda_func for every cell in the column 
    return df[column].apply(lambda row : lambda_func(row))

def get_condition_on_at_least_one_col(df, lambda_func, columns = []):
    if columns == []:
        columns = df.columns 
    return or_(*[get_condition_on_df_col(df,  column, lambda_func) for column in columns ])

def get_condition_on_every_col(df, lambda_func, columns = []):
    if columns == []:
        columns = df.columns 
    return and_(*[get_condition_on_df_col(df,  column, lambda_func) for column in columns ])

## 3 Questions answering

### 3.1 A person who her first name starts with an a

In [43]:
#A person who her first name starts with an a
lambda_func_startswith_a = lambda cel : cel.lower().startswith('a')
column = 'first_name'
c1 = get_condition_on_df_col(df, column, lambda_func_startswith_a)
df[c1].head()

Unnamed: 0,country_code,currency_code,email,first_name,job,last_name,phone_number
3,YE,NOK,sheltondavid@johnson.biz,Antonio,Information systems manager,Zavala,+1-848-339-6947
11,SA,FJD,mwilliams@hotmail.com,Angela,Chartered legal executive (England and Wales),Potts,+1-765-823-6940
14,DM,ARS,lklein@gmail.com,Angela,"Conservation officer, nature",Roberts,001-926-351-1087x31764
18,EG,HRK,mallorybarton@reyes-martin.org,Ashley,Risk analyst,Cook,001-500-762-7912
19,RW,BSD,osborneandrew@anderson-nguyen.info,Austin,Regulatory affairs officer,Owens,(710)979-5194


### 3.2 A person who her last name start with a b

In [44]:
lambda_func_startswith_b = lambda cel : cel.lower().startswith('b')
column = 'last_name'
c2 = get_condition_on_df_col(df, column, lambda_func_startswith_b)
df[c2].head()

Unnamed: 0,country_code,currency_code,email,first_name,job,last_name,phone_number
17,KZ,GHS,zchang@hotmail.com,Wayne,"Psychologist, counselling",Berry,(896)769-9300
31,TD,KWD,monica50@byrd.com,Jennifer,"Designer, interior/spatial",Bullock,001-567-088-6977
53,MH,VEF,serranomatthew@hotmail.com,Michele,Clinical embryologist,Baker,8400652822
100,ML,JEP,harrellandre@yahoo.com,Rachel,"Pharmacist, community",Beck,419-493-0239
112,VE,JPY,charlotteadams@benson.info,James,"Geologist, engineering",Beard,645.935.6213x57725


### 3.3 A person who her first name doesn't end with an a and her last name doesn't end with a b


In [45]:
lambda_func_endswith_a = lambda cel : cel.lower().endswith('a')
column = 'first_name'
c3A = get_condition_on_df_col(df, column, lambda_func_endswith_a)
lambda_func_endswith_b = lambda cel : cel.lower().endswith('b')
column = 'last_name'
c3B = get_condition_on_df_col(df, column, lambda_func_endswith_b)
c3 = and_(not_(c3B), not_(c3A)) # could be written also as not_(and_(c3B, c3A))
df[c3].head()

Unnamed: 0,country_code,currency_code,email,first_name,job,last_name,phone_number
0,MU,BMD,williamsullivan@turner.org,Megan,Hospital pharmacist,Chang,194.892.4115
1,UZ,KGS,swoods@gmail.com,Ryan,Retail banker,Carr,(016)097-5351x39332
2,NG,SYP,crodriguez@hotmail.com,Christopher,Colour technologist,Flores,+1-839-894-7196x5934
3,YE,NOK,sheltondavid@johnson.biz,Antonio,Information systems manager,Zavala,+1-848-339-6947
6,MD,ALL,howardjessica@hotmail.com,Benjamin,Pathologist,Garcia,(302)258-4197


### 3.4 A person who her country code is not US and verify the condition (1 and 2) or 3


In [46]:
#A person who her country code is not US and verify the condition (1 and 2) or 3
lambda_func_country_code_not_us = lambda cel : cel != 'US'
column = 'country_code'
c4A = get_condition_on_df_col(df, column, lambda_func_country_code_not_us)
c4 = and_(c4A, or_(and_(c1, c2), c3)) 
df[c4].head()

Unnamed: 0,country_code,currency_code,email,first_name,job,last_name,phone_number
0,MU,BMD,williamsullivan@turner.org,Megan,Hospital pharmacist,Chang,194.892.4115
1,UZ,KGS,swoods@gmail.com,Ryan,Retail banker,Carr,(016)097-5351x39332
2,NG,SYP,crodriguez@hotmail.com,Christopher,Colour technologist,Flores,+1-839-894-7196x5934
3,YE,NOK,sheltondavid@johnson.biz,Antonio,Information systems manager,Zavala,+1-848-339-6947
6,MD,ALL,howardjessica@hotmail.com,Benjamin,Pathologist,Garcia,(302)258-4197


In [47]:
# 5 A person who verifies the condition 4 but doesn't verify the condition 1 and currency code isn't ZWD
c5 = and_(c4, or_(c1, c3))
df[c5].head()

Unnamed: 0,country_code,currency_code,email,first_name,job,last_name,phone_number
0,MU,BMD,williamsullivan@turner.org,Megan,Hospital pharmacist,Chang,194.892.4115
1,UZ,KGS,swoods@gmail.com,Ryan,Retail banker,Carr,(016)097-5351x39332
2,NG,SYP,crodriguez@hotmail.com,Christopher,Colour technologist,Flores,+1-839-894-7196x5934
3,YE,NOK,sheltondavid@johnson.biz,Antonio,Information systems manager,Zavala,+1-848-339-6947
6,MD,ALL,howardjessica@hotmail.com,Benjamin,Pathologist,Garcia,(302)258-4197


### 3.6 A person who verfies that at least one of his columns start with a +

In [48]:
#A person who verfies that at least one of his columns start with a +
lambda_func_startswith_plus = lambda cel : cel.lower().startswith('+')
c6 = get_condition_on_at_least_one_col(df, lambda_func_startswith_plus)
df[c6].head()

Unnamed: 0,country_code,currency_code,email,first_name,job,last_name,phone_number
2,NG,SYP,crodriguez@hotmail.com,Christopher,Colour technologist,Flores,+1-839-894-7196x5934
3,YE,NOK,sheltondavid@johnson.biz,Antonio,Information systems manager,Zavala,+1-848-339-6947
7,CI,PEN,ubanks@yahoo.com,Erin,Museum/gallery conservator,Mccullough,+1-715-084-2375
9,HN,PEN,amunoz@gmail.com,Jacqueline,Veterinary surgeon,Griffin,+1-063-812-0665x0300
11,SA,FJD,mwilliams@hotmail.com,Angela,Chartered legal executive (England and Wales),Potts,+1-765-823-6940


### 3.7 A person who verifies that at least one of his first name, job or last name start with an a

In [50]:
lambda_func_startswith_a = lambda cel : cel.lower().startswith('a')
columns = ['first_name', 'last_name', 'job']
c7 = get_condition_on_at_least_one_col(df, lambda_func_startswith_a, columns)
df[c7].head()

Unnamed: 0,country_code,currency_code,email,first_name,job,last_name,phone_number
3,YE,NOK,sheltondavid@johnson.biz,Antonio,Information systems manager,Zavala,+1-848-339-6947
10,MK,PGK,turnerhaley@gmail.com,Cheryl,Agricultural consultant,Lee,001-104-714-2851x2400
11,SA,FJD,mwilliams@hotmail.com,Angela,Chartered legal executive (England and Wales),Potts,+1-765-823-6940
14,DM,ARS,lklein@gmail.com,Angela,"Conservation officer, nature",Roberts,001-926-351-1087x31764
18,EG,HRK,mallorybarton@reyes-martin.org,Ashley,Risk analyst,Cook,001-500-762-7912


In [41]:
Using only boolean algebra 
A person who her first name starts with an a
A person who her last name start with a b
A person who her first name doesn't end with an a and her last name doesn't end with a b
A person who her country code is not US and verify the condition (1 and 2) or 3
A person who verifies the condition 4 but doesn't verify the condition 1 and currency code isn't ZWD

Using some 1st order logic
A person who verfies that at least one of his columns start with a +
A person who verifies that at least one of his first name, job or last name start with an a

SyntaxError: invalid syntax (<ipython-input-41-6ed7eebe17e0>, line 1)