# Pandas Tutorial 04

Filtering - Using Conditionals to Filter Rows and Columns

In [1]:
import pandas as pd

In [2]:
people = {
    "first": ["Corey", 'Jane', 'John'], 
    "last": ["Schafer", 'Doe', 'Doe'], 
    "email": ["CoreyMSchafer@gmail.com", 'JaneDoe@email.com', 'JohnDoe@email.com']
}

In [3]:
df_new = pd.DataFrame(people)

In [4]:
df_new

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


# Filtering .. 
Find all the rows with first name Jane. It will give you the boolean output each record wise.
Only it found in second record, thats why giving output True, False for other records

In [6]:
df_new['first'] == 'Jane'

0    False
1     True
2    False
Name: first, dtype: bool

In [7]:
filt = df_new['last'] == 'Doe'

In [8]:
filt

0    False
1     True
2     True
Name: last, dtype: bool

In [10]:
#This will return a dataframe
df_new[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [13]:
# If I want only the email column
# Remember, filt will select all the rows and 'email' will take email from all rows
df_new.loc[filt, 'email']

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

# AND(&) and OR(|) operator 
Let say we want all the names with lastname Doe and firstname John

In [17]:
filt = (df_new['last'] == 'Doe') & (df_new['first'] == 'John')

In [19]:
df_new[filt]

Unnamed: 0,first,last,email
2,John,Doe,JohnDoe@email.com


In [21]:
df_new.loc[filt, 'email']

2    JohnDoe@email.com
Name: email, dtype: object

# find email adress whose firstname is John or lastname is Doe

In [22]:
filt = (df_new['last'] == 'Doe') | (df_new['first'] == 'John')

In [23]:
df_new[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [24]:
df_new.loc[filt, 'email']

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

# Lets get the opposite -- find those emails whose firstname is not John or lastname is not Doe -- easy use ~ (tild) infront of filt

In [25]:
df_new.loc[~filt, 'email']

0    CoreyMSchafer@gmail.com
Name: email, dtype: object

# Lets go back to our stack overflow dataset

In [26]:
df = pd.read_csv("/home/mostafiz/Python-Practice/Pandas/DataSet/developer_survey_2019/survey_results_public.csv")

df_schema = pd.read_csv("/home/mostafiz/Python-Practice/Pandas/DataSet/developer_survey_2019/survey_results_schema.csv")

In [28]:
df

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
3,4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
4,5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88878,88377,,Yes,Less than once a month but more than once per ...,The quality of OSS and closed source software ...,"Not employed, and not looking for work",Canada,No,Primary/elementary school,,...,,Tech articles written by other developers;Tech...,,Man,No,,,No,Appropriate in length,Easy
88879,88601,,No,Never,The quality of OSS and closed source software ...,,,,,,...,,,,,,,,,,
88880,88802,,No,Never,,Employed full-time,,,,,...,,,,,,,,,,
88881,88816,,No,Never,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",,,,,...,,,,,,,,,,


# Persons salary more than ....


In [34]:
high_salary = df['ConvertedComp'] > 1700000

In [35]:
df.loc[high_salary]

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
57,58,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of LOWER quality than prop...",Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,,47.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,,Easy
101,102,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,37.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Too long,Easy
164,166,I am a developer by profession,Yes,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","A social science (ex. anthropology, psychology...",...,Just as welcome now as I felt last year,,30.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Neither easy nor difficult
434,436,I am a developer by profession,Yes,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech articles written by other developers;Tech...,38.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Too long,Easy
450,452,I am a developer by profession,Yes,Never,The quality of OSS and closed source software ...,Employed full-time,United States,"Yes, full-time",I never completed any formal education,,...,Somewhat more welcome now than last year,Tech articles written by other developers;Cour...,35.0,Man,No,,White or of European descent,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88162,88712,I am a developer by profession,Yes,Less than once per year,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,,36.0,Man,No,Straight / Heterosexual,East Asian;White or of European descent;Biracial,Yes,Too long,Easy
88264,88815,I am a developer by profession,Yes,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,27.0,Man,No,Straight / Heterosexual,South Asian,No,Too long,Neither easy nor difficult
88266,88818,I am a developer by profession,Yes,Never,"OSS is, on average, of LOWER quality than prop...",Employed full-time,United States,No,Associate degree,Web development or web design,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,28.0,Man,No,Straight / Heterosexual,White or of European descent,No,Too long,Easy
88321,88874,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of LOWER quality than prop...",Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","A social science (ex. anthropology, psychology...",...,Just as welcome now as I felt last year,,26.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy


In [39]:
df.loc[high_salary, ['Country', 'LanguageWorkedWith']] #Pass a list

Unnamed: 0,Country,LanguageWorkedWith
57,United States,C#;Java;SQL
101,United States,C#;HTML/CSS;JavaScript;SQL;TypeScript
164,United States,Bash/Shell/PowerShell;Go;HTML/CSS;Java;JavaScr...
434,United States,Bash/Shell/PowerShell;HTML/CSS;Java;JavaScript...
450,United States,SQL;VBA
...,...,...
88162,United States,Bash/Shell/PowerShell;C#;HTML/CSS;JavaScript;P...
88264,United States,HTML/CSS;Java;JavaScript;Kotlin;Python;SQL
88266,United States,C#;HTML/CSS;JavaScript;SQL;TypeScript
88321,United States,C++;Python;Scala;SQL


# Respondent from 5 different countries 

In [45]:
#Making a list of those countries
countries = ['Canada', 'Unites States', 'United Kigndom', 'Germany', 'Finland']
filt = df['Country'].isin(countries)

In [44]:
df.loc[filt, 'Country']

5         Canada
11        Canada
13       Germany
20        Canada
30        Canada
          ...   
88829    Germany
88838     Canada
88850    Germany
88855     Canada
88878     Canada
Name: Country, Length: 9807, dtype: object

# Find respondent who work with Python programming language
We can use string method here : str.contain()

In [57]:
filt = df['LanguageWorkedWith'].str.contains("Python", na = False)#It can not work with NAN values

In [58]:
df.loc[filt, "LanguageWorkedWith"]

0                          HTML/CSS;Java;JavaScript;Python
1                                      C++;HTML/CSS;Python
3                                      C;C++;C#;Python;SQL
4              C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA
7        Bash/Shell/PowerShell;C;C++;HTML/CSS;Java;Java...
                               ...                        
88854    Bash/Shell/PowerShell;C;C++;HTML/CSS;Java;Java...
88860      Bash/Shell/PowerShell;C++;Python;Ruby;Other(s):
88865      Bash/Shell/PowerShell;HTML/CSS;Python;Other(s):
88872             C;C++;HTML/CSS;JavaScript;PHP;Python;SQL
88876                           HTML/CSS;JavaScript;Python
Name: LanguageWorkedWith, Length: 36443, dtype: object