In [1]:
import numpy as np
import pandas as pd

In [2]:
# Sample records that mix real values with several kinds of "missing" markers:
# np.nan, None, and the placeholder strings 'NA' / 'Missing'.
# Ages are stored as strings.
people = dict(
    first=['Corey', 'Jane', 'John', 'Chris', np.nan, None, 'NA'],
    last=['Schafer', 'Doe', 'Doe', 'Schafer', np.nan, np.nan, 'Missing'],
    email=[
        'CoreyMSchafer@gmail.com',
        'JaneDoe@gmail.com',
        'JohnDoe@gmail.com',
        None,
        np.nan,
        'Anonymous@gmail.com',
        'NA',
    ],
    age=['33', '55', '63', '36', None, None, 'Missing'],
)

In [3]:
# Build a DataFrame with one column per key of `people`.
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
3,Chris,Schafer,,36
4,,,,
5,,,Anonymous@gmail.com,
6,,Missing,,Missing


In [5]:
# Drop every row that has at least one missing value.
# Only NaN/None count as missing: row 6 survives because its 'NA' and 'Missing'
# entries are ordinary strings.
df.dropna() 

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
6,,Missing,,Missing


In [6]:
df.dropna(axis='index', how='any') # explicit form of the default: drop a row if any of its values is missing

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
6,,Missing,,Missing


In [7]:
# how='all' drops a row only when every value in it is missing (row 4 here).
df.dropna(axis='index', how='all')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
3,Chris,Schafer,,36
5,,,Anonymous@gmail.com,
6,,Missing,,Missing


In [8]:
df.dropna(axis='columns', how='all') # drop a column only if all of its values are missing; no column qualifies here

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
3,Chris,Schafer,,36
4,,,,
5,,,Anonymous@gmail.com,
6,,Missing,,Missing


In [9]:
# how='any' on columns drops every column that has at least one missing value.
# All four columns contain a missing entry, so only the index is left.
df.dropna(axis='columns', how='any')

0
1
2
3
4
5
6


In [10]:
# Drop rows only when specific columns are missing.
# Here the email address is required, so rows without one are dropped.
df.dropna(axis='index', how='any', subset=['email'])

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
5,,,Anonymous@gmail.com,
6,,Missing,,Missing


In [12]:
# With several subset columns and how='any', a row is dropped when either
# 'last' or 'email' is missing.
df.dropna(axis='index', how='any', subset=['last','email'])

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
6,,Missing,,Missing


In [13]:
# The dropna() calls above returned new frames; df itself is unchanged.
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
3,Chris,Schafer,,36
4,,,,
5,,,Anonymous@gmail.com,
6,,Missing,,Missing


In [14]:
# Make the change permanent by rebinding df to the filtered result.
df=df.dropna(axis='index', how='any', subset=['last','email'])

In [15]:
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
6,,Missing,,Missing


In [16]:
# Rebuild the original frame from `people`, restoring the rows dropped above.
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
3,Chris,Schafer,,36
4,,,,
5,,,Anonymous@gmail.com,
6,,Missing,,Missing


In [17]:
# Alternative way to make the change permanent: inplace=True modifies df
# directly, and the call itself returns None.
df.dropna(axis='index', how='any', subset=['last','email'], inplace=True)

In [18]:
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
6,,Missing,,Missing


In [19]:
# Rebuild the original frame from `people` again before the next example.
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
3,Chris,Schafer,,36
4,,,,
5,,,Anonymous@gmail.com,
6,,Missing,,Missing


In [20]:
# Normalise the placeholder strings 'NA' and 'Missing' to real NaN so that
# isna()/dropna()/fillna() treat them as missing values.
# One replace() call handles both markers, and rebinding df keeps the cell free
# of in-place mutation.
df = df.replace(['NA', 'Missing'], np.nan)

In [21]:
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@gmail.com,55.0
2,John,Doe,JohnDoe@gmail.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,Anonymous@gmail.com,
6,,,,


In [22]:
df.isna() # True where a value is missing, False otherwise

Unnamed: 0,first,last,email,age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,True,False
4,True,True,True,True
5,True,True,False,True
6,True,True,True,True


In [23]:
# Fill the missing values with a placeholder string.
# This returns a new frame; df itself is not modified.
df.fillna('MISSING')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
3,Chris,Schafer,MISSING,36
4,MISSING,MISSING,MISSING,MISSING
5,MISSING,MISSING,Anonymous@gmail.com,MISSING
6,MISSING,MISSING,MISSING,MISSING


In [24]:
# Fill the missing values with the number 0; returns a new frame, df is not modified.
df.fillna(0)

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@gmail.com,55
2,John,Doe,JohnDoe@gmail.com,63
3,Chris,Schafer,0,36
4,0,0,0,0
5,0,0,Anonymous@gmail.com,0
6,0,0,0,0


In [25]:
df['age']

0      33
1      55
2      63
3      36
4    None
5    None
6     NaN
Name: age, dtype: object

In [26]:
# 'age' is still object dtype holding strings, so mean() fails with the
# TypeError shown below.
df['age'].mean()

TypeError: can only concatenate str (not "int") to str

In [27]:
# np.nan is a Python float, which is why a column containing NaN can be cast to float.
type(np.nan)

float

In [28]:
# Cast the 'age' column to float so numeric aggregations work; the other
# columns are left as they are.
df = df.astype({'age': float})

In [29]:
df.dtypes

first     object
last      object
email     object
age      float64
dtype: object

In [30]:
# With float64 dtype, mean() works; the NaN entries are skipped, so this is
# the average of the four known ages.
df['age'].mean()

46.75

In [None]:
# To cast every column of a DataFrame to a single dtype, call astype() on the frame itself: df.astype(dtype)

In [31]:
# Load the survey results; the path is relative to the current working directory.
# This rebinds df, replacing the small example frame used above.
df=pd.read_csv('survey_results_public.csv')

In [32]:
df

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64456,64858,,Yes,,16,,,,United States,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64457,64867,,Yes,,,,,,Morocco,,...,,,,,,,,,,
64458,64898,,Yes,,,,,,Viet Nam,,...,,,,,,,,,,
64459,64925,,Yes,,,,,,Poland,,...,,,,,Angular;Angular.js;React.js,,,,,


In [36]:
df.head(10)

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27.0
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4.0
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4.0
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8.0
5,6,I am a developer by profession,No,,14,,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,,,React.js,,,,6,4.0
6,7,I am a developer by profession,Yes,,18,Monthly,,,India,United States dollar,...,,,,"Computer science, computer engineering, or sof...",,,A lot more welcome now than last year,,6,4.0
7,8,I am a developer by profession,Yes,36.0,12,Yearly,116000.0,116000.0,United States,United States dollar,...,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",Django;React.js;Vue.js,Flask,Just as welcome now as I felt last year,39.0,17,13.0
8,9,I am a developer by profession,No,30.0,20,,,,Tunisia,United States dollar,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",Angular.js,jQuery,Just as welcome now as I felt last year,50.0,6,4.0
9,10,I am a developer by profession,Yes,22.0,14,Yearly,25000.0,32315.0,United Kingdom,Pound sterling,...,Easy,Appropriate in length,No,Mathematics or statistics,Flask;jQuery,Flask;jQuery,Somewhat more welcome now than last year,36.0,8,4.0


In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
df.describe()

Unnamed: 0,Respondent,Age,CompTotal,ConvertedComp,WorkWeekHrs
count,64461.0,45446.0,34826.0,34756.0,41151.0
mean,32554.079738,30.834111,3.190464e+242,103756.1,40.782174
std,18967.44236,9.585392,inf,226885.3,17.816383
min,1.0,1.0,0.0,0.0,1.0
25%,16116.0,24.0,20000.0,24648.0,40.0
50%,32231.0,29.0,63000.0,54049.0,40.0
75%,49142.0,35.0,125000.0,95000.0,44.0
max,65639.0,279.0,1.1111110000000001e+247,2000000.0,475.0


In [38]:
# Number of missing values in each column.
df.isna().sum()

Respondent                0
MainBranch              299
Hobbyist                 45
Age                   19015
Age1stCode             6561
                      ...  
WebframeWorkedWith    22182
WelcomeChange         11778
WorkWeekHrs           23310
YearsCode              6777
YearsCodePro          18112
Length: 61, dtype: int64

In [39]:
df['YearsCode']

0         36
1          7
2          4
3          7
4         15
        ... 
64456     10
64457    NaN
64458    NaN
64459    NaN
64460    NaN
Name: YearsCode, Length: 64461, dtype: object

In [40]:
# YearsCode is object dtype (strings), so mean() raises the TypeError shown below.
df['YearsCode'].mean()

TypeError: can only concatenate str (not "int") to str

In [41]:
# A plain cast fails: besides NaN, the column also holds non-numeric strings
# such as 'Less than 1 year' (see the ValueError below).
df['YearsCode']=df['YearsCode'].astype(float)

ValueError: could not convert string to float: 'Less than 1 year'

In [42]:
df['YearsCode'].unique()

array(['36', '7', '4', '15', '6', '17', '8', '10', '35', '5', '37', '19',
       '9', '22', '30', '23', '20', '2', 'Less than 1 year', '3', '13',
       '25', '16', '43', '11', '38', '33', nan, '24', '21', '12', '40',
       '27', '50', '46', '14', '18', '28', '32', '44', '26', '42', '31',
       '34', '29', '1', '39', '41', '45', 'More than 50 years', '47',
       '49', '48'], dtype=object)

In [43]:
# Map the textual bucket to a number so the column can later be cast to float.
# The result is assigned back instead of calling replace(..., inplace=True) on
# df['YearsCode']: that form mutates a Series selected from the frame, which
# pandas warns about (chained assignment) and which leaves df unchanged once
# Copy-on-Write is enabled.
df['YearsCode'] = df['YearsCode'].replace('Less than 1 year', 0)

In [44]:
# Map the open-ended top bucket to 51 so the column can later be cast to float.
# The result is assigned back instead of calling replace(..., inplace=True) on
# df['YearsCode']: that form mutates a Series selected from the frame, which
# pandas warns about (chained assignment) and which leaves df unchanged once
# Copy-on-Write is enabled.
df['YearsCode'] = df['YearsCode'].replace('More than 50 years', 51)

In [45]:
df['YearsCode'].unique()

array(['36', '7', '4', '15', '6', '17', '8', '10', '35', '5', '37', '19',
       '9', '22', '30', '23', '20', '2', 0, '3', '13', '25', '16', '43',
       '11', '38', '33', nan, '24', '21', '12', '40', '27', '50', '46',
       '14', '18', '28', '32', '44', '26', '42', '31', '34', '29', '1',
       '39', '41', '45', 51, '47', '49', '48'], dtype=object)

In [46]:
# The remaining values are numeric strings, ints or NaN, so the cast to float now succeeds.
df['YearsCode']=df['YearsCode'].astype(float)

In [47]:
df['YearsCode'].mean()

12.709052770265584

In [48]:
df['YearsCode'].median()

10.0

In [49]:
# Impute the missing YearsCode entries with the column mean.
# fillna() is the dedicated API for filling NaN, and assigning the result back
# avoids an in-place call on a Series selected from the frame, which pandas
# warns about and which leaves df unchanged once Copy-on-Write is enabled.
df['YearsCode'] = df['YearsCode'].fillna(df['YearsCode'].mean())

In [50]:
df['YearsCode'].unique()

array([36.        ,  7.        ,  4.        , 15.        ,  6.        ,
       17.        ,  8.        , 10.        , 35.        ,  5.        ,
       37.        , 19.        ,  9.        , 22.        , 30.        ,
       23.        , 20.        ,  2.        ,  0.        ,  3.        ,
       13.        , 25.        , 16.        , 43.        , 11.        ,
       38.        , 33.        , 12.70905277, 24.        , 21.        ,
       12.        , 40.        , 27.        , 50.        , 46.        ,
       14.        , 18.        , 28.        , 32.        , 44.        ,
       26.        , 42.        , 31.        , 34.        , 29.        ,
        1.        , 39.        , 41.        , 45.        , 51.        ,
       47.        , 49.        , 48.        ])

In [51]:
df['YearsCode']

0        36.000000
1         7.000000
2         4.000000
3         7.000000
4        15.000000
           ...    
64456    10.000000
64457    12.709053
64458    12.709053
64459    12.709053
64460    12.709053
Name: YearsCode, Length: 64461, dtype: float64

In [52]:
df.head(10)

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36.0,27.0
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7.0,4.0
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4.0,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7.0,4.0
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15.0,8.0
5,6,I am a developer by profession,No,,14,,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,,,React.js,,,,6.0,4.0
6,7,I am a developer by profession,Yes,,18,Monthly,,,India,United States dollar,...,,,,"Computer science, computer engineering, or sof...",,,A lot more welcome now than last year,,6.0,4.0
7,8,I am a developer by profession,Yes,36.0,12,Yearly,116000.0,116000.0,United States,United States dollar,...,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",Django;React.js;Vue.js,Flask,Just as welcome now as I felt last year,39.0,17.0,13.0
8,9,I am a developer by profession,No,30.0,20,,,,Tunisia,United States dollar,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",Angular.js,jQuery,Just as welcome now as I felt last year,50.0,6.0,4.0
9,10,I am a developer by profession,Yes,22.0,14,Yearly,25000.0,32315.0,United Kingdom,Pound sterling,...,Easy,Appropriate in length,No,Mathematics or statistics,Flask;jQuery,Flask;jQuery,Somewhat more welcome now than last year,36.0,8.0,4.0


In [53]:
df.loc[2]

Respondent                                                   3
MainBranch                         I code primarily as a hobby
Hobbyist                                                   Yes
Age                                                        NaN
Age1stCode                                                  15
                                        ...                   
WebframeWorkedWith                                         NaN
WelcomeChange         Somewhat more welcome now than last year
WorkWeekHrs                                                NaN
YearsCode                                                  4.0
YearsCodePro                                               NaN
Name: 2, Length: 61, dtype: object

In [54]:
# Reload the CSV using the Respondent column as the row index.
# This rebinds df to a freshly read frame, so the YearsCode cleanup done
# earlier is not carried over.
df=pd.read_csv('survey_results_public.csv', index_col='Respondent')

In [55]:
df

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64858,,Yes,,16,,,,United States,,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64867,,Yes,,,,,,Morocco,,,...,,,,,,,,,,
64898,,Yes,,,,,,Viet Nam,,,...,,,,,,,,,,
64925,,Yes,,,,,,Poland,,,...,,,,,Angular;Angular.js;React.js,,,,,


In [56]:
# .loc selects rows by index label.
df.loc[2]

# It is not looking for index position 2; instead it looks for the row whose label (Respondent) equals 2.

MainBranch                                         I am a developer by profession
Hobbyist                                                                       No
Age                                                                           NaN
Age1stCode                                                                     19
CompFreq                                                                      NaN
CompTotal                                                                     NaN
ConvertedComp                                                                 NaN
Country                                                            United Kingdom
CurrencyDesc                                                       Pound sterling
CurrencySymbol                                                                GBP
DatabaseDesireNextYear                                                        NaN
DatabaseWorkedWith                                                            NaN
DevType         

In [59]:
# A list of labels returns those rows in the order given.
df.loc[[2,1,4]]

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4


In [60]:
# .iloc selects by integer position rather than by label.
# NOTE(review): this cell is meant to introduce .iloc, but the call below still
# uses .loc, so it returns the row labelled 2 (Respondent == 2). df.iloc[2]
# would return the third row instead.
df.loc[2]

MainBranch                                         I am a developer by profession
Hobbyist                                                                       No
Age                                                                           NaN
Age1stCode                                                                     19
CompFreq                                                                      NaN
CompTotal                                                                     NaN
ConvertedComp                                                                 NaN
Country                                                            United Kingdom
CurrencyDesc                                                       Pound sterling
CurrencySymbol                                                                GBP
DatabaseDesireNextYear                                                        NaN
DatabaseWorkedWith                                                            NaN
DevType         

In [62]:
# df.iloc[row(s), col(s)] selects by integer position.

# Each part can be a slice written start:stop:step, so a step size can also be given.
# The bare ':' slices used here select every row and every column.
df.iloc[:,:]

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64858,,Yes,,16,,,,United States,,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64867,,Yes,,,,,,Morocco,,,...,,,,,,,,,,
64898,,Yes,,,,,,Viet Nam,,,...,,,,,,,,,,
64925,,Yes,,,,,,Poland,,,...,,,,,Angular;Angular.js;React.js,,,,,


In [63]:
df.dtypes

MainBranch                       object
Hobbyist                         object
Age                             float64
Age1stCode                       object
CompFreq                         object
CompTotal                       float64
ConvertedComp                   float64
Country                          object
CurrencyDesc                     object
CurrencySymbol                   object
DatabaseDesireNextYear           object
DatabaseWorkedWith               object
DevType                          object
EdLevel                          object
Employment                       object
Ethnicity                        object
Gender                           object
JobFactors                       object
JobSat                           object
JobSeek                          object
LanguageDesireNextYear           object
LanguageWorkedWith               object
MiscTechDesireNextYear           object
MiscTechWorkedWith               object
NEWCollabToolsDesireNextYear     object


In [64]:
# Boolean mask: True for respondents whose reported Age is below 30.
# Rows with a missing Age compare as False and are therefore excluded.
is_under_30 = df['Age'] < 30
df.loc[is_under_30]

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
10,I am a developer by profession,Yes,22.0,14,Yearly,25000.0,32315.0,United Kingdom,Pound sterling,GBP,...,Easy,Appropriate in length,No,Mathematics or statistics,Flask;jQuery,Flask;jQuery,Somewhat more welcome now than last year,36.0,8,4
11,I am a developer by profession,Yes,23.0,13,Yearly,31000.0,40070.0,United Kingdom,Pound sterling,GBP,...,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",Angular;Django;React.js,Angular;Angular.js;Django;React.js,Just as welcome now as I felt last year,40.0,10,2
14,I am a developer by profession,Yes,27.0,13,Yearly,66000.0,66000.0,United States,United States dollar,USD,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",Angular;Vue.js,Angular;Vue.js,Just as welcome now as I felt last year,40.0,5,1
17,I am a developer by profession,Yes,25.0,14,Yearly,79000.0,79000.0,United States,United States dollar,USD,...,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core;Gatsby;React.js;Vue.js,ASP.NET;Gatsby;jQuery;React.js;Vue.js,Just as welcome now as I felt last year,40.0,7,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59132,,Yes,22.0,16,,,,Mongolia,,,...,Neither easy nor difficult,Appropriate in length,No,"Another engineering discipline (such as civil,...",Angular.js;React.js,Angular;Angular.js;React.js,Just as welcome now as I felt last year,,3,Less than 1 year
59168,,Yes,17.0,Younger than 5 years,,,,Costa Rica,,,...,Easy,Too short,,,Drupal;Flask;Gatsby;Spring;Vue.js,Angular;Angular.js;ASP.NET;ASP.NET Core;Django...,Just as welcome now as I felt last year,,14,
59638,,Yes,23.0,10,,,,United Kingdom,,,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",Django;Flask;React.js;Spring,Django;Flask,Just as welcome now as I felt last year,,5,
60134,,Yes,22.0,10,,,,Spain,,,...,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",Express;React.js,Express;React.js;Spring,Just as welcome now as I felt last year,,10,


In [65]:
# Weekly working hours of professional developers only.
# Passing the column as a one-element list keeps the result a DataFrame.
is_professional = df['MainBranch'] == 'I am a developer by profession'
df.loc[is_professional, ['WorkWeekHrs']]

Unnamed: 0_level_0,WorkWeekHrs
Respondent,Unnamed: 1_level_1
1,50.0
2,
4,40.0
6,
7,
...,...
65631,40.0
65632,
65634,
65635,


In [69]:
# Count professional developers who report working at least 40 hours a week.
# Each condition gets its own named mask; '&' combines them element-wise.
is_professional = df['MainBranch'] == 'I am a developer by profession'
works_40_plus = df['WorkWeekHrs'] >= 40
df.loc[is_professional & works_40_plus, ['WorkWeekHrs']].count()

WorkWeekHrs    28104
dtype: int64