### Add/Remove rows and columns

Adding columns

In [3]:
import pandas as pd
people = {
    'first_name': ['Corey', 'Jane', 'Lee', 'Lee'],
    'last_name': ['Chou', 'Kalvin', 'Leo', 'Dan'],
    'email': ['aaa', 'bbb', 'ccc', 'ddd']
}
df_people = pd.DataFrame(people)

In [4]:
df_people

Unnamed: 0,first_name,last_name,email
0,Corey,Chou,aaa
1,Jane,Kalvin,bbb
2,Lee,Leo,ccc
3,Lee,Dan,ddd


In [5]:
df_people['full_name'] = df_people['first_name'] + ' ' + df_people['last_name']

In [6]:
df_people

Unnamed: 0,first_name,last_name,email,full_name
0,Corey,Chou,aaa,Corey Chou
1,Jane,Kalvin,bbb,Jane Kalvin
2,Lee,Leo,ccc,Lee Leo
3,Lee,Dan,ddd,Lee Dan


remove columns

In [7]:
df_people.drop(columns=['first_name', 'last_name'], inplace=True)

In [8]:
df_people

Unnamed: 0,email,full_name
0,aaa,Corey Chou
1,bbb,Jane Kalvin
2,ccc,Lee Leo
3,ddd,Lee Dan


Break down a column into two columns

In [9]:
df_people['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,Corey,Chou
1,Jane,Kalvin
2,Lee,Leo
3,Lee,Dan


In [10]:
df_people[['first_n', 'last_n']] = df_people['full_name'].str.split(' ', expand=True)

In [11]:
df_people

Unnamed: 0,email,full_name,first_n,last_n
0,aaa,Corey Chou,Corey,Chou
1,bbb,Jane Kalvin,Jane,Kalvin
2,ccc,Lee Leo,Lee,Leo
3,ddd,Lee Dan,Lee,Dan


Adding single row data

In [12]:
df_people.append({'first_n':'Tony', 'last_n':'Wang'}, ignore_index=True)

  df_people.append({'first_n':'Tony', 'last_n':'Wang'}, ignore_index=True)


Unnamed: 0,email,full_name,first_n,last_n
0,aaa,Corey Chou,Corey,Chou
1,bbb,Jane Kalvin,Jane,Kalvin
2,ccc,Lee Leo,Lee,Leo
3,ddd,Lee Dan,Lee,Dan
4,,,Tony,Wang


Adding one DF to other 

In [13]:
people = {
    'first_n': ['Tony', 'Steve'],
    'last_n': ['Stark', 'Rogers'],
    'email': ['ironman@avenge.com', 'cap@avenge.com']
}
df_people2 = pd.DataFrame(people)

In [14]:
df_people2

Unnamed: 0,first_n,last_n,email
0,Tony,Stark,ironman@avenge.com
1,Steve,Rogers,cap@avenge.com


In [15]:
df_people.append(df_people2, ignore_index=True)
# append() doesn't have inplace argument like the drop method has.

  df_people.append(df_people2, ignore_index=True)


Unnamed: 0,email,full_name,first_n,last_n
0,aaa,Corey Chou,Corey,Chou
1,bbb,Jane Kalvin,Jane,Kalvin
2,ccc,Lee Leo,Lee,Leo
3,ddd,Lee Dan,Lee,Dan
4,ironman@avenge.com,,Tony,Stark
5,cap@avenge.com,,Steve,Rogers


In [16]:
df_new = df_people.append(df_people2, ignore_index=True, sort=False)

  df_new = df_people.append(df_people2, ignore_index=True, sort=False)


In [17]:
df_new

Unnamed: 0,email,full_name,first_n,last_n
0,aaa,Corey Chou,Corey,Chou
1,bbb,Jane Kalvin,Jane,Kalvin
2,ccc,Lee Leo,Lee,Leo
3,ddd,Lee Dan,Lee,Dan
4,ironman@avenge.com,,Tony,Stark
5,cap@avenge.com,,Steve,Rogers


remove a row

In [18]:
df_new.drop(index=[3,4])

Unnamed: 0,email,full_name,first_n,last_n
0,aaa,Corey Chou,Corey,Chou
1,bbb,Jane Kalvin,Jane,Kalvin
2,ccc,Lee Leo,Lee,Leo
5,cap@avenge.com,,Steve,Rogers


In [19]:
print(df_new['first_n'] == 'Lee')
print(df_new[df_new['first_n'] == 'Lee'].index)
df_new.drop(index=df_new[df_new['first_n']=='Lee'].index)

0    False
1    False
2     True
3     True
4    False
5    False
Name: first_n, dtype: bool
Int64Index([2, 3], dtype='int64')


Unnamed: 0,email,full_name,first_n,last_n
0,aaa,Corey Chou,Corey,Chou
1,bbb,Jane Kalvin,Jane,Kalvin
4,ironman@avenge.com,,Tony,Stark
5,cap@avenge.com,,Steve,Rogers


### Sorting 

In [22]:
df_new.sort_values(by='last_n', ascending=False)

Unnamed: 0,email,full_name,first_n,last_n
4,ironman@avenge.com,,Tony,Stark
5,cap@avenge.com,,Steve,Rogers
2,ccc,Lee Leo,Lee,Leo
1,bbb,Jane Kalvin,Jane,Kalvin
3,ddd,Lee Dan,Lee,Dan
0,aaa,Corey Chou,Corey,Chou


In [23]:
# sorting with multiple conditions
df_new.sort_values(by=['last_n', 'first_n'])

Unnamed: 0,email,full_name,first_n,last_n
0,aaa,Corey Chou,Corey,Chou
3,ddd,Lee Dan,Lee,Dan
1,bbb,Jane Kalvin,Jane,Kalvin
2,ccc,Lee Leo,Lee,Leo
5,cap@avenge.com,,Steve,Rogers
4,ironman@avenge.com,,Tony,Stark


In [25]:
# last name ascending order and first name deascending order
df_new.sort_values(by=['last_n', 'first_n'], ascending=[False, True], inplace=True)

In [26]:
df_new

Unnamed: 0,email,full_name,first_n,last_n
4,ironman@avenge.com,,Tony,Stark
5,cap@avenge.com,,Steve,Rogers
2,ccc,Lee Leo,Lee,Leo
1,bbb,Jane Kalvin,Jane,Kalvin
3,ddd,Lee Dan,Lee,Dan
0,aaa,Corey Chou,Corey,Chou


In [27]:
df_new.sort_index()

Unnamed: 0,email,full_name,first_n,last_n
0,aaa,Corey Chou,Corey,Chou
1,bbb,Jane Kalvin,Jane,Kalvin
2,ccc,Lee Leo,Lee,Leo
3,ddd,Lee Dan,Lee,Dan
4,ironman@avenge.com,,Tony,Stark
5,cap@avenge.com,,Steve,Rogers


Sorting a Series

In [28]:
df_new['first_n'].sort_values()

0    Corey
1     Jane
2      Lee
3      Lee
5    Steve
4     Tony
Name: first_n, dtype: object

In [29]:
df = pd.read_csv('../../data-2019/survey_results_public.csv')
schema_df = pd.read_csv('../../data-2019/survey_results_schema.csv')

In [30]:
df.head(2)

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult


In [31]:
df.sort_values(by='Country', inplace=True)

In [34]:
df[['Country', 'ConvertedComp']].head(10)

Unnamed: 0,Country,ConvertedComp
39018,Afghanistan,19152.0
62723,Afghanistan,1000000.0
85185,Afghanistan,
50437,Afghanistan,
88340,Afghanistan,
88735,Afghanistan,
6391,Afghanistan,
39754,Afghanistan,
88181,Afghanistan,
48122,Afghanistan,4464.0


In [36]:
df.sort_values(by=['Country', 'ConvertedComp'], ascending=[True, False], inplace=True)

In [37]:
df[['Country', 'ConvertedComp']].head(10)

Unnamed: 0,Country,ConvertedComp
62723,Afghanistan,1000000.0
50172,Afghanistan,153216.0
39018,Afghanistan,19152.0
58082,Afghanistan,17556.0
7056,Afghanistan,14364.0
22327,Afghanistan,7980.0
48122,Afghanistan,4464.0
10697,Afghanistan,3996.0
8112,Afghanistan,1596.0
29560,Afghanistan,1116.0


to find out the largest 10 income data

In [38]:
df['ConvertedComp'].nlargest(10)

25833    2000000.0
87353    2000000.0
21895    2000000.0
28080    2000000.0
72274    2000000.0
77665    2000000.0
79701    2000000.0
51798    2000000.0
75088    2000000.0
32056    2000000.0
Name: ConvertedComp, dtype: float64

In [39]:
df.nlargest(10, "ConvertedComp")

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
25833,25983,I am a developer by profession,Yes,Less than once per year,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Canada,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,,24.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
87353,87896,I am a developer by profession,Yes,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,Germany,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech articles written by other developers;Tech...,32.0,Man,No,Gay or Lesbian,White or of European descent,No,Appropriate in length,Neither easy nor difficult
21895,22013,I am a developer by profession,Yes,Never,The quality of OSS and closed source software ...,Employed full-time,India,No,"Professional degree (JD, MD, etc.)","A natural science (ex. biology, chemistry, phy...",...,A lot more welcome now than last year,Tech articles written by other developers;Indu...,,Man,No,Straight / Heterosexual,,Yes,Too long,Easy
28080,28243,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",India,No,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",...,A lot less welcome now than last year,Tech meetups or events in your area,,,,Straight / Heterosexual,,Yes,Too short,Easy
72274,72732,"I am not primarily a developer, but I write co...",No,Less than once a month but more than once per ...,"OSS is, on average, of LOWER quality than prop...",,India,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,A lot less welcome now than last year,Tech articles written by other developers;Tech...,,Man,No,,,Yes,Too long,Easy
77665,78151,I am a developer by profession,Yes,Never,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Mexico,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,32.0,Man,No,Straight / Heterosexual,Hispanic or Latino/Latina,No,Appropriate in length,Easy
79701,80200,I am a developer by profession,Yes,Never,"OSS is, on average, of LOWER quality than prop...",Employed full-time,Netherlands,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,,Tech articles written by other developers,25.0,Woman,No,Bisexual,White or of European descent,No,Appropriate in length,Easy
51798,52132,I am a developer by profession,Yes,Less than once a month but more than once per ...,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Peru,No,Some college/university study without earning ...,I never declared a major,...,Just as welcome now as I felt last year,Tech articles written by other developers;Tech...,48.0,Man,,,Black or of African descent;East Asian;Hispani...,Yes,Appropriate in length,Easy
75088,75561,I am a developer by profession,Yes,Less than once a month but more than once per ...,The quality of OSS and closed source software ...,Employed full-time,Singapore,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","A humanities discipline (ex. literature, histo...",...,Just as welcome now as I felt last year,Tech meetups or events in your area,37.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Appropriate in length,Easy
32056,32250,I am a developer by profession,Yes,Once a month or more often,The quality of OSS and closed source software ...,Employed full-time,Switzerland,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Industry news about technologies you're intere...,30.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
