## Creating a dataframe

In [1]:
import pandas as pd

In [2]:
# Column-oriented sample data: each keyword is a column name and each list
# holds that column's values, one entry per person.
people = dict(
    first=['Sunny', 'Raunak', 'Siddharth'],
    last=['Tamang', 'Tamang', 'Tamang'],
    email=['sunny@emai.com', 'raunak@email.com', 'sid@email.com'],
)

In [3]:
# Build a DataFrame from the dict: every key becomes a column and every list
# supplies that column's values, one per row.
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Sunny,Tamang,sunny@emai.com
1,Raunak,Tamang,raunak@email.com
2,Siddharth,Tamang,sid@email.com


### Access the values

In [8]:
df['email']

0      sunny@emai.com
1    raunak@email.com
2       sid@email.com
Name: email, dtype: object

In [9]:
type(df['email'])

pandas.core.series.Series

> Selecting a single column returns a `Series`, which is a one-dimensional labelled array.

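Alternatively, a column can be accessed as an attribute using dot notation, as shown below. This does not work when the column name contains a space or clashes with an existing DataFrame attribute or method (for example `count` or `shape`), so bracket notation is the safer choice.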

In [10]:
df.email

0      sunny@emai.com
1    raunak@email.com
2       sid@email.com
Name: email, dtype: object

## Access multiple columns

In [11]:
df[['last', 'email']]

Unnamed: 0,last,email
0,Tamang,sunny@emai.com
1,Tamang,raunak@email.com
2,Tamang,sid@email.com


> Passing a list of column names returns a `DataFrame`, which is a two-dimensional labelled table.

## Get all the column names

In [12]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

## Access rows

> We can use either of the indexers below:
> - `loc` selects rows (and columns) by label
> - `iloc` selects rows (and columns) by integer position

### Using iloc [integer location]

In [14]:
# Get the first row by integer position; a single row is returned as a Series
df.iloc[0]

first             Sunny
last             Tamang
email    sunny@emai.com
Name: 0, dtype: object

In [19]:
# Access the first and second rows; passing a list of positions returns a DataFrame

df.iloc[[0,1]]

Unnamed: 0,first,last,email
0,Sunny,Tamang,sunny@emai.com
1,Raunak,Tamang,raunak@email.com


In [20]:
# Access the email column of the first 2 rows.
# iloc only accepts integer positions, so the column is given as 2
# (first=0, last=1, email=2) rather than by its name.
df.iloc[[0, 1], 2]

0      sunny@emai.com
1    raunak@email.com
Name: email, dtype: object

### Using loc [label-based location]

In [21]:
df

Unnamed: 0,first,last,email
0,Sunny,Tamang,sunny@emai.com
1,Raunak,Tamang,raunak@email.com
2,Siddharth,Tamang,sid@email.com


In [25]:
# Accessing the first row
df.loc[0]

first             Sunny
last             Tamang
email    sunny@emai.com
Name: 0, dtype: object

In [26]:
# Accessing the first and the second row
df.loc[[0, 1]]

Unnamed: 0,first,last,email
0,Sunny,Tamang,sunny@emai.com
1,Raunak,Tamang,raunak@email.com


In [27]:
# Accessing the email values of the first 2 rows
df.loc[[0, 1], 'email']

0      sunny@emai.com
1    raunak@email.com
Name: email, dtype: object

In [31]:
# Access the email and last columns of the first 2 rows.
# The columns come back in the order they are listed, not in the frame's original order.
df.loc[[0, 1], ['email', 'last']]

Unnamed: 0,email,last
0,sunny@emai.com,Tamang
1,raunak@email.com,Tamang


## Using the Stack Overflow survey data

In [32]:
# Load the survey responses from a CSV file located relative to the working directory.
# NOTE: this rebinds `df`, so the small people DataFrame created earlier is no longer reachable.
df = pd.read_csv('survey_results_public.csv')
# NOTE(review): schema_df is not used in the cells visible here; presumably it
# describes the survey columns. Confirm before relying on it.
schema_df = pd.read_csv('survey_results_schema.csv')

In [33]:
df.shape

(83439, 48)

In [34]:
df.head()

Unnamed: 0,ResponseId,MainBranch,Employment,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
0,1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.0
1,2,I am a student who is learning to code,"Student, full-time",Netherlands,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,
2,3,"I am not primarily a developer, but I write co...","Student, full-time",Russian Federation,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",,...,18-24 years old,Man,No,Prefer not to say,Prefer not to say,None of the above,None of the above,Appropriate in length,Easy,
3,4,I am a developer by profession,Employed full-time,Austria,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,,,...,35-44 years old,Man,No,Straight / Heterosexual,White or of European descent,I am deaf / hard of hearing,,Appropriate in length,Neither easy nor difficult,
4,5,I am a developer by profession,"Independent contractor, freelancer, or self-em...",United Kingdom of Great Britain and Northern I...,,England,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",5 - 10 years,Friend or family member,17.0,...,25-34 years old,Man,No,,White or of European descent,None of the above,,Appropriate in length,Easy,


In [36]:
# List the names of all the columns
df.columns

Index(['ResponseId', 'MainBranch', 'Employment', 'Country', 'US_State',
       'UK_Country', 'EdLevel', 'Age1stCode', 'LearnCode', 'YearsCode',
       'YearsCodePro', 'DevType', 'OrgSize', 'Currency', 'CompTotal',
       'CompFreq', 'LanguageHaveWorkedWith', 'LanguageWantToWorkWith',
       'DatabaseHaveWorkedWith', 'DatabaseWantToWorkWith',
       'PlatformHaveWorkedWith', 'PlatformWantToWorkWith',
       'WebframeHaveWorkedWith', 'WebframeWantToWorkWith',
       'MiscTechHaveWorkedWith', 'MiscTechWantToWorkWith',
       'ToolsTechHaveWorkedWith', 'ToolsTechWantToWorkWith',
       'NEWCollabToolsHaveWorkedWith', 'NEWCollabToolsWantToWorkWith', 'OpSys',
       'NEWStuck', 'NEWSOSites', 'SOVisitFreq', 'SOAccount', 'SOPartFreq',
       'SOComm', 'NEWOtherComms', 'Age', 'Gender', 'Trans', 'Sexuality',
       'Ethnicity', 'Accessibility', 'MentalHealth', 'SurveyLength',
       'SurveyEase', 'ConvertedCompYearly'],
      dtype='object')

In [42]:
# accessing the country column

df['Country']

0                                                 Slovakia
1                                              Netherlands
2                                       Russian Federation
3                                                  Austria
4        United Kingdom of Great Britain and Northern I...
                               ...                        
83434                             United States of America
83435                                                Benin
83436                             United States of America
83437                                               Canada
83438                                               Brazil
Name: Country, Length: 83439, dtype: object

### Count how many times each country appears

In [43]:
df['Country'].value_counts()

United States of America                                15288
India                                                   10511
Germany                                                  5625
United Kingdom of Great Britain and Northern Ireland     4475
Canada                                                   3012
                                                        ...  
Saint Kitts and Nevis                                       1
Dominica                                                    1
Saint Vincent and the Grenadines                            1
Tuvalu                                                      1
Papua New Guinea                                            1
Name: Country, Length: 181, dtype: int64

In [49]:
## Accessing a specific row and a specific column returns the single value stored there
df.loc[0, 'Country']

'Slovakia'

In [51]:
## Accessing multiple specific rows of a single column
df.loc[[0, 1, 2], 'Country']

0              Slovakia
1           Netherlands
2    Russian Federation
Name: Country, dtype: object

### We can also use slicing to select rows (with `loc`, the end label is included)

In [52]:
df.loc[0:2, 'Country']

0              Slovakia
1           Netherlands
2    Russian Federation
Name: Country, dtype: object

### We can also use slicing to select multiple columns (the end column is included as well)

In [53]:
df.loc[0:2, 'Country':'LearnCode']

Unnamed: 0,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode
0,Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...
1,Netherlands,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc..."
2,Russian Federation,,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc..."
