# Part I
Installation and Data Loading

In [None]:
import pandas as pd

In [None]:
# Load the survey responses; the ResponseId column becomes the row index,
# so rows can later be selected by id with main_df.loc[<id>].
main_df = pd.read_csv('data/survey_results_public.csv', index_col='ResponseId')
# Load the question schema indexed by question name (qname), so the text of
# a question can be looked up with schema_df.loc[<qname>, 'question'].
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='qname')

In [None]:
main_df.head()

In [None]:
main_df.tail()

In [None]:
main_df.shape

In [None]:
main_df.info()

In [None]:
# Raise pandas' display limits so that frames with up to 85 columns / 85 rows
# are printed in full instead of being truncated with "...".
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)

In [None]:
schema_df

# Part II
Selecting Rows and Columns

In [None]:
main_df.columns

In [None]:
main_df.iloc[[0, 1]]

In [None]:
main_df.iloc[[0, 1], 2]

In [None]:
main_df.loc[19]

In [None]:
main_df.loc[1:10, 'YearsCodePro']

In [None]:
main_df['OpSys'].value_counts()

In [None]:
main_df.loc[0:15, 'OpSys':'SOAccount']

In [None]:
main_df['DevType'].value_counts()

# Part III
Indexes - Set, Reset, Use

In [None]:
# Move the current index (ResponseId) back into a regular column before
# re-indexing by OpSys: set_index discards the existing index, so calling it
# directly would throw the ResponseId values away for good.
main_df = main_df.reset_index().set_index('OpSys')

In [None]:
main_df.loc['Linux-based']

In [None]:
main_df.reset_index(inplace=True)

In [None]:
schema_df.columns

In [None]:
schema_df.sort_index()

In [None]:
schema_df.loc['Webframe', 'question']

In [None]:
main_df['WebframeHaveWorkedWith'].unique()

In [None]:
schema_df.loc['CompTotal', 'question']

In [None]:
main_df['CompTotal'].mean()

In [None]:
schema_df.loc['EdLevel', 'question']

In [None]:
main_df['EdLevel'].unique()

In [None]:
schema_df.loc['MainBranch', 'question']

In [None]:
main_df['MainBranch'].unique()

In [None]:
schema_df.loc['Employment', 'question']

In [None]:
main_df['Employment'].unique()

# Part IV
Conditionals - Filtering Rows and Columns

In [None]:
# Boolean mask selecting the rows whose MainBranch is the professional-developer answer.
filt = main_df['MainBranch'].eq('I am a developer by profession')

In [None]:
main_df.loc[filt]

In [None]:
main_df.loc[filt, 'Employment']

In [None]:
# Boolean mask: rows whose CompTotal is strictly greater than 70000
# (missing values compare as False).
high_salary = main_df['CompTotal'].gt(70000)

# Boolean mask: rows whose Country is one of the listed countries.
countries = [
    'United States of America',
    'Germany',
    'Canada',
    'India',
    'United Kingdom',
]
filt = main_df['Country'].isin(countries)

In [None]:
main_df.loc[high_salary, ['Country', 'LanguageHaveWorkedWith', 'CompTotal']]

In [None]:
main_df.loc[filt, ['Country', 'LanguageHaveWorkedWith', 'CompTotal']]

In [None]:
using_python = main_df['LanguageHaveWorkedWith'].str.contains('Python', na=False)

In [None]:
main_df.loc[using_python, 'LanguageHaveWorkedWith']

# Part V
Updating Data in DataFrame - Rows and Columns

In [None]:
main_df.columns

In [None]:
# Tone down the all-caps 'NEW' and 'SO' fragments in the column names
# (e.g. 'NEW...' -> 'New...') in a single chained pass.
main_df.columns = (
    main_df.columns
    .str.replace('NEW', 'New')
    .str.replace('SO', 'So')
)

In [None]:
main_df.columns

In [None]:
# https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

def camel_to_snake(s):
    """Convert a CamelCase / mixedCase identifier to snake_case.

    A word boundary is inserted before an uppercase letter when it follows a
    lowercase letter or digit, or when it ends a run of capitals and starts a
    new word (the ``R`` in ``HTTPResponse``).  Runs of capitals are kept
    together as one word, and underscores already present in the input are
    preserved rather than doubled or stripped.

    Args:
        s: The identifier to convert.

    Returns:
        The lower-cased, underscore-separated form of ``s``.

    Examples:
        >>> camel_to_snake('CompTotal')
        'comp_total'
        >>> camel_to_snake('US_State')
        'us_state'
        >>> camel_to_snake('HTTPResponse')
        'http_response'
    """
    pieces = []
    last = len(s) - 1
    for i, ch in enumerate(s):
        if ch.isupper():
            prev = s[i - 1] if i > 0 else ''
            nxt = s[i + 1] if i < last else ''
            # No separator at the very start or right after an existing
            # underscore; inside a run of capitals only the letter that
            # begins the next word (followed by a lowercase letter) gets one.
            starts_word = (
                prev != ''
                and prev != '_'
                and (not prev.isupper() or nxt.islower())
            )
            if starts_word:
                pieces.append('_')
            pieces.append(ch.lower())
        else:
            pieces.append(ch)
    return ''.join(pieces)

In [None]:
# Rename every column by passing its name through camel_to_snake.
main_df.columns = main_df.columns.map(camel_to_snake)

In [None]:
main_df.columns

In [None]:
# Relabel only the two listed age brackets. Series.replace leaves every other
# value untouched, whereas Series.map with a dict turns all unmapped values
# into NaN and would wipe out the rest of the column.
main_df['age'] = main_df['age'].replace({'Under 18 years old': 'kiddo', '65 years or older': 'old'})
filt = (main_df['age'] == 'kiddo') | (main_df['age'] == 'old')
main_df.loc[filt, 'age']

In [None]:
# Undo the relabelling, sending each nickname back to its own bracket
# ('old' belongs to '65 years or older', not 'Under 18 years old').
# replace (not map) so that values outside the mapping are kept as they are.
main_df['age'] = main_df['age'].replace({'kiddo': 'Under 18 years old', 'old': '65 years or older'})
main_df['age']

In [None]:
# Fill the missing ages with a placeholder and assign the result back to the
# column. Calling an in-place method on main_df['age'] is chained assignment:
# it warns on pandas 2.x and does not update main_df under Copy-on-Write.
main_df['age'] = main_df['age'].fillna(' nothing left here ...')
main_df['age']

In [None]:
# The columns were converted to snake_case above, so the compensation column
# is now 'comp_total'. Renaming the old 'CompTotal' key is silently ignored
# and the lookup below would then raise KeyError.
main_df = main_df.rename(columns={'comp_total': 'SalaryUSD'})
main_df['SalaryUSD']

# Part VI
Adding/Removing Data in DataFrame - Rows and Columns