In [1]:
import pandas as pd

In [2]:
# Load the public results of the 2019 survey from the local CSV export.
df = pd.read_csv('stack-overflow-developer-survey-2019/survey_results_public.csv')

In [3]:
# Let pandas show up to 85 columns when displaying a wide frame.
pd.set_option('display.max_columns', 85)

In [4]:
# Load the companion schema CSV (presumably one row describing each survey column -- confirm against the file).
schema_df = pd.read_csv('stack-overflow-developer-survey-2019/survey_results_schema.csv')

In [5]:
# 

# Indexing

In [6]:
# Sample records for the demo DataFrame: one list per column, aligned by
# position (entry i of every list describes the same person).
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
)

In [7]:
# Build a small demo frame from the `people` dict. Note that this rebinds
# `df`, so the survey data loaded earlier is no longer reachable by that name.
df = pd.DataFrame(people)

In [8]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [9]:
df['email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [12]:
# Use the email column as the row index so rows can be looked up by address
# with .loc. The result is rebound to `df` instead of passing inplace=True,
# which avoids mutating the frame in place and keeps the call chainable.
df = df.set_index('email')

In [13]:
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [14]:
df.index

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'], dtype='object', name='email')

In [19]:
# Label-based lookup of a single cell: row label (an email address, since
# email is the index here) first, column label second.
df.loc['CoreyMSchafer@gmail.com','last']

'Schafer'

In [16]:
# Positional lookup of the first row. df.loc[0] would raise a KeyError here:
# .loc searches the index labels, which are now email addresses, not integers.
df.iloc[0]

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [20]:
# Move the email index back into a regular column and restore the default
# integer index. Rebinding `df` replaces inplace=True so the frame is not
# mutated in place.
df = df.reset_index()
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [22]:
# With the default integer index restored, the row label for .loc is 0 again.
df.loc[0,'last']

'Schafer'

In [None]:
# Reload the survey responses, this time using the 'Respondent' column as
# the row index instead of the default integer index.
df = pd.read_csv(
    'stack-overflow-developer-survey-2019/survey_results_public.csv',
    index_col='Respondent',
)

In [None]:
# Reload the schema file with its 'Column' column as the row index, so an
# entry can be fetched by label with .loc.
schema_df = pd.read_csv(
    'stack-overflow-developer-survey-2019/survey_results_schema.csv',
    index_col='Column',
)

df

schema_df

In [None]:
# Fetch the schema row labelled 'WorkPlan' (presumably the description of
# that survey column -- confirm against the schema file).
schema_df.loc['WorkPlan']

schema_df.sort_index()

# Filtering

In [23]:
# Sample records for the demo DataFrame: one list per column, aligned by
# position (entry i of every list describes the same person).
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
)

In [24]:
df = pd.DataFrame(people)

In [25]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [26]:
# Boolean mask: True for every row whose last name is 'Doe'. Nothing is
# displayed by this cell; the mask is only used to select rows afterwards.
filt = (df['last'] == 'Doe')

In [27]:
df[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [28]:
# .loc takes the row selector first and the column selector second:
# here, the rows where the mask is True and only the 'email' column.
df.loc[filt, 'email']

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

# Updating Rows and Columns

In [29]:
# Sample records for the demo DataFrame: one list per column, aligned by
# position (entry i of every list describes the same person).
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
)

In [30]:
df = pd.DataFrame(people)

In [31]:
# Rename every column at once by assigning a complete list of new labels
# (the list needs one entry per existing column, in order).
df.columns = ['first_name', 'last_name', 'email']

In [32]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [33]:
# Upper-case every column label, iterating over the labels explicitly.
df.columns = [label.upper() for label in df.columns]

In [34]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [None]:
# Rename only specific columns by passing an {old: new} mapping. The keys have
# to match the *current* labels: the columns are upper-case at this point, so
# lower-case keys would match nothing and rename() would silently do nothing.
# The renamed frame is displayed rather than stored, because the following
# cells still address the columns as 'LAST_NAME' and 'EMAIL'.
df.rename(columns={'FIRST_NAME': 'first', 'LAST_NAME': 'last'})

In [None]:
df

In [35]:
# Change a single cell: the row labelled 2, column 'LAST_NAME'.
df.loc[2, 'LAST_NAME' ] = 'Lama'

In [36]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Lama,JohnDoe@email.com


In [None]:
# Select John's row with a boolean mask on the email column.
filt = (df['EMAIL'] == 'JohnDoe@email.com')
# Chained indexing like this is fine for reading, but do not assign through
# it: df[filt]['LAST_NAME'] = ... may write to a temporary copy and leave df
# unchanged (pandas reports this as SettingWithCopyWarning). Assign with
# df.loc[filt, 'LAST_NAME'] = ... instead.
df[filt]['LAST_NAME']

In [37]:
# Build the mask for John's row, then assign through a single .loc call so
# the write lands on df itself rather than on an intermediate selection.
filt = df['EMAIL'] == 'JohnDoe@email.com'
df.loc[filt, 'LAST_NAME'] = 'SMITH'

In [38]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,SMITH,JohnDoe@email.com


In [40]:
# Lower-case every column label, iterating over the labels explicitly.
df.columns = [label.lower() for label in df.columns]

In [41]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,SMITH,JohnDoe@email.com


In [42]:
# Overwrite the whole email column with a lower-cased version, using the
# vectorised .str accessor instead of looping over the rows.
df['email'] = df['email'].str.lower()

In [43]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,SMITH,johndoe@email.com


### 1) Apply method -> calls a function on each of our values. Works on both DataFrame and Series objects.

In [44]:
# On a Series, apply() calls the function once per value: here, the length
# of each email string.
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [45]:
def update_email(email):
    """Return ``email`` converted to upper case.

    Defined as a plain one-argument function so it can be passed directly
    to ``Series.apply``.
    """
    shouted = email.upper()
    return shouted

In [46]:
# Preview only: apply() returns a new upper-cased Series. df itself is not
# modified unless the result is assigned back to the column.
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [47]:
df['email'] = df['email'].apply(update_email)

In [48]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,SMITH,JOHNDOE@EMAIL.COM


In [49]:
# Same pattern with an anonymous function: lower-case each address and store
# the result back in the email column.
df['email'] = df['email'].apply(lambda address: address.lower())

In [50]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,SMITH,johndoe@email.com


In [None]:
df['email'].apply(len)

In [51]:
# With axis='columns' the function is applied to each row (moving across the
# columns), so len() gives the number of values in every row. The default
# axis would instead give one result per column.
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [52]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,SMITH,johndoe@email.com


In [53]:
# Apply len() to every individual cell of the frame. DataFrame.applymap was
# renamed to DataFrame.map in pandas 2.1 (applymap is deprecated there), so
# use map when it exists and fall back to applymap on older versions.
(df.map if hasattr(df, 'map') else df.applymap)(len)

Unnamed: 0,first_name,last_name,email
0,5,7,23
1,4,3,17
2,4,5,17


In [55]:
# Swap selected first names using an {old: new} mapping; values that are not
# listed in the mapping are left as they are.
first_name_updates = {'Corey': 'Chris', 'Jane': 'Mary'}
df['first_name'] = df['first_name'].replace(first_name_updates)
df

Unnamed: 0,first_name,last_name,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,SMITH,johndoe@email.com
