# Section 1 of Pandas Kaggle exercise
This notebook is based on the Kaggle exercise "Indexing, Selecting & Assigning": https://www.kaggle.com/residentmario/indexing-selecting-assigning

In [1]:
import pandas as pd

In [7]:
# Build a small all-numeric DataFrame: each dict key becomes a column,
# each list holds that column's values (one entry per row).
df = pd.DataFrame({
    'height': [160, 170],
    'weight': [60, 50],
})

# Build a second DataFrame that mixes a string column with a numeric one.
df2 = pd.DataFrame({
    'animal': ['Tiger', 'Lion', 'Bird'],
    'weight': [70, 80, 10],
})

# Show both frames as plain text, one after the other.
print(df, df2, sep='\n')

   height  weight
0     160      60
1     170      50
  animal  weight
0  Tiger      70
1   Lion      80
2   Bird      10


In [13]:
# Rebuild df, this time giving the rows explicit labels via `index`
# instead of the default 0..n-1 integer index.
df = pd.DataFrame(
    {'height': [160, 170], 'weight': [60, 50]},
    index=['Person1', 'Person2'],
)
print(df)

         height  weight
Person1     160      60
Person2     170      50


In [9]:
# Create a pandas Series from a plain list; the rows get the default 0..n-1 index.
pd.Series([1,23,4,5,6])

0     1
1    23
2     4
3     5
4     6
dtype: int64

In [17]:
# Read the employee attrition data (Employee-attrition.csv) from the local drive.
# The raw string keeps the Windows backslashes from being treated as escapes.
df = pd.read_csv(filepath_or_buffer=r"E:\IOD_data\Employee-attrition.csv")

# Preview the first five rows to confirm the file loaded correctly.
df.head(n=5)

Unnamed: 0,EmployeeID,recorddate_key,birthdate_key,orighiredate_key,terminationdate_key,age,length_of_service,city_name,department_name,job_title,store_name,gender_short,gender_full,termreason_desc,termtype_desc,STATUS_YEAR,STATUS,BUSINESS_UNIT
0,1318,12/31/2006 0:00,1/3/1954,8/28/1989,1/1/1900,52,17,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2006,ACTIVE,HEADOFFICE
1,1318,12/31/2007 0:00,1/3/1954,8/28/1989,1/1/1900,53,18,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2007,ACTIVE,HEADOFFICE
2,1318,12/31/2008 0:00,1/3/1954,8/28/1989,1/1/1900,54,19,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2008,ACTIVE,HEADOFFICE
3,1318,12/31/2009 0:00,1/3/1954,8/28/1989,1/1/1900,55,20,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2009,ACTIVE,HEADOFFICE
4,1318,12/31/2010 0:00,1/3/1954,8/28/1989,1/1/1900,56,21,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2010,ACTIVE,HEADOFFICE


In [19]:
# Reload the dataset, this time promoting the EmployeeID column to the row index.
# The column is named explicitly (rather than passed as position 0) so the code
# states its intent and keeps working if the CSV's column order ever changes.
df = pd.read_csv(r"E:\IOD_data\Employee-attrition.csv", index_col="EmployeeID")

# Preview the re-indexed frame; EmployeeID now labels the rows.
df.head()

Unnamed: 0_level_0,recorddate_key,birthdate_key,orighiredate_key,terminationdate_key,age,length_of_service,city_name,department_name,job_title,store_name,gender_short,gender_full,termreason_desc,termtype_desc,STATUS_YEAR,STATUS,BUSINESS_UNIT
EmployeeID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1318,12/31/2006 0:00,1/3/1954,8/28/1989,1/1/1900,52,17,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2006,ACTIVE,HEADOFFICE
1318,12/31/2007 0:00,1/3/1954,8/28/1989,1/1/1900,53,18,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2007,ACTIVE,HEADOFFICE
1318,12/31/2008 0:00,1/3/1954,8/28/1989,1/1/1900,54,19,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2008,ACTIVE,HEADOFFICE
1318,12/31/2009 0:00,1/3/1954,8/28/1989,1/1/1900,55,20,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2009,ACTIVE,HEADOFFICE
1318,12/31/2010 0:00,1/3/1954,8/28/1989,1/1/1900,56,21,Vancouver,Executive,CEO,35,M,Male,Not Applicable,Not Applicable,2010,ACTIVE,HEADOFFICE


# Section 2 (until indexing)

In [20]:
# Select the 'age' column by name; the result is a pandas Series.
df['age']

EmployeeID
1318    52
1318    53
1318    54
1318    55
1318    56
        ..
8258    21
8264    19
8279    21
8296    19
8321    20
Name: age, Length: 49653, dtype: int64

In [25]:
# Reload the dataset without index_col so the rows get the default 0, 1, 2, ... index
# instead of being labelled by EmployeeID.
df = pd.read_csv(r"E:\IOD_data\Employee-attrition.csv")

# Pick one value from the age column: select the column first, then the row labelled 0.
df['age'][0]


52

In [26]:
# Select the first row by integer position with iloc; the row comes back as a
# Series whose labels are the column names.
df.iloc[0]

EmployeeID                        1318
recorddate_key         12/31/2006 0:00
birthdate_key                 1/3/1954
orighiredate_key             8/28/1989
terminationdate_key           1/1/1900
age                                 52
length_of_service                   17
city_name                    Vancouver
department_name              Executive
job_title                          CEO
store_name                          35
gender_short                         M
gender_full                       Male
termreason_desc         Not Applicable
termtype_desc           Not Applicable
STATUS_YEAR                       2006
STATUS                          ACTIVE
BUSINESS_UNIT               HEADOFFICE
Name: 0, dtype: object

In [27]:
# Select every row (:) of the column at position 0 with iloc — here that is EmployeeID.
df.iloc[:,0]

0        1318
1        1318
2        1318
3        1318
4        1318
         ... 
49648    8258
49649    8264
49650    8279
49651    8296
49652    8321
Name: EmployeeID, Length: 49653, dtype: int64

In [28]:
# Select the first three rows (positions 0-2) of the column at position 0.
# Note: position 0 is the EmployeeID column, not age.
df.iloc[:3,0]

0    1318
1    1318
2    1318
Name: EmployeeID, dtype: int64

In [31]:
# Select specific rows by passing a list of positions (1, 2, 3), again for the
# column at position 0.
df.iloc[[1,2,3],0]

1    1318
2    1318
3    1318
Name: EmployeeID, dtype: int64

In [None]:
# using loc to select by label (row label, column name) instead of by position

In [33]:
# Select a single value by label with loc: row label 0, column 'age'.
df.loc[0,'age']

52

In [34]:
# Select every row (:) of the 'age' column by label with loc.
df.loc[:,'age']

0        52
1        53
2        54
3        55
4        56
         ..
49648    21
49649    19
49650    21
49651    19
49652    20
Name: age, Length: 49653, dtype: int64