In [1]:
# If you are going in order re: the lessons, you should have already downloaded the data file!
import numpy as np
import pandas as pd
import chardet    

# Path of the July 2017 data file. Defined once so that the encoding
# detection and the CSV read below always refer to the same file.
DATA_FILE = 'CU_data_July2017_full.csv'

# Read the raw bytes so chardet can guess the text encoding. The context
# manager guarantees the file handle is closed once the bytes are read
# (a bare open(...).read() leaves closing to the garbage collector).
with open(DATA_FILE, 'rb') as raw_file:
    edata = raw_file.read()
result = chardet.detect(edata)
encode = result['encoding']

# Load the table with 'Day' as the row index.
# skiprows / skipfooter drop 3 leading and 14 trailing lines of the file
# (presumably non-tabular header/summary text -- confirm against the file).
# skipfooter is only supported by the 'python' parser engine.
tdata = pd.read_csv(DATA_FILE, index_col='Day', skiprows=3, skipfooter=14,
                    engine='python', encoding=encode)

# Discard the columns this lesson does not use.
tdata = tdata.drop(columns=['Time (CST)', 'Time (CST).1', '4 inch', '8 inch',
                            'Morning Low', 'Comments'])

# Prefix the temperature columns so their meaning is explicit.
tdata = tdata.rename(columns={"Low": "T_low", "High": "T_high", "Mean": "T_mean"})

## PANDAS: ACCESSING ELEMENTS: OVERVIEW
- An important side note: the index of a DataFrame (the label attached to each row) can be: 
    - a measure of time - continuous observations 
    - a categorical string - name, country 
    - an event identifier 
    - other things!
<br><br>
- We generally won't use the NumPy array indexing syntax on Pandas objects, as it can give us unanticipated results
    - some operations that access elements assume that what you pass in is the labelled index, while others assume it is the positional index - a big problem if your Pandas indices are *integers* 
<br><br>
- Instead, use some Pandas-specific indexers:
  - **loc**: references the labelled, *explicit* index
  - **iloc**: references the positional, *implicit* index

## ACCESSING ELEMENTS: USING LABELLED (EXPLICIT) INDEX: LOC


### (1) Syntax
**USAGE**
<br><br>
**Series**: loc[labelled index]
<br><br>
**DataFrame**: loc[labelled row index, labelled column index]

**NOTES**
- labels that are actually numbers are read in with a numerical data type, but *loc* still treats them as labels (not positions)!
<br><br>
- *for slicing*: element associated with upper bound of index you give IS included!

### (2) Examples of accessing elements

In [2]:
# Goal: pull every column for day 1 of the month.
# The 'Day' index holds integers, yet loc matches them as LABELS, not positions.
print(tdata.head())  # quick look at the first rows before selecting

# Pass the bare integer 1 (no quotes): it is looked up as the row label 1.
tdata.loc[1, :]

     T_high  T_low  T_mean  Depart  Heating  Cooling  Precipitation  Snowfall  \
Day                                                                             
1      84.0     66    75.0     0.0      0.0     10.0           0.55         0   
2      88.0     61    75.0     0.0      0.0     10.0           0.00         0   
3      89.0     67    78.0     3.0      0.0     13.0           0.00         0   
4      89.0     67    78.0     3.0      0.0     13.0           0.00         0   
5      86.0     70    78.0     3.0      0.0     13.0           0.00         0   

     Snow Depth  
Day              
1             0  
2             0  
3             0  
4             0  
5             0  


T_high           84.00
T_low            66.00
T_mean           75.00
Depart            0.00
Heating           0.00
Cooling          10.00
Precipitation     0.55
Snowfall          0.00
Snow Depth        0.00
Name: 1, dtype: float64

In [3]:
# Rows labelled 1 through 3, all columns.
# A label slice with loc INCLUDES its upper bound, so day 3 is returned too.
tdata.loc[1:3, :]

Unnamed: 0_level_0,T_high,T_low,T_mean,Depart,Heating,Cooling,Precipitation,Snowfall,Snow Depth
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,84.0,66,75.0,0.0,0.0,10.0,0.55,0,0
2,88.0,61,75.0,0.0,0.0,10.0,0.0,0,0
3,89.0,67,78.0,3.0,0.0,13.0,0.0,0,0


In [4]:
# High and low temperatures on days 1, 3 and 5 of the month.
# Rows: a LIST of labels handles non-contiguous picks.
# Columns: a label slice from 'T_high' through 'T_low' (both ends included).
selected_days = [1, 3, 5]
tdata.loc[selected_days, 'T_high':'T_low']

Unnamed: 0_level_0,T_high,T_low
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
1,84.0,66
3,89.0,67
5,86.0,70


## ACCESSING ELEMENTS: USING POSITIONAL (IMPLICIT) INDEX: ILOC


### (1) Syntax
**USAGE**
<br><br>
**Series**: iloc[positional index]
<br><br>
**DataFrame**: iloc[positional row index, positional column index]

**NOTES**
  - iloc expects the *implicit* index - the positional integer
<br><br>
  - *for slicing*:  element associated with upper bound of index you give is NOT included!
<br><br>

### (2) Examples of accessing elements

In [5]:
# Every column for the first day of the month, this time by position.
# With iloc the 0 is a POSITION: the first row, whatever its label is.
tdata.iloc[0, :]

T_high           84.00
T_low            66.00
T_mean           75.00
Depart            0.00
Heating           0.00
Cooling          10.00
Precipitation     0.55
Snowfall          0.00
Snow Depth        0.00
Name: 1, dtype: float64

In [6]:
# First three days of the month, all columns.
# ALERT: a positional slice stops BEFORE its upper bound, so 0:3 yields
# positions 0, 1 and 2 only.
tdata.iloc[0:3, :]

Unnamed: 0_level_0,T_high,T_low,T_mean,Depart,Heating,Cooling,Precipitation,Snowfall,Snow Depth
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,84.0,66,75.0,0.0,0.0,10.0,0.55,0,0
2,88.0,61,75.0,0.0,0.0,10.0,0.0,0,0
3,89.0,67,78.0,3.0,0.0,13.0,0.0,0,0


In [7]:
# High and low temperatures for the first three days of the month:
# rows at positions 0-2, columns at positions 0-1 (upper bounds excluded).
tdata.iloc[0:3, 0:2]

Unnamed: 0_level_0,T_high,T_low
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
1,84.0,66
2,88.0,61
3,89.0,67


## CHANGING VALUE OF AN ELEMENT
- use *df.at[row label, column label]*, which assumes label-based indices and addresses a single element
<br><br>
- example:

In [8]:
# Say we know that the high temperature on July 1st was actually 89, not 84
# Print the first rows before the change so the two tables can be compared.
print(tdata.head())
# at[row label, column label] targets one cell; the assignment modifies tdata
# in place, so any cell run after this one sees the corrected value.
tdata.at[1, 'T_high'] = 89
# Print again: the T_high entry for day 1 should now show the new value.
print(tdata.head())

     T_high  T_low  T_mean  Depart  Heating  Cooling  Precipitation  Snowfall  \
Day                                                                             
1      84.0     66    75.0     0.0      0.0     10.0           0.55         0   
2      88.0     61    75.0     0.0      0.0     10.0           0.00         0   
3      89.0     67    78.0     3.0      0.0     13.0           0.00         0   
4      89.0     67    78.0     3.0      0.0     13.0           0.00         0   
5      86.0     70    78.0     3.0      0.0     13.0           0.00         0   

     Snow Depth  
Day              
1             0  
2             0  
3             0  
4             0  
5             0  
     T_high  T_low  T_mean  Depart  Heating  Cooling  Precipitation  Snowfall  \
Day                                                                             
1      89.0     66    75.0     0.0      0.0     10.0           0.55         0   
2      88.0     61    75.0     0.0      0.0     10.0           