# Pandas Revision

### 1.1 Import a csv file

In [62]:
import pandas as pd
# Load the weather data from the CSV file into a DataFrame.
df = pd.read_csv('nyc_weather.csv')
# head() with no argument returns the first 5 rows. A notebook cell only
# renders its last expression, so this call produces no visible output here.
df.head()

# what if you want to show a different number of rows ? Pass the count to head().

df.head(4)

Unnamed: 0,EST,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
0,1/1/2016,38,23,52,30.03,10,8.0,0,5,,281
1,1/2/2016,36,18,46,30.02,10,7.0,0,3,,275
2,1/3/2016,40,21,47,29.86,10,8.0,0,1,,277
3,1/4/2016,25,9,44,30.05,10,9.0,0,3,,345


### 1.1 Selecting a Column

In [63]:
# Indexing with a single column name returns that column as a Series.
df['Temperature']

# what if you want to select two columns ?
# Pass a list of names instead, e.g. df[['Temperature', 'DewPoint']], to get a DataFrame.

0     38
1     36
2     40
3     25
4     20
5     33
6     39
7     39
8     44
9     50
10    33
11    35
12    26
13    30
14    43
15    47
16    36
17    25
18    22
19    32
20    31
21    26
22    26
23    28
24    34
25    43
26    41
27    37
28    36
29    34
30    46
Name: Temperature, dtype: int64

### 1.2 Change index 

In [64]:
# set_index returns a new DataFrame, so the result is assigned back to df.
# Passing the Series df['EST'] (rather than the column name) uses its values
# as the index while leaving the EST column in place, as the output shows.
df = df.set_index(df['EST']) 

# why assign the result (or use inplace=True) ? Without one of the two, df keeps its old index.

df.head()

Unnamed: 0_level_0,EST,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1/1/2016,1/1/2016,38,23,52,30.03,10,8.0,0,5,,281
1/2/2016,1/2/2016,36,18,46,30.02,10,7.0,0,3,,275
1/3/2016,1/3/2016,40,21,47,29.86,10,8.0,0,1,,277
1/4/2016,1/4/2016,25,9,44,30.05,10,9.0,0,3,,345
1/5/2016,1/5/2016,20,-3,41,30.57,10,5.0,0,0,,333


### 1.3 Drop a Column

In [65]:
# Remove the EST column, which duplicates the values now held in the index.
#   axis => (0 or 'index') for rows, (1 or 'columns') for columns;
#   columns='EST' is shorthand for ('EST', axis=1).
# The result is assigned back instead of using inplace=True, and
# errors='ignore' skips the drop when EST is already gone, so this cell can
# be re-run without raising KeyError.
df = df.drop(columns='EST', errors='ignore')

df.head()

Unnamed: 0_level_0,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1/1/2016,38,23,52,30.03,10,8.0,0,5,,281
1/2/2016,36,18,46,30.02,10,7.0,0,3,,275
1/3/2016,40,21,47,29.86,10,8.0,0,1,,277
1/4/2016,25,9,44,30.05,10,9.0,0,3,,345
1/5/2016,20,-3,41,30.57,10,5.0,0,0,,333


### 1.3 Selecting rows

In [66]:
# iloc selects by integer position: 0 is the first row, returned as a Series.
df.iloc[0]

Temperature                38
DewPoint                   23
Humidity                   52
Sea Level PressureIn    30.03
VisibilityMiles            10
WindSpeedMPH                8
PrecipitationIn             0
CloudCover                  5
Events                    NaN
WindDirDegrees            281
Name: 1/1/2016, dtype: object

In [67]:
# loc selects by index label; here the labels are the EST date strings.
df.loc['1/1/2016']

Temperature                38
DewPoint                   23
Humidity                   52
Sea Level PressureIn    30.03
VisibilityMiles            10
WindSpeedMPH                8
PrecipitationIn             0
CloudCover                  5
Events                    NaN
WindDirDegrees            281
Name: 1/1/2016, dtype: object

### 1.4 Boolean Masking

In [68]:
# Comparing a column with a scalar yields a boolean Series: True for each row
# where Temperature is above 40, False otherwise.
df['Temperature'] > 40

EST
1/1/2016     False
1/2/2016     False
1/3/2016     False
1/4/2016     False
1/5/2016     False
1/6/2016     False
1/7/2016     False
1/8/2016     False
1/9/2016      True
1/10/2016     True
1/11/2016    False
1/12/2016    False
1/13/2016    False
1/14/2016    False
1/15/2016     True
1/16/2016     True
1/17/2016    False
1/18/2016    False
1/19/2016    False
1/20/2016    False
1/21/2016    False
1/22/2016    False
1/23/2016    False
1/24/2016    False
1/25/2016    False
1/26/2016     True
1/27/2016     True
1/28/2016    False
1/29/2016    False
1/30/2016    False
1/31/2016     True
Name: Temperature, dtype: bool

In [69]:
# Get all days with Temperature > 40: indexing df with the boolean mask
# keeps only the rows where the mask is True.

df[df['Temperature'] > 40] 

Unnamed: 0_level_0,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1/9/2016,44,38,77,30.16,9,8.0,T,8,Rain,76
1/10/2016,50,46,71,29.59,4,,1.8,7,Rain,109
1/15/2016,43,31,62,29.82,9,5.0,T,2,,101
1/16/2016,47,37,70,29.52,8,7.0,0.24,7,Rain,340
1/26/2016,43,29,56,30.03,10,7.0,0,2,,244
1/27/2016,41,22,45,30.03,10,7.0,T,3,Rain,311
1/31/2016,46,28,52,29.9,10,5.0,0,0,,241


### 1.5 Statistical Functions 

In [70]:
# Largest and average visibility over the month.
visibility = df['VisibilityMiles']
print(visibility.max())
print(visibility.mean())

# what if I want all data for temp > median ?
# Compute the median once, show it, then use it as the filter threshold.
temperature_median = df["Temperature"].median()
print(temperature_median)

above_median = df['Temperature'] > temperature_median
print(df[above_median])

10
9.193548387096774
35.0
           Temperature  DewPoint  Humidity  Sea Level PressureIn  \
EST                                                                
1/1/2016            38        23        52                 30.03   
1/2/2016            36        18        46                 30.02   
1/3/2016            40        21        47                 29.86   
1/7/2016            39        11        33                 30.28   
1/8/2016            39        29        64                 30.20   
1/9/2016            44        38        77                 30.16   
1/10/2016           50        46        71                 29.59   
1/15/2016           43        31        62                 29.82   
1/16/2016           47        37        70                 29.52   
1/17/2016           36        23        66                 29.78   
1/26/2016           43        29        56                 30.03   
1/27/2016           41        22        45                 30.03   
1/28/2016           37

## Exercise

### 2.1 Write a function that returns the information for the day that has the max temperature

In [29]:
# Write Your Code Here

### 2.2 Write a function that returns the dates of rainy days

In [11]:
# Write Your Code Here

### 2.3 Write a function that returns the days whose temperature is above the average

In [12]:
# Write Your Code Here