## New York Weather Data Analysis

### Reading the dataset

In [14]:
import pandas as pd
# Read the New York weather observations from the CSV file (path is relative
# to the notebook's working directory) and echo the resulting frame.
weather_csv = 'ny_weather.csv'
df = pd.read_csv(weather_csv)
df

Unnamed: 0,EST,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
0,01-01-16,38,23,52,30.03,10,8.0,0,5,,281
1,01-02-16,36,18,46,30.02,10,7.0,0,3,,275
2,01-03-16,40,21,47,29.86,10,8.0,0,1,,277
3,01-04-16,25,9,44,30.05,10,9.0,0,3,,345
4,01-05-16,20,-3,41,30.57,10,5.0,0,0,,333
5,01-06-16,33,4,35,30.5,10,4.0,0,0,,259
6,01-07-16,39,11,33,30.28,10,2.0,0,3,,293
7,01-08-16,39,29,64,30.2,10,4.0,0,8,,79
8,01-09-16,44,38,77,30.16,9,8.0,T,8,Rain,76
9,01-10-16,50,46,71,29.59,4,,1.8,7,Rain,109


### Getting the total number of rows and columns in the dataset

In [18]:
# Count rows and columns from the two axes of the frame; this yields the same
# pair of numbers as df.shape.
rows = len(df.index)
columns = len(df.columns)
print(f'No. of rows = {rows} \nNo. of columns = {columns}')

No. of rows = 31 
No. of columns = 11


### Getting a basic picture of the dataset

In [21]:
# Preview the first rows; n=5 is the default, written out for clarity.
df.head(n=5)

Unnamed: 0,EST,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
0,01-01-16,38,23,52,30.03,10,8.0,0,5,,281
1,01-02-16,36,18,46,30.02,10,7.0,0,3,,275
2,01-03-16,40,21,47,29.86,10,8.0,0,1,,277
3,01-04-16,25,9,44,30.05,10,9.0,0,3,,345
4,01-05-16,20,-3,41,30.57,10,5.0,0,0,,333


In [23]:
# Preview the last rows; n=5 is the default, written out for clarity.
df.tail(n=5)

Unnamed: 0,EST,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
26,1/27/2016,41,22,45,30.03,10,7.0,T,3,Rain,311
27,1/28/2016,37,20,51,29.9,10,5.0,0,1,,234
28,1/29/2016,36,21,50,29.58,10,8.0,0,4,,298
29,1/30/2016,34,16,46,30.01,10,7.0,0,0,,257
30,1/31/2016,46,28,52,29.9,10,5.0,0,0,,241


In [26]:
# Positional row slice: rows at positions 1, 2 and 3 (the stop position 4 is
# excluded), written with the explicit positional indexer.
df.iloc[1:4]

Unnamed: 0,EST,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
1,01-02-16,36,18,46,30.02,10,7.0,0,3,,275
2,01-03-16,40,21,47,29.86,10,8.0,0,1,,277
3,01-04-16,25,9,44,30.05,10,9.0,0,3,,345


### Getting the column names of the dataset

In [28]:
# Show the column labels of the frame; they come back as a pandas Index.
df.columns

Index(['EST', 'Temperature', 'DewPoint', 'Humidity', 'Sea Level PressureIn',
       'VisibilityMiles', 'WindSpeedMPH', 'PrecipitationIn', 'CloudCover',
       'Events', 'WindDirDegrees'],
      dtype='object')

### Getting the temperature column

In [2]:
# Bracket lookup with a single label returns that column as a Series.
df['Temperature']

0     38
1     36
2     40
3     25
4     20
5     33
6     39
7     39
8     44
9     50
10    33
11    35
12    26
13    30
14    43
15    47
16    36
17    25
18    22
19    32
20    31
21    26
22    26
23    28
24    34
25    43
26    41
27    37
28    36
29    34
30    46
Name: Temperature, dtype: int64

In [31]:
# Attribute-style access to the same column as df['Temperature'].
# This shorthand only works when the label is a valid Python identifier that
# does not clash with an existing DataFrame attribute or method; a label with
# spaces, such as 'Sea Level PressureIn', needs the bracket form.
df.Temperature

0     38
1     36
2     40
3     25
4     20
5     33
6     39
7     39
8     44
9     50
10    33
11    35
12    26
13    30
14    43
15    47
16    36
17    25
18    22
19    32
20    31
21    26
22    26
23    28
24    34
25    43
26    41
27    37
28    36
29    34
30    46
Name: Temperature, dtype: int64

### Getting EST and Temperature columns together

In [35]:
# Passing a list of labels to the bracket indexer returns a DataFrame that
# holds just those columns, in the order listed.
selected_columns = ['EST', 'Temperature']
df[selected_columns]

Unnamed: 0,EST,Temperature
0,01-01-16,38
1,01-02-16,36
2,01-03-16,40
3,01-04-16,25
4,01-05-16,20
5,01-06-16,33
6,01-07-16,39
7,01-08-16,39
8,01-09-16,44
9,01-10-16,50


### Datatypes in Pandas

#### Dataframe datatype

In [32]:
# Confirm the class of the object returned by pd.read_csv.
type(df)

pandas.core.frame.DataFrame

#### Series datatype - each column of a DataFrame is a Series

In [33]:
# A single column selected with brackets comes back as a Series.
type(df['Temperature'])

pandas.core.series.Series

### Getting the maximum temperature in the Temperature column

In [3]:
# Highest value recorded in the Temperature column.
temperature = df['Temperature']
temperature.max()

50

### Getting the minimum temperature in the Temperature column

In [38]:
# Lowest value recorded in the Temperature column.
temperature = df['Temperature']
temperature.min()

20

### Getting the dates on which it rained in New York

In [7]:
# Dates (EST column) of the rows whose Events value is exactly 'Rain'.
# A single .loc lookup with a boolean row mask and a column label replaces the
# chained df['EST'][mask] form; it selects the same values in one indexing
# step. The comparison is an exact match, so a row whose Events holds any
# other label is not included.
rained = df['Events'] == 'Rain'
df.loc[rained, 'EST']

8      01-09-16
9      01-10-16
15    1/16/2016
26    1/27/2016
Name: EST, dtype: object

## Data Wrangling / Data Munging

### Replacing the NaN values of columns with zero

In [10]:
# Replace every missing value in the frame with 0 and rebind `df` to the
# result, instead of mutating the existing object with inplace=True.
# Re-running this cell is harmless: a frame with no NaN is returned unchanged.
# NOTE: the fill applies to every column, so missing entries in non-numeric
# columns such as Events also become the integer 0.
df = df.fillna(0)

# Mean wind speed after the fill. Entries that were missing now count as
# 0 mph observations; calling mean() on the unfilled column would skip them
# instead, so the two results differ whenever the column had missing values.
df['WindSpeedMPH'].mean()

6.225806451612903