### Without Pandas

In [None]:
# This program shows that, without pandas, answering the questions below is hard and inconvenient:
# (1) Max temperature in New York in the month of January
# (2) List of days when it rained
# (3) Average wind speed in the month of January

# Module-level store for the parsed CSV data: filled by parse_csv() with one
# dict per data row and read by the get_* helpers below.
__parsed_rows = []

def parse_csv():
    """Load ``nyc_weather.csv`` into the module-level ``__parsed_rows`` list.

    The header line is skipped and every non-blank data line becomes one dict
    keyed by column name. All values are kept as the raw strings read from
    the file; the ``get_*`` helpers convert them when needed.

    The list is refilled in place, so calling this function again reloads the
    data instead of appending a second copy of every row. If reading fails,
    the previously loaded rows are left untouched.

    Raises:
        OSError: if ``nyc_weather.csv`` cannot be opened (for example
            ``FileNotFoundError`` when it is not in the working directory).
        IndexError: if a non-blank data line has fewer than 11 columns.
    """
    import csv

    file_path = "nyc_weather.csv"
    # Columns are addressed by position, so these keys are our own names and
    # do not have to match the header text in the file.
    column_names = (
        'date',
        'temperature',
        'DewPoint',
        'Humidity',
        'Sea_Level_PressureIn',
        'VisibilityMiles',
        'WindSpeedMPH',
        'PrecipitationIn',
        'CloudCover',
        'Events',
        'WindDirDegrees',
    )

    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation requires for files passed to csv.reader.
    with open(file_path, "r", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header line (no error on an empty file)
        rows = [
            {name: row[position] for position, name in enumerate(column_names)}
            for row in reader
            if row  # csv.reader yields [] for blank lines; ignore them
        ]

    # Slice assignment replaces the contents of the shared list without
    # rebinding the global name, so no ``global`` statement is needed.
    __parsed_rows[:] = rows


def get_days_for_event(event_name):
    """Return the dates of all parsed rows whose 'Events' field equals *event_name*.

    The comparison is an exact string match; dates are returned in the order
    the rows appear in ``__parsed_rows``.
    """
    return [
        record['date']
        for record in __parsed_rows
        if record['Events'] == event_name
    ]

def get_max_temperature():
    """Return the highest temperature found in ``__parsed_rows`` as an int.

    Rows whose 'temperature' field is blank are ignored, mirroring how
    get_average_wind_speed tolerates blank cells. When there is no
    temperature reading at all, 0 is returned (unchanged from the previous
    behaviour for an empty data set).
    """
    readings = [
        int(row['temperature'])
        for row in __parsed_rows
        if row['temperature'] != ''
    ]
    # max() over the real readings: a running maximum seeded with 0 would
    # wrongly report 0 for a month in which every temperature is negative.
    return max(readings, default=0)

def get_average_wind_speed():
    """Return the mean of the 'WindSpeedMPH' values in ``__parsed_rows``.

    A blank wind-speed cell counts as 0 and still contributes to the number
    of rows the total is divided by. Returns 0.0 when no rows are loaded,
    instead of failing with a division by zero.
    """
    if not __parsed_rows:
        return 0.0

    speeds = [
        int(row['WindSpeedMPH']) if row['WindSpeedMPH'] != '' else 0
        for row in __parsed_rows
    ]
    return sum(speeds) / len(speeds)

if __name__ == "__main__":
    # Load the CSV once, then answer the three questions in order.
    parse_csv()

    hottest = get_max_temperature()
    print("Max temperature is: ", hottest)

    rainy_days = get_days_for_event('Rain')
    print("Days of rain: ", rainy_days)

    mean_wind_speed = get_average_wind_speed()
    print("Average wind speed is: ", mean_wind_speed)

### With Pandas

#### Import modules

In [None]:
# Import the pandas module
import pandas as pd

# Create a DataFrame from the CSV file
df = pd.read_csv("nyc_weather.csv")

# Display the DataFrame
df

#### Output of the same use case with pandas

In [None]:
# Display the maximum of the 'Temperature' column
# (as a bare expression: df['Temperature'].max())

print("Maximum temperature is",df['Temperature'].max())


In [None]:
# Display the dates ('EST' column) of the rows whose 'Events' value is 'Rain',
# selecting rows and column in a single .loc step
print("Days of Rain:")
df.loc[df['Events'] == 'Rain', 'EST']

In [None]:
# Display the mean of the 'WindSpeedMPH' column.
# mean() skips missing values by default, so blank cells do not count as 0 here.
df['WindSpeedMPH'].mean()

# Alternative: attribute-style column access
# df.WindSpeedMPH.mean()

In [None]:
# Fill the empty cells (missing values) with 0, modifying df in place.
# This applies to every column of the DataFrame, not only 'WindSpeedMPH'.
df.fillna(0, inplace=True)
df

In [None]:
# Mean wind speed again, now that the empty cells were filled with 0 above
# (the same rule the "Without Pandas" code uses for blank wind speeds)
df['WindSpeedMPH'].mean()

#### Some useful functions

In [None]:
# Return the first five rows
df.head()

In [None]:
# Return the first 10 rows
df.head(10)

In [None]:
# Return the last five rows
df.tail()

In [None]:
# Return a tuple with the number of rows and columns of the DataFrame
df.shape

In [None]:
# Show the first rows without the 'Humidity' column (df itself is not modified).
# axis=0 drops rows, axis=1 drops columns. The axis must be passed by keyword:
# passing it positionally was deprecated in pandas 1.3 and fails in pandas 2.0+.
df.drop('Humidity', axis=1).head()

In [None]:
# Return the mean of all the values in the 'WindSpeedMPH' column.
df['WindSpeedMPH'].mean()

# Alternative: df.WindSpeedMPH.mean()

In [None]:
# Minimum value of the 'Temperature' column
df['Temperature'].min()

In [None]:
# Sum of all the values in the 'Temperature' column
df['Temperature'].sum()

In [None]:
# Return summary statistics (count, min, percentiles, ...) for the DataFrame
df.describe()

In [None]:
# Write the DataFrame to a CSV file
df.to_csv('new_file.csv')

In [None]:
# Write the CSV file again, this time without the index column
df.to_csv('new_file.csv', index=False)

In [None]:
# header=False leaves the column names out of the CSV file
df.to_csv('new_file.csv',header=False)

#### Some additional resources
https://pandas.pydata.org/pandas-docs/stable/reference/frame.html

https://www.dataquest.io/blog/pandas-cheat-sheet/