##Deliverables

2(a): To convert the values in the Date column to Datetime values I put two extra parameters in the read_csv function. They are dayFirst = True and parse_dates = True. The first parameter specifies that the date currently has the day as the first part of the date i.e 01/10/2019 could would mean the 10th of January 2019 if this command was not specified. The second parameter instructs pandas to parse the specified column as a pandas Timestamp.

2(b): To create a multi index the set_index command was used with the list of column names to be indexed and also the parameter drop=False which prevents pandas from dropping the columns


Warning Message: /Users/gearoidlacey/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:128: FutureWarning: 'Date' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version


Altair requirement: To initialize this nbextension in the browser every time the notebook (or other app) loads:
    
          jupyter nbextension enable vega --py --sys-prefix

In [None]:
import numpy as np
import pandas as pd
from urllib import request
from bokeh.plotting import figure, output_file, show
from bokeh.models import DatetimeTickFormatter,tickers
import altair as alt


def get_data():
    data = request.urlopen("http://paulbarry.itcarlow.ie/weatherdata/weather_reports.csv")
    csv_data = pd.read_csv(data, sep='|', dayfirst=True,  parse_dates=[0])
    return csv_data


def create_multiIndex(data):
    data['Dates'] = data['Date']
    data = data.set_index(["Date","Time"], drop=False)
    return data


def fix_windspeed(data):
    def use_higher(row):
        if  pd.isnull(row['Wind Speed (kts)']):
            row['Wind Speed (kts)'] = ''

        if 'Gust' in row['Wind Speed (kts)'] :
            temp = row['Wind Speed (kts)'].split('Gust')
            if int(temp[0]) > int(temp[1]):
                gust = temp[0]
            else:
                gust = temp[1]

            return gust

    data['Wind Speed (kts)'] = data.apply(use_higher, axis=1)
    data['Wind Speed (kts)'] = data['Wind Speed (kts)'].fillna('0')
    return data


def fill_trace(data):
    def set_trace(row):
        if row['Rain (mm)'] == 'Trace':
            rain = '0.00'
        elif row['Rain (mm)'] == 'n/a':
            rain = '0.00'
        else:
            rain = row['Rain (mm)']

        return rain
        
    data['Rain (mm)'] = data.apply(set_trace, axis=1)

    return data


def fill_dashes(row,items,value, col_names):

    if row[items] == 'n/a':
        row[items] = value
    if row[items] == '--':
        new_value = value
    else:
        new_value = row[items]
    row.fillna(value)
    return new_value


def fill_numeric_blanks(data):
    col_names = ['Wind Speed (kts)','Rain (mm)', 'Pressure (hPa)', 'Temp (◦C)', 'Humidity (%)' ]
    for items in col_names:
        value = '0'
        data[items] = data.apply(fill_dashes, axis=1, args=(items,value, col_names))
        data[items] = data[items].fillna('0')

    return data


def fill_descriptive_blanks(data):
    col_names = ['Date', 'Time', 'Location', 'Wind Direction','Weather','Dates' ]
    for items in col_names:
        value = 'Unknown'
        data[items] = data.apply(fill_dashes, axis=1, args=(items,value, col_names))
    return data


def change_types(data):
    data['Rain (mm)'] = data['Rain (mm)'].astype(float)
    data['Wind Speed (kts)'] = data['Wind Speed (kts)'].astype(int)
    data['Temp (◦C)'] = data['Temp (◦C)'].astype(int)
    data['Humidity (%)'] = data['Humidity (%)'].astype(int)
    data['Pressure (hPa)'] = data['Pressure (hPa)'].astype(int)

    return data


def create_graph1(data):
    graph1_data = data.groupby(['Location'])['Rain (mm)'].mean()
    temp_df = graph1_data.to_frame()
    temp_df.reset_index(level=0, inplace=True)  # index gets converted to a column

    output_file('avg_rainfall.html')
    p = figure(x_range=temp_df['Location'].tolist(), plot_width=1200, plot_height=600)
    p.xaxis[0].axis_label = 'Location'
    p.yaxis[0].axis_label = 'Average Rainfall (mm)'

    p.line(temp_df['Location'].tolist(), temp_df['Rain (mm)'].tolist(), line_width=2)
    p.circle(temp_df['Location'].tolist(), temp_df['Rain (mm)'].tolist(), fill_color="blue", size=8)
    p.xaxis.major_label_orientation = 45
    p.yaxis.major_label_orientation = "vertical"

    show(p)


def create_graph2(data):
    graph1_data = data.groupby(['Location'])['Temp (◦C)'].mean()
    temp_df = graph1_data.to_frame()
    temp_df.reset_index(level=0, inplace=True)  # index gets converted to a column

    output_file('avg_temp.html')
    p = figure(x_range=temp_df['Location'].tolist(), plot_width=1200, plot_height=600)
    p.xaxis[0].axis_label = 'Location'
    p.yaxis[0].axis_label = 'Average Temperature (◦C)'

    p.line(temp_df['Location'].tolist(), temp_df['Temp (◦C)'].tolist(), line_width=2)
    p.circle(temp_df['Location'].tolist(), temp_df['Temp (◦C)'].tolist(), fill_color="red", size=8)
    p.xaxis.major_label_orientation = 45
    p.yaxis.major_label_orientation = "vertical"

    show(p)


def create_graph3(data):
    #how has rainfall rates changed over time

    graph1_data = data.groupby(['Dates'])['Rain (mm)'].mean()
    temp_df = graph1_data.to_frame()
    temp_df.reset_index(level=0, inplace=True)  # index gets converted to a column

    output_file('rain_vs_temp.html')
    p = figure(x_axis_type="datetime", plot_width=1200, plot_height=600)
    p.xaxis[0].axis_label = 'Date'
    p.yaxis[0].axis_label = 'Average Rainfall (mm)'

    p.line(temp_df['Dates'].tolist(), temp_df['Rain (mm)'].tolist(), line_width=2)
    p.xaxis.formatter = DatetimeTickFormatter(days=["%d/%b"])
    p.xaxis[0].ticker.desired_num_ticks = 10
    p.xaxis.major_label_orientation = 45
    p.yaxis.major_label_orientation = "vertical"
    show(p)


def create_windSpeed_bin(data):
    means = data.groupby(['Location'])['Wind Speed (kts)'].mean()
    means = means.to_frame()
    bins = pd.cut(means['Wind Speed (kts)'], 3, labels=['Low', 'Average', 'High'])
    bins = bins.to_frame()

    means.columns.values[0] = 'Average Wind Speed (kts)'

    new_df = pd.concat([means, bins], axis=1, join_axes=[means.index])
    new_df.reset_index(level=0, inplace=True)  # convert index to a column
    new_df

    graph =alt.Chart(new_df).mark_bar().encode(
        x='Location',
        y='Average Wind Speed (kts)',
        color='Wind Speed (kts)'
    )
    graph.display()


def create_bin_graphs(data):
    columns = ['Rain (mm)', 'Temp (◦C)', 'Humidity (%)']
    filenumber = 1
    for items in columns:
        means = data.groupby(['Location'])[items].mean()
        means = means.to_frame()
        bins = pd.cut(means[items], 3, labels=['Low', 'Average', 'High'])
        bins = bins.to_frame()

        means.columns.values[0] = 'Average ' + items

        new_df = pd.concat([means, bins], axis=1, join_axes=[means.index])
        new_df.reset_index(level=0, inplace=True)
        new_df

        graph = alt.Chart(new_df).mark_bar().encode(
            x='Location',
            y=items,
            color=means.columns.values[0]
        )
        graph.display()

data = get_data()
data = create_multiIndex(data)
data = fix_windspeed(data)
data = fill_trace(data)
data = fill_numeric_blanks(data)
data = change_types(data)
create_graph1(data)
create_graph2(data)
create_graph3(data)
create_windSpeed_bin(data)
create_bin_graphs(data)


In [None]:
a = data.groupby(['Location'])['Wind Speed (kts)'].mean()
a = a.to_frame()
a
b = pd.cut(a['Wind Speed (kts)'], 3, labels=['Low','Average','High'])
b.to_frame()


In [None]:
df = data.groupby(['Location'])['Wind Speed (kts)'].mean()
df= df.to_frame()
df.columns.values[0] = 'Max Wind Speed (kts)'

temp_df = b.to_frame()
new_df = pd.concat([df,temp_df],axis=1,join_axes=[df.index])
new_df.reset_index(level=0, inplace=True)

In [None]:
#new_df['Wind Speed (kts)'].values[1]
#new_df.reset_index(level=0, inplace=True)

output_file('bin1.html')
p = figure(x_range=new_df['Wind Speed (kts)'].tolist(), plot_width=900, plot_height=400)
p.xaxis[0].axis_label = 'Overall Wind Speed Description'
p.yaxis[0].axis_label = 'Max Wind speed per Location'

p.line(new_df['Wind Speed (kts)'],new_df['Max Wind Speed (kts)'], line_width=2)
p.circle(new_df['Wind Speed (kts)'].tolist(),new_df['Max Wind Speed (kts)'].tolist(), fill_color="red", size=8)

show(p)

In [None]:
#new_df['Wind Speed (kts)'].values[1]
#new_df.reset_index(level=0, inplace=True)

output_file('bin1.html')
p = figure(x_range=new_df['Wind Speed (kts)'].tolist(), plot_width=900, plot_height=400)
p.xaxis[0].axis_label = 'Overall Wind Speed Description'
p.yaxis[0].axis_label = 'Max Wind speed per Location'

p.line(new_df['Max Wind Speed (kts)'].tolist(), new_df['Wind Speed (kts)'].tolist(), line_width=2)
p.circle(new_df['Max Wind Speed (kts)'].tolist(), new_df['Wind Speed (kts)'].tolist(), fill_color="red", size=8)

show(p)
print('1' ,len(new_df['Wind Speed (kts)']))
print('2' ,len(new_df['Max Wind Speed (kts)']))


In [None]:
import matplotlib.pyplot as plt
a = set(new_df['Wind Speed (kts)'])
a = list(a)

#colours = {'red':a[0], 'blue':a[1], 'green':a[2]}
colours = new_df['Max Wind Speed (kts)']
#new_df.reset_index(level=0, inplace=True)
#plt.plot(new_df['Wind Speed (kts)'].tolist(), new_df['Max Wind Speed (kts)'].tolist())
plt.subplots(figsize=(60, 10))
#plt.plot(figsize=(1900, 3100),sharey=False)
plt.bar(new_df['Location'].tolist(), new_df['Max Wind Speed (kts)'].tolist(),color=colours)
plt.show()

In [None]:
d = data.loc[data['Location'] == 'PHOENIX PARK(A)']
d['Wind Speed (kts)'].max()

In [None]:
d

In [None]:
import ggplot

In [None]:
new_df.reset_index(level=0, inplace=True)
g = ggplot(new_df, aes(new_df['Location'].tolist(), new_df['Max Wind Speed (kts)'].tolist()), fill=new_df['Wind Speed (kts)'])

In [None]:
import plotnine
(ggplot(new_df, aes(new_df['Location'].tolist(), new_df['Max Wind Speed (kts)'].tolist(), color=new_df['Wind Speed (kts)']))
 + geom_point())

In [None]:
a = set(new_df['Wind Speed (kts)'])
a
colours = {'red':a[0], 'blue':a[1], 'green':a[2]}

In [None]:
import altair as alt
alt.Chart(new_df).mark_bar().encode(
    x='Location',
    y='Max Wind Speed (kts)',
    color= 'Wind Speed (kts)'
)

In [None]:
#df = data.groupby(['Location'])['Wind Speed (kts)'].mean()
#df= df.to_frame()
#df.columns.values[0] = 'Max Wind Speed (kts)'

#temp_df = b.to_frame()
#new_df = pd.concat([df,temp_df],axis=1,join_axes=[df.index])
#new_df.reset_index(level=0, inplace=True)


###############
means = data.groupby(['Location'])['Wind Speed (kts)'].mean()
means = means.to_frame()
bins = pd.cut(means['Wind Speed (kts)'], 3, labels=['Low', 'Average', 'High'])
bins = bins.to_frame()

#df = data.groupby(['Location'])['Wind Speed (kts)'].mean()
#df = df.to_frame()

means.columns.values[0] = 'Max Wind Speed (kts)'
#means.reset_index(level=0, inplace=True)
#bins.reset_index(level=0, inplace=True)

#temp_df = bins.to_frame()
new_df = pd.concat([means, bins], axis=1, join_axes=[means.index])
new_df.reset_index(level=0, inplace=True)
new_df
alt.Chart(new_df).mark_bar().encode(
    x='Location',
    y='Max Wind Speed (kts)',
    color='Wind Speed (kts)'
)

In [None]:
bins

In [None]:
new_df['Max Wind Speed (kts)']

In [None]:
columns = ['Wind Speed (kts)','Rain (mm)', 'Temp (◦C)', 'Humidity (%)']
filenumber=1
for items in columns:
    means = data.groupby(['Location'])[items].mean()
    means = means.to_frame()
    bins = pd.cut(means[items], 3, labels=['Low', 'Moderate', 'High'])
    bins = bins.to_frame()

    bracket = items.index('(')
    legend = items[:bracket]

    bins.columns.values[0] = 'Average ' + legend +'Category '
    means.columns.values[0] = 'Average ' +items + ' Per Station'
 
    new_df = pd.concat([means, bins], axis=1, join_axes=[means.index])
    new_df.reset_index(level=0, inplace=True)
    new_df
    
    graph = alt.Chart(new_df).mark_bar().encode(
        x='Location',
        y= means.columns.values[0] ,
        color= bins.columns.values[0]
    )
    graph.display()
    
    
    

In [None]:
output_file('bin1.html')
p = figure(x_range=new_df['Wind Speed (kts)'].tolist(), plot_width=900, plot_height=400)
p.xaxis[0].axis_label = 'Overall Wind Speed Description'
p.yaxis[0].axis_label = 'Max Wind speed per Location'

p.line(new_df['Max Wind Speed (kts)'].tolist(), new_df['Wind Speed (kts)'].tolist(), line_width=2)
p.circle(new_df['Max Wind Speed (kts)'].tolist(), new_df['Wind Speed (kts)'].tolist(), fill_color="red", size=8)

show(p)


In [None]:
from flask import Flask

In [28]:
import numpy as np
import pandas as pd
from urllib import request as r
from bokeh.plotting import figure, output_file, show
from bokeh.models import DatetimeTickFormatter, tickers
from bokeh.embed import components
from bokeh.io import output_notebook
import altair as alt
from flask import Flask, render_template,request



def get_data():
    data = r.urlopen("http://paulbarry.itcarlow.ie/weatherdata/weather_reports.csv")
    csv_data = pd.read_csv(data, sep='|', dayfirst=True, parse_dates=[0],encoding='utf-8')
    return csv_data


def create_multiIndex(data):
    data['Dates'] = data['Date']
    data = data.set_index(["Date", "Time"], drop=False)
    return data


def fix_windspeed(data):
    def use_higher(row):
        if pd.isnull(row['Wind Speed (kts)']):
            row['Wind Speed (kts)'] = ''

        if 'Gust' in row['Wind Speed (kts)']:
            temp = row['Wind Speed (kts)'].split('Gust')
            if int(temp[0]) > int(temp[1]):
                gust = temp[0]
            else:
                gust = temp[1]

            return gust

    data['Wind Speed (kts)'] = data.apply(use_higher, axis=1)
    data['Wind Speed (kts)'] = data['Wind Speed (kts)'].fillna('0')
    return data


def fill_trace(data):
    def set_trace(row):
        if row['Rain (mm)'] == 'Trace':
            rain = '0.00'
        elif row['Rain (mm)'] == 'n/a':
            rain = '0.00'
        else:
            rain = row['Rain (mm)']

        return rain

    data['Rain (mm)'] = data.apply(set_trace, axis=1)

    return data


def fill_dashes(row, items, value, col_names):
    if row[items] == 'n/a':
        new_value = value
    if row[items] == "--":
        new_value = value
    if row[items] == 'CALM':
        new_value = value
    else:
        if row[items] == "--":
            row[items] = value
        
        new_value = row[items]
    
    return new_value


def fill_numeric_blanks(data):
    col_names = ['Wind Speed (kts)', 'Rain (mm)', 'Pressure (hPa)', 'Temp (◦C)', 'Humidity (%)']
    for items in col_names:
        value = '-1'
        data[items] = data.apply(fill_dashes, axis=1, args=(items, value, col_names))
        data[items] = data[items].fillna('0')

    return data


def fill_descriptive_blanks(data):
    col_names = ['Date', 'Time', 'Location', 'Wind Direction', 'Weather', 'Dates']
    for items in col_names:
        value = 'Unknown'
        data[items] = data.apply(fill_dashes, axis=1, args=(items, value, col_names))
    return data


def change_types(data):
    data['Rain (mm)'] = data['Rain (mm)'].astype(float)
    data['Wind Speed (kts)'] = data['Wind Speed (kts)'].astype(int)
    data['Temp (◦C)'] = data['Temp (◦C)'].astype(int)
    data['Humidity (%)'] = data['Humidity (%)'].astype(int)
    data['Pressure (hPa)'] = data['Pressure (hPa)'].astype(int)
    
    col_names = ['Wind Speed (kts)', 'Rain (mm)', 'Pressure (hPa)', 'Temp (◦C)', 'Humidity (%)']
    for items in col_names:
        temp = data.loc[data[items] != -1]
        data[items] = data[items].fillna(temp.mean())

    return data


def create_graph1(data):
    graph1_data = data.groupby(['Location'])['Rain (mm)'].mean()
    temp_df = graph1_data.to_frame()
    temp_df.reset_index(level=0, inplace=True)  # index gets converted to a column

    #output_file('avg_rainfall.html')
    p = figure(x_range=temp_df['Location'].tolist(), plot_width=1200, plot_height=600)
    p.xaxis[0].axis_label = 'Location'
    p.yaxis[0].axis_label = 'Average Rainfall (mm)'

    p.line(temp_df['Location'].tolist(), temp_df['Rain (mm)'].tolist(), line_width=2)
    p.circle(temp_df['Location'].tolist(), temp_df['Rain (mm)'].tolist(), fill_color="red", size=8)
    p.xaxis.major_label_orientation = 45
    p.yaxis.major_label_orientation = "vertical"
    return p


def create_graph2(data):
    graph1_data = data.groupby(['Location'])['Temp (◦C)'].mean()
    temp_df = graph1_data.to_frame()
    temp_df.reset_index(level=0, inplace=True)  # index gets converted to a column

    #output_file('templates/avg_temp.html')
    p = figure(x_range=temp_df['Location'].tolist(), plot_width=1200, plot_height=600)
    p.xaxis[0].axis_label = 'Location'
    p.yaxis[0].axis_label = 'Average Temperature (◦C)'

    p.line(temp_df['Location'].tolist(), temp_df['Temp (◦C)'].tolist(), line_width=2)
    p.circle(temp_df['Location'].tolist(), temp_df['Temp (◦C)'].tolist(), fill_color="red", size=8)
    p.xaxis.major_label_orientation = 45
    p.yaxis.major_label_orientation = "vertical"
    return p


def create_graph3(data):
    # how has rainfall rates changed over time

    graph1_data = data.groupby(['Wind Direction'])['Wind Speed (kts)'].mean()
    temp_df = graph1_data.to_frame()
    temp_df.reset_index(level=0, inplace=True)  # index gets converted to a column

    #output_file('rain_vs_temp.html')
    p = figure(x_range=temp_df['Wind Direction'].tolist(), plot_width=1200, plot_height=600)
    p.xaxis[0].axis_label = 'Wind Direction'
    p.yaxis[0].axis_label = 'Wind Speed (kts)'

    p.line(temp_df['Wind Direction'].tolist(), temp_df['Wind Speed (kts)'].tolist(), line_width=2)
    p.circle(temp_df['Wind Direction'].tolist(), temp_df['Wind Speed (kts)'].tolist(), fill_color="red", size=8)
    p.xaxis.major_label_orientation = 45
    p.yaxis.major_label_orientation = "vertical"
    return p


def create_bins(data):
    columns = ['Wind Speed (kts)', 'Rain (mm)', 'Temp (◦C)', 'Humidity (%)']
    filenumber = 1
    for items in columns:
        means = data.groupby(['Location'])[items].mean()
        means = means.to_frame()
        bins = pd.cut(means[items], 3, labels=['Low', 'Moderate', 'High'])
        bins = bins.to_frame()

        bracket = items.index('(')
        legend = items[:bracket]

        bins.columns.values[0] = 'Average ' + legend + 'Category '
        means.columns.values[0] = 'Average ' + items + ' Per Station'

        new_df = pd.concat([means, bins], axis=1, join_axes=[means.index])
        new_df.reset_index(level=0, inplace=True)
        new_df

        graph = alt.Chart(new_df).mark_bar().encode(
            x='Location',
            y=means.columns.values[0],
            color=bins.columns.values[0]
        )
        graph.display()


data = get_data()
data = create_multiIndex(data)
data = fix_windspeed(data)
data = fill_trace(data)
data = fill_numeric_blanks(data)
data = fill_descriptive_blanks(data)
data = change_types(data)
output_notebook()



In [None]:
plot = create_graph1(data)
show(plot)

In [None]:
plot2 = create_graph2(data)
show(plot2)

In [24]:
plot3 = create_graph3(data)
show(plot3)

In [None]:
create_bins(data)

In [27]:
temp = data.loc[data['Temp (◦C)'] == int('n/a') ]
temp

ValueError: invalid literal for int() with base 10: 'n/a'

In [None]:
temp['Rain (mm)'].mean()

In [None]:
temp['Rain (mm)']

In [None]:
TEMP = data.loc[data['Wind Direction'] == '--']   #--
TEMP