In [12]:
import pandas as pd

In [13]:
# Load the wind observations into pandas from a local CSV export.
# The commented-out lines build a NOAA access-services URL that can be used as
# `link` instead (presumably where the local export came from -- TODO confirm).
# link = "https://www.ncei.noaa.gov/access/services/data/v1?dataset=global-hourly&stations=72495723213&dataTypes=DATE,REPORT_TYPE,WND&startDate=2000-01-01&endDate="
# endDate = "2023-12-12"
# link += endDate
link = "./global-hourly-2023-12-29T11-33-21.csv"
wanted_columns = ["WND", "REPORT_TYPE", "DATE"]
data = pd.read_csv(
    link,
    usecols=wanted_columns,
    # Every field is read as text; parsing happens in the cells that follow.
    dtype={column: str for column in wanted_columns},
)

In [14]:
# Remove SOD and SOM reports.
# The code is compared after stripping surrounding whitespace, so both the
# space-padded form ('SOD  ') and an unpadded form ('SOD') are removed.
# Rows with a missing REPORT_TYPE never match and are therefore kept.
data = data[~data['REPORT_TYPE'].str.strip().isin(['SOD', 'SOM'])]

In [15]:
# Split the wind data into separate columns.
# Each WND value is a comma-separated record; its fields become one new column
# each, named in the order given by wnd_fields.
wnd_fields = ["Direction", "Direction_Quality", "Type", "Speed", "Speed_Quality"]
split_values = data['WND'].str.split(',', expand=True)
num_columns = len(split_values.columns)
# Fail with an explicit message when the records do not have the expected
# number of fields, instead of on the column rename below.
if num_columns != len(wnd_fields):
    raise ValueError(
        "Expected {} comma-separated fields in WND, found {}".format(
            len(wnd_fields), num_columns
        )
    )
split_values.columns = wnd_fields
# split_values keeps the index of data, so the new columns align row by row.
data = pd.concat([data, split_values], axis=1)

In [16]:
# Drop redundant columns: the raw WND text and REPORT_TYPE are not read again
# by the later cells of this notebook.
# The result is reassigned rather than dropped with inplace=True, so the cell
# always binds `data` to a fresh frame instead of mutating the existing one.
data = data.drop(columns=["REPORT_TYPE", "WND"])

In [17]:
# Cast the Speed and Direction columns from text to integers in a single call.
# The cast raises if any value in either column cannot be parsed as an int.
data = data.astype({'Direction': int, 'Speed': int})

In [18]:
# Standardize calm and variable wind so each type always carries the same codes.
is_calm = data['Type'] == 'C'
is_variable = data['Type'] == 'V'
# Calm rows (Type 'C'): both speed and direction are forced to 0.
data.loc[is_calm, ['Speed', 'Direction']] = 0
# Variable rows (Type 'V'): direction is forced to the 999 code.
data.loc[is_variable, 'Direction'] = 999

In [19]:
# Remove data flagged as erroneous.
# A row is kept only when its Type is one of N, C or V and both of its
# quality codes (direction and speed) are in the accepted set.
accepted_types = ["N", "C", "V"]
accepted_quality = ['0', '1', '4', '5', '9']
passes_checks = (
    data['Type'].isin(accepted_types)
    & data['Direction_Quality'].isin(accepted_quality)  # Direction quality
    & data['Speed_Quality'].isin(accepted_quality)  # Speed quality
)
data = data[passes_checks]

In [20]:
# Remove impossible measurements.
# Direction: keep 0-360 plus the 999 code (999 lies outside the 0-360 range,
# hence the explicit test).
valid_direction = (data['Direction'] == 999) | data['Direction'].between(0, 360)
# Speed: drop 9999, which the NOAA ISD format document lists as the
# missing-value sentinel of the WND speed field. Such rows can pass the
# quality filter (quality code '9' is accepted) and would otherwise be
# written out as real speeds.
valid_speed = data['Speed'] != 9999
data = data[valid_direction & valid_speed]

In [21]:
# Remove validation columns: the quality codes and the Type column were only
# needed by the filtering cells and are left out of the final frame.
# `data` is a filtered selection at this point, so the result is reassigned
# rather than dropped with inplace=True, which would mutate that selection.
data = data.drop(columns=["Direction_Quality", "Type", "Speed_Quality"])

In [22]:
# Write the cleaned frame to output.csv, leaving out the pandas index column.
data.to_csv(path_or_buf="output.csv", index=False)