In [21]:
import pandas as pd
import numpy as np

In [22]:
# Load only the columns this notebook uses. Every field is read as a string
# so the comma-separated raw values can be parsed explicitly afterwards.
link = "./global-hourly-2024-01-02T20-32-28.csv"
data = pd.read_csv(
    link,
    usecols=["WND", "REPORT_TYPE", "DATE", "TMP"],
    dtype=str,
)

In [23]:
# Drop rows whose report type is one of the excluded codes.
# The trailing spaces are part of the codes being matched.
excluded_report_types = ['SOD  ', 'SOM  ']
is_excluded = data['REPORT_TYPE'].isin(excluded_report_types)
data = data.loc[~is_excluded]

In [24]:
# Split the comma-separated WND field into one column per component.
wind_columns = ["Direction", "Direction_Quality", "Type", "Speed", "Speed_Quality"]
split_values = data['WND'].str.split(',', expand=True)
num_columns = len(split_values.columns)
# Fail with an explicit message if the field does not have the expected layout
# (assigning the column names below would raise ValueError anyway).
if num_columns != len(wind_columns):
    raise ValueError(
        f"Expected {len(wind_columns)} comma-separated parts in WND, found {num_columns}"
    )
split_values.columns = wind_columns
# Drop copies left by a previous run of this cell so that re-running it
# does not leave duplicate column labels in the frame.
data = pd.concat(
    [data.drop(columns=wind_columns, errors='ignore'), split_values],
    axis=1,
)

In [25]:
# Cast the parsed Direction and Speed strings to integers in a single call.
data = data.astype({'Direction': int, 'Speed': int})

In [26]:
# Standardize calm wind: rows with Type == 'C' get zero speed and direction.
# Only the 'C' type is rewritten here; no other wind type is modified.
is_calm = data['Type'] == 'C'
for wind_column in ('Speed', 'Direction'):
    data.loc[is_calm, wind_column] = 0

In [27]:
# Keep only rows whose wind type is 'N' or 'C' and whose direction and speed
# quality codes both belong to the accepted set.
accepted_quality = ['0', '1', '4', '5', '9']
keep_rows = (
    data['Type'].isin(["N", "C"])
    & data['Direction_Quality'].isin(accepted_quality)
    & data['Speed_Quality'].isin(accepted_quality)
)
data = data[keep_rows]

In [28]:
# Remove impossible measurements.
# Direction must lie within 0-360 degrees. Speed is range-checked as well:
# the quality filter accepts code '9', so a missing-speed placeholder could
# otherwise pass through and distort the wind-vector components.
# NOTE(review): the 0-900 bound (tenths of m/s, with 9999 meaning "missing")
# is taken from the NOAA ISD format document -- confirm for this dataset.
MAX_WIND_SPEED = 900
valid_direction = data['Direction'].between(0, 360)
valid_speed = data['Speed'].between(0, MAX_WIND_SPEED)
data = data[valid_direction & valid_speed]

In [29]:
# Convert wind to vector components.
# Direction is converted from degrees to radians, then Speed is projected
# onto the cosine (Wx) and sine (Wy) of that angle.
# NOTE(review): Speed is used exactly as parsed, with no rescaling -- confirm
# the intended units.
windRadians = data["Direction"] * np.pi / 180
data = data.assign(
    Wx=data['Speed'] * np.cos(windRadians),
    Wy=data['Speed'] * np.sin(windRadians),
)

In [30]:
# Parse DATE and replace it with the timestamp in seconds (as floats).
# timestamp_s is kept as a separate name because later cells read it.
parsed_dates = pd.to_datetime(data['DATE'])
timestamp_s = parsed_dates.map(pd.Timestamp.timestamp)
data['DATE'] = timestamp_s

In [31]:
# Encode time of day and time of year as sine/cosine pairs so that each
# cycle wraps around smoothly.
day = 24*60*60
year = (365.2425)*day

# (sine column, cosine column, period in seconds)
cyclic_features = (
    ('Day sin', 'Day cos', day),
    ('Year sin', 'Year cos', year),
)
for sin_column, cos_column, period in cyclic_features:
    phase = timestamp_s * (2 * np.pi / period)
    data[sin_column] = np.sin(phase)
    data[cos_column] = np.cos(phase)

In [32]:
# Split the comma-separated TMP field into its value and quality code.
temp_columns = ["Temp", "Quality"]
split_values = data['TMP'].str.split(',', expand=True)
num_columns = len(split_values.columns)
# Fail with an explicit message if the field does not have the expected layout
# (assigning the column names below would raise ValueError anyway).
if num_columns != len(temp_columns):
    raise ValueError(
        f"Expected {len(temp_columns)} comma-separated parts in TMP, found {num_columns}"
    )
split_values.columns = temp_columns
# Drop copies left by a previous run of this cell so that re-running it
# does not leave duplicate column labels in the frame.
data = pd.concat(
    [data.drop(columns=temp_columns, errors='ignore'), split_values],
    axis=1,
)

In [33]:
# Parse the temperature as an integer and divide by 10, then discard rows
# whose temperature quality code is '9'.
data['Temp'] = data['Temp'].astype(int) / 10
has_quality_nine = data['Quality'].isin(['9'])
data = data.loc[~has_quality_nine]

In [34]:
# Remove the raw fields and intermediate columns that are no longer needed.
unneeded_columns = [
    "Direction",
    "Speed",
    "Direction_Quality",
    "Type",
    "Speed_Quality",
    "REPORT_TYPE",
    "WND",
    "TMP",
    "Quality",
]
data = data.drop(columns=unneeded_columns)

In [37]:
# Write the cleaned table to CSV without the index column.
output_path = "cleanData.csv"
data.to_csv(output_path, index=False)