##### Imports

In [18]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [9]:
# Load the semicolon-separated observations file and preview the first rows.
# NOTE(review): the 'Unnamed: 0' column in the preview is presumably a saved
# row index from a previous export -- confirm before relying on it.
df = pd.read_csv('meteologica.txt', sep=';')
df.head(3)

Unnamed: 0,Observacion,U,V,T
0,512,13,-34,2873
1,726,17,-21,2821
2,730,10,-21,2809


# Data Preprocessing

In [10]:
def add_wind_speed(df):
    """Return the magnitude of the scaled (U, V) wind vector.

    ``df`` is anything indexable by the keys ``'U'`` and ``'V'`` (a single
    row or a whole frame). U is divided by 20 and V by 10 before the two
    components are combined as sqrt(u**2 + v**2).

    NOTE(review): the reason for the two different divisors is not visible
    here -- confirm the units of U and V.
    """
    u_scaled = df['U'] / 20
    v_scaled = df['V'] / 10
    return np.sqrt(u_scaled ** 2 + v_scaled ** 2)

def wind_radians_degrees(df):
    """Return the angle of the (U, V) vector in degrees, wrapped to [0, 360).

    The angle is ``arctan2(U, V)``: zero along the positive V axis and
    growing toward the positive U axis. ``df`` is anything indexable by the
    keys ``'U'`` and ``'V'`` (a single row or a whole frame).

    NOTE(review): whether this is meant as the direction the wind blows
    from or toward is not stated here -- confirm the convention.
    """
    angle_deg = np.degrees(np.arctan2(df['U'], df['V']))
    # arctan2 yields (-180, 180]; shift and wrap so negatives become 180..360.
    return (angle_deg + 360) % 360

def direction_sector(df):
    """Map one row's ``'Direction (degrees)'`` value to a compass sector.

    Sector boundaries (lower bound inclusive, upper bound exclusive):
    North covers 315 and above plus everything below 45, East is
    [45, 135), South is [135, 225), and every remaining value -- including
    NaN, for which all comparisons are false -- is labelled West.

    ``df`` is a single row (anything indexable by the column name), not a
    whole frame: the comparisons need a scalar angle.
    """
    angle = df['Direction (degrees)']

    if angle >= 315 or angle < 45:
        return 'North'
    if 45 <= angle < 135:
        return 'East'
    if 135 <= angle < 225:
        return 'South'
    return 'West'
    
def preprocessing(df):
    """Add derived wind columns to ``df`` and split it by direction sector.

    Three columns are written onto ``df`` itself, so the caller's frame is
    modified in place:

    * ``'Speed (m/ps)'``        -- result of ``add_wind_speed``
    * ``'Direction (degrees)'`` -- result of ``wind_radians_degrees``
    * ``'Direction (sector)'``  -- result of ``direction_sector`` per row

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``'U'`` and ``'V'`` columns read by the helpers.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(north_df, east_df, south_df, west_df)``: independent copies of
        the rows whose sector label is 'North', 'East', 'South' and 'West'.
    """
    # Both numeric helpers are plain column arithmetic, so call them once on
    # the whole frame instead of once per row through df.apply(axis=1).
    df['Speed (m/ps)'] = add_wind_speed(df)
    df['Direction (degrees)'] = wind_radians_degrees(df)
    # direction_sector branches on a single scalar angle, so it still has to
    # be applied row by row.
    df['Direction (sector)'] = df.apply(direction_sector, axis=1)

    # Explicit copies: the subsets can then be modified later without
    # SettingWithCopyWarning and without any ambiguity about touching df.
    north_df, east_df, south_df, west_df = (
        df[df['Direction (sector)'] == sector].copy()
        for sector in ('North', 'East', 'South', 'West')
    )
    return north_df, east_df, south_df, west_df
    

In [11]:
# Split the observations by wind-direction sector; the tuple comes back in
# north, east, south, west order. preprocessing() also adds the derived
# speed/direction columns to `df` itself.
north_df, east_df, south_df, west_df = preprocessing(df)

# Linear Regression - North Sector

In [13]:
# Target and single-column feature frame for the north sector; the list
# selector keeps X_n two-dimensional.
y_n = north_df['Observacion']
X_n = north_df[['Speed (m/ps)']]

# Hold out 20% of the rows for testing, with a fixed random_state.
X_train_n, X_test_n, y_train_n, y_test_n = train_test_split(
    X_n, y_n, test_size=0.2, random_state=42
)

In [35]:
# Fit the north-sector model on the training split (fit() returns the
# estimator itself, so construction and fitting can be chained).
lin_reg_n = LinearRegression().fit(X_train_n, y_train_n)

# Fitted parameters, rounded to two decimals for reporting; coef_ holds one
# entry per feature and there is a single feature here.
coef_n = round(lin_reg_n.coef_[0], 2)
y_intercept_n = round(lin_reg_n.intercept_, 2)

display(
    f'Y-Intercept for north sector: {y_intercept_n}',
    f'Coefficient for north sector: {coef_n}',
)

'Y-Intercept for north sector: -15.06'

'Coefficient for north sector: 95.2'

In [31]:
# Evaluate the north-sector model on the held-out split.
y_pred_n = lin_reg_n.predict(X_test_n)

# Keep full-precision values for any further arithmetic; rounding happens
# only once, on each number that is reported.
mse_n_raw = mean_squared_error(y_test_n, y_pred_n)
mae_n_raw = np.mean(np.abs(y_test_n - y_pred_n))
y_test_mean_n = y_test_n.mean()

mse_n = round(mse_n_raw, 2)
mae_n = round(mae_n_raw, 2)

# Errors relative to the mean observed value: MSE is in squared units, so
# it is divided by the squared mean; MAE by the mean itself.
mse_n_percentage = round((mse_n_raw / (y_test_mean_n ** 2)) * 100, 2)
mae_n_percentage = round((mae_n_raw / y_test_mean_n) * 100, 2)

display(f'MAE as a precentage: {mae_n_percentage}')
display(f'MSE as a precentage: {mse_n_percentage}')

'MAE as a precentage: 54.55'

'MSE as a precentage: 52.84'