In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the augmented training set. The first CSV column is read as the index
# and then discarded in favour of a fresh 0..n-1 index.
train_csv = "./data/augmented_train.csv"
data = pd.read_csv(train_csv, index_col=0)
data = data.reset_index(drop=True)
data.head()

Unnamed: 0,TMQ,U850,V850,UBOT,VBOT,QREFHT,PS,PSL,T200,T500,...,TS,TREFHT,Z1000,Z200,ZBOT,Label,LOCATION,YEAR,MONTH,DAY
0,49.687851,-6.740328,-0.229605,-7.50403,-2.30124,0.019844,101345.5625,101347.6328,218.55455,268.150818,...,302.358093,301.793884,119.566132,12439.52441,66.690376,0,3,2001,7,12
1,32.432846,11.944582,7.3621,3.239414,12.670375,0.020695,97944.415473,100941.730367,220.562507,266.031227,...,303.098166,301.233936,66.253009,12387.744103,66.45426,1,4,2006,10,24
2,21.478655,-5.751558,1.967963,-10.633558,5.244116,0.007956,102631.2031,102631.2031,214.236053,256.602936,...,292.071594,289.385406,219.865478,12018.46777,63.401463,0,0,2002,11,9
3,27.546276,0.887099,3.154071,0.529584,-0.829196,0.012604,101903.6953,101903.6953,215.129959,265.565369,...,295.798981,294.09726,163.254562,12283.79785,64.725082,2,2,2008,7,2
4,15.849665,11.807032,6.403656,10.211574,6.852407,0.008851,101348.375,101348.375,211.818268,257.652802,...,293.132507,291.138214,114.518639,11963.05957,63.839413,0,0,1998,6,10


## Ajout d'un attribut "SOUTHERN_HEMISPHERE"

Nous avons obtenu, dans le notebook *oversample.ipynb*, les coordonnées suivantes pour chaque "LOCATION" :

In [3]:
# Latitude/longitude of each LOCATION code; the row position (0-5) is the
# LOCATION value. pandas is already imported in the first cell, so it is not
# imported again here, and the frame is built in a single expression instead
# of going through a throwaway intermediate dict variable.
df = pd.DataFrame(
    {
        "LAT": [-30.743155, -20.886571, -24.641460, 22.998696, 13.142112, 22.059974],
        "LON": [353.750000, 229.687500, 241.875000, 276.666667, 253.125000, 244.375000],
    }
).rename_axis("LOCATION")
df

Unnamed: 0_level_0,LAT,LON
LOCATION,Unnamed: 1_level_1,Unnamed: 2_level_1
0,-30.743155,353.75
1,-20.886571,229.6875
2,-24.64146,241.875
3,22.998696,276.666667
4,13.142112,253.125
5,22.059974,244.375


Ainsi, les "LOCATION" inférieures ou égales à 2 (latitude négative) font partie de l'hémisphère sud.

In [4]:
# Boolean flag: LOCATION codes 0, 1 and 2 are the ones with a negative
# latitude in the table above, hence the `<= 2` threshold.
is_southern = data["LOCATION"].le(2)
data["SOUTHERN_HEMISPHERE"] = is_southern
data.head()

Unnamed: 0,TMQ,U850,V850,UBOT,VBOT,QREFHT,PS,PSL,T200,T500,...,TREFHT,Z1000,Z200,ZBOT,Label,LOCATION,YEAR,MONTH,DAY,SOUTHERN_HEMISPHERE
0,49.687851,-6.740328,-0.229605,-7.50403,-2.30124,0.019844,101345.5625,101347.6328,218.55455,268.150818,...,301.793884,119.566132,12439.52441,66.690376,0,3,2001,7,12,False
1,32.432846,11.944582,7.3621,3.239414,12.670375,0.020695,97944.415473,100941.730367,220.562507,266.031227,...,301.233936,66.253009,12387.744103,66.45426,1,4,2006,10,24,False
2,21.478655,-5.751558,1.967963,-10.633558,5.244116,0.007956,102631.2031,102631.2031,214.236053,256.602936,...,289.385406,219.865478,12018.46777,63.401463,0,0,2002,11,9,True
3,27.546276,0.887099,3.154071,0.529584,-0.829196,0.012604,101903.6953,101903.6953,215.129959,265.565369,...,294.09726,163.254562,12283.79785,64.725082,2,2,2008,7,2,True
4,15.849665,11.807032,6.403656,10.211574,6.852407,0.008851,101348.375,101348.375,211.818268,257.652802,...,291.138214,114.518639,11963.05957,63.839413,0,0,1998,6,10,True


## Transformation cyclique sur MONTH

En ce moment, les mois de janvier et de décembre sont traités comme deux extrêmes opposés. J'implémente ici une représentation cyclique des mois (sinus et cosinus), et donc du temps, afin de conserver la proximité temporelle entre les mois. Ainsi, le mois de décembre reste « près » du mois de janvier pour le modèle.

In [5]:
# Place each month on the unit circle: month m maps to the angle
# (m - 1) * 2*pi/12, so January sits at angle 0 and December lands next to it.
month_angle = data["MONTH"].sub(1).mul(2 * np.pi / 12)

# Append the sine/cosine pair, then drop the raw MONTH column it replaces.
data = data.assign(
    SIN_MONTH=np.sin(month_angle),
    COS_MONTH=np.cos(month_angle),
).drop(columns="MONTH")
data.head()

Unnamed: 0,TMQ,U850,V850,UBOT,VBOT,QREFHT,PS,PSL,T200,T500,...,Z1000,Z200,ZBOT,Label,LOCATION,YEAR,DAY,SOUTHERN_HEMISPHERE,SIN_MONTH,COS_MONTH
0,49.687851,-6.740328,-0.229605,-7.50403,-2.30124,0.019844,101345.5625,101347.6328,218.55455,268.150818,...,119.566132,12439.52441,66.690376,0,3,2001,12,False,1.224647e-16,-1.0
1,32.432846,11.944582,7.3621,3.239414,12.670375,0.020695,97944.415473,100941.730367,220.562507,266.031227,...,66.253009,12387.744103,66.45426,1,4,2006,24,False,-1.0,-1.83697e-16
2,21.478655,-5.751558,1.967963,-10.633558,5.244116,0.007956,102631.2031,102631.2031,214.236053,256.602936,...,219.865478,12018.46777,63.401463,0,0,2002,9,True,-0.8660254,0.5
3,27.546276,0.887099,3.154071,0.529584,-0.829196,0.012604,101903.6953,101903.6953,215.129959,265.565369,...,163.254562,12283.79785,64.725082,2,2,2008,2,True,1.224647e-16,-1.0
4,15.849665,11.807032,6.403656,10.211574,6.852407,0.008851,101348.375,101348.375,211.818268,257.652802,...,114.518639,11963.05957,63.839413,0,0,1998,10,True,0.5,-0.8660254


## Ajout de la magnitude et de l'orientation des vents

In [16]:
# Turn each (U, V) wind component pair into polar features, then discard the
# raw components. All magnitudes are added before any direction column so the
# resulting column order is the same as before.
wind_levels = ("850", "BOT")

for level in wind_levels:
    u_comp = data[f"U{level}"]
    v_comp = data[f"V{level}"]
    data[f"WIND{level}_MAGNITUDE"] = np.sqrt(u_comp.pow(2) + v_comp.pow(2))

for level in wind_levels:
    # arctan2(V, U) is the angle of the wind vector; it is stored as a
    # sine/cosine pair rather than as a raw angle.
    angle = np.arctan2(data[f"V{level}"], data[f"U{level}"])
    data[f"WIND{level}_SIN"] = np.sin(angle)
    data[f"WIND{level}_COS"] = np.cos(angle)

raw_components = [f"{axis}{level}" for level in wind_levels for axis in ("U", "V")]
data = data.drop(columns=raw_components)
data.head()

Unnamed: 0,TMQ,QREFHT,PS,PSL,T200,T500,PRECT,TS,TREFHT,Z1000,...,DAY,SOUTHERN_HEMISPHERE,SIN_MONTH,COS_MONTH,WIND850_MAGNITUDE,WINDBOT_MAGNITUDE,WIND850_SIN,WIND850_COS,WINDBOT_SIN,WINDBOT_COS
0,49.687851,0.019844,101345.5625,101347.6328,218.55455,268.150818,0.07272,302.358093,301.793884,119.566132,...,12,False,1.224647e-16,-1.0,6.744237,7.84896,-0.034045,-0.99942,-0.29319,-0.956054
1,32.432846,0.020695,97944.415473,100941.730367,220.562507,266.031227,0.193427,303.098166,301.233936,66.253009,...,24,False,-1.0,-1.83697e-16,14.031163,13.077928,0.524696,0.851289,0.968837,0.247701
2,21.478655,0.007956,102631.2031,102631.2031,214.236053,256.602936,0.06696,292.071594,289.385406,219.865478,...,9,True,-0.8660254,0.5,6.078922,11.856362,0.323736,-0.946148,0.442304,-0.896865
3,27.546276,0.012604,101903.6953,101903.6953,215.129959,265.565369,8.1e-05,295.798981,294.09726,163.254562,...,2,True,1.224647e-16,-1.0,3.276448,0.983882,0.96265,0.27075,-0.842779,0.538259
4,15.849665,0.008851,101348.375,101348.375,211.818268,257.652802,0.035712,293.132507,291.138214,114.518639,...,10,True,0.5,-0.8660254,13.431784,12.297631,0.476754,0.879037,0.557214,0.830369


In [28]:
# Persist the engineered feature set. The index is written as the first CSV
# column (pandas default).
data.to_csv(path_or_buf="./data/augmented_feature_train.csv")