# Machine Learning Project 2 
Predicting the Power of a Wind Turbine


# Task 0 - Prepare the Data

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the training table from disk.
data = pd.read_csv("train.csv")

# Every column except the last is a feature; the last column is the target.
# NOTE(review): column 0 is kept as a feature here, whereas the later MLP cells
# slice `1:-1` and drop it -- confirm which feature set is intended.
X = data.values[:, :-1]
y = data.values[:, -1]

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Task 1 - A naive attempt

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Baseline: ordinary least-squares regression fitted on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
linear_model = LinearRegression().fit(X_train, y_train)

# Score the fitted model on the training split first, then on the held-out split.
train_score, test_score = (
    linear_model.score(features, target)
    for features, target in ((X_train, y_train), (X_test, y_test))
)

print("Performance d'entraînement :", train_score)
print("Performance de test :", test_score)

Performance d'entraînement : 2.2160832228057536e-05
Performance de test : -0.000407971009189545


# Task 2 - Train a few MLP Regressors

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

# Reload the data for this task. Column 0 is excluded from the features
# (slice `1:-1`); the last column is the regression target.
data = pd.read_csv("train.csv")
X, y = data.values[:, 1:-1], data.values[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Hyper-parameters shared by both networks; only the activation differs.
mlp_params1 = {
    'hidden_layer_sizes': (128, 256, 128),
    'activation': 'identity',
    'solver': 'adam',
    'batch_size': 256,
    # NOTE: scikit-learn documents `learning_rate` as only used when
    # solver='sgd'; with 'adam' this entry has no effect.
    'learning_rate': 'adaptive',
    'learning_rate_init': 0.001,
    'shuffle': True,
    'verbose': True,
    'max_iter': 300,
    # Seed weight initialisation and batch shuffling so that the scores
    # printed below can be reproduced on a fresh kernel.
    'random_state': 42,
}

# Same configuration, but with a non-linear activation.
mlp_params2 = mlp_params1.copy()
mlp_params2['activation'] = 'relu'

mlp_model1 = MLPRegressor(**mlp_params1)
mlp_model2 = MLPRegressor(**mlp_params2)

mlp_model1.fit(X_train, y_train)
mlp_model2.fit(X_train, y_train)

# Score each model on the training split and on the held-out split.
train_score_mlp1 = mlp_model1.score(X_train, y_train)
test_score_mlp1 = mlp_model1.score(X_test, y_test)

train_score_mlp2 = mlp_model2.score(X_train, y_train)
test_score_mlp2 = mlp_model2.score(X_test, y_test)

print("\nTask 2 - MLP Regressors")
print("MLP Model 1 (identity activation)")
print("Training Score:", train_score_mlp1)
print("Testing Score:", test_score_mlp1)

print("\nMLP Model 2 (relu activation)")
print("Training Score:", train_score_mlp2)
print("Testing Score:", test_score_mlp2)

Iteration 1, loss = 2432696.75655920
Iteration 2, loss = 1123945.55193586
Iteration 3, loss = 1133623.34120132
Iteration 4, loss = 1054237.34858963
Iteration 5, loss = 1007189.02152708
Iteration 6, loss = 1005421.82329287
Iteration 7, loss = 967646.87293341
Iteration 8, loss = 955669.53286686
Iteration 9, loss = 942984.40602447
Iteration 10, loss = 927107.63427123
Iteration 11, loss = 915031.97637511
Iteration 12, loss = 909582.29394865
Iteration 13, loss = 895684.59308368
Iteration 14, loss = 891913.24233375
Iteration 15, loss = 884865.77832479
Iteration 16, loss = 883671.60580794
Iteration 17, loss = 878308.45373443
Iteration 18, loss = 873158.20540252
Iteration 19, loss = 869034.52697065
Iteration 20, loss = 862385.66446221
Iteration 21, loss = 862963.66175307
Iteration 22, loss = 861044.20007432
Iteration 23, loss = 859083.33394420
Iteration 24, loss = 855970.86019236
Iteration 25, loss = 855129.81861798
Iteration 26, loss = 854107.98870262
Iteration 27, loss = 854288.47217629
Iter




Task 2 - MLP Regressors
MLP Model 1 (identity activation)
Training Score: -0.4427907403730824
Testing Score: -0.4446306804206772

MLP Model 2 (relu activation)
Training Score: 0.963709477106169
Testing Score: 0.9179085127373722


# Task 3 - Find the best MLP Regressor

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

# Reload the data for this task. Column 0 is excluded from the features
# (slice `1:-1`); the last column is the regression target.
data = pd.read_csv("train.csv")
X, y = data.values[:, 1:-1], data.values[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Candidate architecture for this task: first hidden layer widened to 140 units.
mlp_params1 = {
    'hidden_layer_sizes': (140, 256, 128),
    'activation': 'identity',
    'solver': 'adam',
    'batch_size': 256,
    # NOTE: scikit-learn documents `learning_rate` as only used when
    # solver='sgd'; with 'adam' this entry has no effect.
    'learning_rate': 'adaptive',
    'learning_rate_init': 0.001,
    'shuffle': True,
    'verbose': True,
    'max_iter': 300,
    # Seed weight initialisation and batch shuffling so that candidate
    # architectures are compared on equal footing and results are repeatable.
    'random_state': 42,
}

# Same configuration, but with a non-linear activation.
mlp_params2 = mlp_params1.copy()
mlp_params2['activation'] = 'relu'

mlp_model1 = MLPRegressor(**mlp_params1)
mlp_model2 = MLPRegressor(**mlp_params2)

mlp_model1.fit(X_train, y_train)
mlp_model2.fit(X_train, y_train)

# Score each model on the training split and on the held-out split.
train_score_mlp1 = mlp_model1.score(X_train, y_train)
test_score_mlp1 = mlp_model1.score(X_test, y_test)

train_score_mlp2 = mlp_model2.score(X_train, y_train)
test_score_mlp2 = mlp_model2.score(X_test, y_test)

# Header corrected: this cell belongs to Task 3 (it was copied from Task 2).
print("\nTask 3 - MLP Regressors")
print("MLP Model 1 (identity activation)")
print("Training Score:", train_score_mlp1)
print("Testing Score:", test_score_mlp1)

print("\nMLP Model 2 (relu activation)")
print("Training Score:", train_score_mlp2)
print("Testing Score:", test_score_mlp2)

# Task 4 - Join the leaderboard!


In [None]:
import utils
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

# Load the competition training set. Column 0 is excluded from the features
# (slice `1:-1`); the last column is the regression target. No hold-out split
# is made here: the model is fitted on every available row.
data = pd.read_csv("trainconcour.csv")
X, y = data.values[:, 1:-1], data.values[:, -1]

# Base hyper-parameters; the submitted model overrides the activation below.
mlp_params1 = {
    'hidden_layer_sizes': (140, 256, 128),
    'activation': 'identity',
    'solver': 'adam',
    'batch_size': 256,
    # NOTE: scikit-learn documents `learning_rate` as only used when
    # solver='sgd'; with 'adam' this entry has no effect.
    'learning_rate': 'adaptive',
    'learning_rate_init': 0.001,
    'shuffle': True,
    'verbose': True,
    'max_iter': 300,
    # Seed weight initialisation and batch shuffling so the submitted model
    # can be regenerated exactly.
    'random_state': 42,
}

mlp_params2 = mlp_params1.copy()
mlp_params2['activation'] = 'relu'

mlp_model2 = MLPRegressor(**mlp_params2)

mlp_model2.fit(X, y)

# Hand the fitted model and the competition test file to the project helper;
# its return value is kept in `dataconcours`.
dataconcours=utils.generate_submission(mlp_model2,"testconcour.csv")
