# LAB1 : Implémentation manuelle de la régression linéaire multiple

### Dans ce lab, l'objectif est de coder nous-mêmes notre modèle de régression linéaire multiple, sans passer par une bibliothèque tierce.

In [17]:
import numpy as np
import pandas as pd
import copy

# Cost function for linear regression (half mean squared error)
def compute_cost_linear_regression(x, y, w, b):
    """Return the half mean squared error of the linear model ``x . w + b``.

    The cost is ``sum((x . w + b - y) ** 2) / (2 * m)`` where ``m`` is the
    number of training examples. It is computed with one vectorized numpy
    expression instead of a Python-level loop over the examples.

    Args:
        x: Feature matrix of shape (m, n), one training example per row.
        y: Target values of shape (m,).
        w: Weight vector of shape (n,).
        b: Scalar bias.

    Returns:
        The scalar cost.

    Raises:
        ValueError: If ``x`` contains no training examples.
    """
    m = x.shape[0]  # Number of training examples
    if m == 0:
        # Fail with an explicit message instead of an opaque division by zero.
        raise ValueError("x must contain at least one training example")

    errors = np.dot(x, w) + b - y  # Prediction error of every example at once
    return np.sum(errors ** 2) / (2 * m)

# Gradients of the cost with respect to w and b
def compute_gradient_descent(x, y, w, b):
    """Compute the gradients of the half-MSE cost with respect to ``w`` and ``b``.

    With ``errors = x . w + b - y`` the gradients are
    ``dj_dw = x.T . errors / m`` and ``dj_db = sum(errors) / m``. Both are
    evaluated with vectorized numpy operations instead of nested Python loops
    over examples and features.

    Args:
        x: Feature matrix of shape (m, n), one training example per row.
        y: Target values of shape (m,).
        w: Weight vector of shape (n,).
        b: Scalar bias.

    Returns:
        A tuple ``(dj_dw, dj_db)``: the gradient with respect to the weights
        (shape (n,)) and the scalar gradient with respect to the bias.

    Raises:
        ValueError: If ``x`` contains no training examples.
    """
    m = x.shape[0]
    if m == 0:
        # Fail with an explicit message instead of an opaque division by zero.
        raise ValueError("x must contain at least one training example")

    errors = np.dot(x, w) + b - y    # Prediction error of every example
    dj_dw = np.dot(x.T, errors) / m  # Average of error_i * x[i, j] for each j
    dj_db = np.sum(errors) / m       # Average error
    return dj_dw, dj_db

# Gradient descent loop that fits w and b
def linear_gradient_descent(x, y, w_in, b_in, alpha, num_iters, log_every=1000):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Args:
        x: Feature matrix passed through to the cost and gradient functions.
        y: Target values passed through to the cost and gradient functions.
        w_in: Initial weights. They are deep-copied, so the caller's object
            is never modified.
        b_in: Initial bias.
        alpha: Learning rate.
        num_iters: Number of gradient-descent iterations to run.
        log_every: Print the cost every ``log_every`` iterations. Defaults to
            1000, the interval that was previously hard-coded. Pass ``0`` or
            ``None`` to disable printing.

    Returns:
        A tuple ``(w, b, cost_history)`` where ``cost_history[i]`` is the cost
        measured at the start of iteration ``i``, before that iteration's
        parameter update.
    """
    w = copy.deepcopy(w_in)  # Copy so the caller's initial weights stay intact
    b = b_in
    cost_history = []

    for i in range(num_iters):
        # Cost of the current parameters (recorded before they are updated)
        cost = compute_cost_linear_regression(x, y, w, b)
        cost_history.append(cost)

        # Gradients at the current parameters
        dj_dw, dj_db = compute_gradient_descent(x, y, w, b)

        # Parameter update
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        if log_every and i % log_every == 0:
            print(f"Iteration {i}: Cost {cost}")

    return w, b, cost_history

# Prediction function
def predict(x, w, b):
    """Evaluate the linear model: dot product of ``x`` and ``w`` plus bias ``b``."""
    weighted_sum = np.dot(x, w)
    return weighted_sum + b

# Manual z-score normalization of the features
def manual_normalization(x):
    """Standardize each column of ``x`` to zero mean and unit standard deviation.

    Args:
        x: Array whose columns (axis 0 statistics) are normalized.

    Returns:
        A tuple ``(x_scaled, mean, std)``. ``mean`` and ``std`` are the
        per-column statistics used for scaling, so new data can be transformed
        the same way with ``(x_new - mean) / std``. For a constant column the
        returned ``std`` entry is 1 rather than 0 (see below).
    """
    mean = np.mean(x, axis=0)  # Per-column mean
    std = np.std(x, axis=0)    # Per-column standard deviation
    # A constant column has std == 0; dividing by it would produce NaN/inf.
    # Use a scale of 1 for such columns so they are simply centered to 0.
    std = np.where(std == 0, 1.0, std)
    return (x - mean) / std, mean, std

# Read the CSV file
data = pd.read_csv('house-prices.csv')

# Extract the features and the target
x_train = data[['SqFt', 'Bedrooms']].values
# Index the column directly: a missing 'Price' column then raises a KeyError
# naming the column, instead of an AttributeError on the None that .get() returns.
y_train = data['Price'].values

# Manually normalize the input features; keep mean/std to scale new examples
x_train_scaled, mean, std = manual_normalization(x_train)

# Parameter initialization
w_init = np.array([0.5, 1])  # One weight per explanatory variable
b_init = 0.5
alpha = 0.001  # Learning rate
iterations = 10000

# Train the model
w_final, b_final, cost_hist = linear_gradient_descent(x_train_scaled, y_train, w_init, b_init, alpha, iterations)

# Prediction for a new example (e.g. 2030 SqFt, 4 bedrooms)
x_test = np.array([[2030, 4]])

# Normalize the test example with the training mean and std
x_test_scaled = (x_test - mean) / std
y_pred = predict(x_test_scaled, w_final, b_final)

print(f"Prediction for {x_test[0]}: {y_pred}")


Iteration 0: Cost 8863704678.928347
Iteration 1000: Cost 1374661096.4474719
Iteration 2000: Cost 373285054.99258614
Iteration 3000: Cost 238459804.5686218
Iteration 4000: Cost 220256290.72399595
Iteration 5000: Cost 217795055.70822418
Iteration 6000: Cost 217461793.4826385
Iteration 7000: Cost 217416533.58450592
Iteration 8000: Cost 217410341.00364435
Iteration 9000: Cost 217409477.55370894
Prediction for [2030    4]: [144058.29250012]


### Utilisation de scikit-learn pour comparer les résultats

In [20]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Load the dataset from the CSV file
data = pd.read_csv('house-prices.csv')

# Feature matrix (two columns) and target vector
y_train = data['Price'].values
x_train = data[['SqFt', 'Bedrooms']].values

# Fit the scaler on the training features, then standardize them
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)

# Fit scikit-learn's LinearRegression on the standardized features
model = LinearRegression().fit(x_train_scaled, y_train)

# New example to predict (e.g. 2030 SqFt, 4 bedrooms)
x_test = np.array([[2030, 4]])

# Apply the already-fitted scaler to the test example before predicting
x_test_scaled = scaler.transform(x_test)
y_pred = model.predict(x_test_scaled)

print(f"Prediction for {x_test[0]}: {y_pred}")


Prediction for [2030    4]: [144059.32018322]


### Régression linéaire simple

#### Implémentation manuelle

In [23]:
# Linear regression coded manually, using a single feature instead of several:

import numpy as np
import pandas as pd
import copy

# Cost function for linear regression (half mean squared error)
def compute_cost_linear_regression(x, y, w, b):
    """Return the half mean squared error of the linear model ``x . w + b``.

    The cost is ``sum((x . w + b - y) ** 2) / (2 * m)`` where ``m`` is the
    number of training examples. It is computed with one vectorized numpy
    expression instead of a Python-level loop over the examples.

    Args:
        x: Feature matrix of shape (m, n), one training example per row
            (n is 1 in the single-feature case).
        y: Target values of shape (m,).
        w: Weight vector of shape (n,).
        b: Scalar bias.

    Returns:
        The scalar cost.

    Raises:
        ValueError: If ``x`` contains no training examples.
    """
    m = x.shape[0]  # Number of training examples
    if m == 0:
        # Fail with an explicit message instead of an opaque division by zero.
        raise ValueError("x must contain at least one training example")

    errors = np.dot(x, w) + b - y  # Prediction error of every example at once
    return np.sum(errors ** 2) / (2 * m)

# Gradients of the cost with respect to w and b
def compute_gradient_descent(x, y, w, b):
    """Compute the gradients of the half-MSE cost with respect to ``w`` and ``b``.

    With ``errors = x . w + b - y`` the gradients are
    ``dj_dw = x.T . errors / m`` and ``dj_db = sum(errors) / m``. Both are
    evaluated with vectorized numpy operations instead of a Python loop over
    the examples, and the same expression covers one or several features.

    Args:
        x: Feature matrix of shape (m, n), one training example per row
            (n is 1 in the single-feature case).
        y: Target values of shape (m,).
        w: Weight vector of shape (n,).
        b: Scalar bias.

    Returns:
        A tuple ``(dj_dw, dj_db)``: the gradient with respect to the weights
        (shape (n,)) and the scalar gradient with respect to the bias.

    Raises:
        ValueError: If ``x`` contains no training examples.
    """
    m = x.shape[0]
    if m == 0:
        # Fail with an explicit message instead of an opaque division by zero.
        raise ValueError("x must contain at least one training example")

    errors = np.dot(x, w) + b - y    # Prediction error of every example
    dj_dw = np.dot(x.T, errors) / m  # Average of error_i * x[i] per feature
    dj_db = np.sum(errors) / m       # Average error
    return dj_dw, dj_db

# Gradient descent loop that fits w and b
def linear_gradient_descent(x, y, w_in, b_in, alpha, num_iters, log_every=1000):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Args:
        x: Feature matrix passed through to the cost and gradient functions.
        y: Target values passed through to the cost and gradient functions.
        w_in: Initial weights. They are deep-copied, so the caller's object
            is never modified.
        b_in: Initial bias.
        alpha: Learning rate.
        num_iters: Number of gradient-descent iterations to run.
        log_every: Print the cost every ``log_every`` iterations. Defaults to
            1000, the interval that was previously hard-coded. Pass ``0`` or
            ``None`` to disable printing.

    Returns:
        A tuple ``(w, b, cost_history)`` where ``cost_history[i]`` is the cost
        measured at the start of iteration ``i``, before that iteration's
        parameter update.
    """
    w = copy.deepcopy(w_in)  # Copy so the caller's initial weights stay intact
    b = b_in
    cost_history = []

    for i in range(num_iters):
        # Cost of the current parameters (recorded before they are updated)
        cost = compute_cost_linear_regression(x, y, w, b)
        cost_history.append(cost)

        # Gradients at the current parameters
        dj_dw, dj_db = compute_gradient_descent(x, y, w, b)

        # Parameter update
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        if log_every and i % log_every == 0:
            print(f"Iteration {i}: Cost {cost}")

    return w, b, cost_history

# Prediction function
def predict(x, w, b):
    """Return the linear model output: ``np.dot(x, w)`` shifted by the bias ``b``."""
    linear_term = np.dot(x, w)
    prediction = linear_term + b
    return prediction

# Read the CSV file
data = pd.read_csv('house-prices.csv')

# Extract the single feature and the target
x_train = data[['SqFt']].values  # Use only one feature (kept as a 2-D column)
y_train = data['Price'].values

# Manual normalization. Statistics are taken per column (axis=0) rather than
# over the whole array, so the scaling stays correct if features are added.
mean = np.mean(x_train, axis=0)
std = np.std(x_train, axis=0)
x_train_scaled = (x_train - mean) / std

# Initialize parameters
w_init = np.array([0.5])  # One weight for the single feature
b_init = 0.5
alpha = 0.01  # Learning rate
iterations = 10000

# Train the model
w_final, b_final, cost_hist = linear_gradient_descent(x_train_scaled, y_train, w_init, b_init, alpha, iterations)

# Prediction for a new example (e.g., 2030 SqFt)
x_test = np.array([[2030]])

# Normalize the test example with the training mean and std
x_test_scaled = (x_test - mean) / std
y_pred = predict(x_test_scaled, w_final, b_final)

print(f"Prediction for {x_test[0]}: {y_pred}")


Iteration 0: Cost 8863718753.865515
Iteration 1000: Cost 248628341.22142452
Iteration 2000: Cost 248628325.164993
Iteration 3000: Cost 248628325.16499305
Iteration 4000: Cost 248628325.1649929
Iteration 5000: Cost 248628325.1649929
Iteration 6000: Cost 248628325.1649929
Iteration 7000: Cost 248628325.1649929
Iteration 8000: Cost 248628325.1649929
Iteration 9000: Cost 248628325.1649929
Prediction for [2030]: [132468.29612393]


### Régression linéaire : implémentation avec scikit-learn pour comparer les résultats


In [22]:
# Single-feature linear regression using the scikit-learn library


import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Load the dataset from the CSV file
data = pd.read_csv('house-prices.csv')

# Target vector and single-column feature matrix
y_train = data['Price'].values
x_train = data[['SqFt']].values  # Only one feature is used

# Fit the scaler on the training feature, then standardize it
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)

# Fit scikit-learn's LinearRegression on the standardized feature
model = LinearRegression().fit(x_train_scaled, y_train)

# New example to predict (e.g., 2030 SqFt)
x_test = np.array([[2030]])

# Apply the already-fitted scaler to the test example before predicting
x_test_scaled = scaler.transform(x_test)
y_pred = model.predict(x_test_scaled)

print(f"Prediction for {x_test[0]}: {y_pred}")


Prediction for [2030]: [132468.29612393]
