In [None]:
import jupyter
import numpy as np
import pandas as pd
import matplotlib 
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error




## Exercise 1: Scikit-learn estimator

In [None]:


# Toy training set: three single-feature samples and their column-shaped targets.
X = [[1], [2.1], [3]]
y = [[1], [2], [3]]

# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X, y)

# Predict the target for one unseen sample; the bare name on the last line
# makes the prediction the cell's displayed output.
to_predict = [[4]]
predicted = model.predict(to_predict)
predicted

In [None]:
# Report the fitted parameters and the model's score on the training data.
#   Coefficients -> slope of the line: how much y changes per one-unit increase in X
#   Intercept    -> value of y when X is 0 (constant offset of the prediction)
#   Score        -> model.score(X, y), evaluated on the same data used for fitting
fit_summary = (
    ("Coefficients:", model.coef_),
    ("Intercept:", model.intercept_),
    ("Score:", model.score(X, y)),
)
for label, value in fit_summary:
    print(label, value)

## Exercise 2: Linear regression in 1D

### 1.

In [None]:

# Synthetic 1-D regression problem: 100 samples, a single informative feature,
# noise=10 and bias=100. coef=True additionally returns the generating coefficient;
# random_state pins the draw so the cell is reproducible.
X, y, coef = make_regression(
    n_samples=100,
    n_features=1,
    n_informative=1,
    noise=10,
    bias=100.0,
    coef=True,
    random_state=0,
)

# Scatter plot of the raw samples.
plt.figure(figsize=(8, 6))
plt.scatter(X, y, color='blue', label='Data points')
plt.xlabel('X')
plt.ylabel('y')
plt.title('X (1 Dimensions) and y')
plt.legend()
plt.show()

### 2.

In [None]:

# Train a linear regression on the generated samples (fit() returns the estimator).
model = LinearRegression().fit(X, y)

# With a single feature, coef_[0] is the slope; intercept_ is the constant term.
fitted_coef, fitted_intercept = model.coef_[0], model.intercept_

# Render the fitted line as a human-readable equation and print it.
equation = f"y = {fitted_coef} * x + {fitted_intercept}"
print(equation)

### 3.

In [None]:
# Plot the generated data again, this time with the fitted regression line.
#
# plt.plot joins points in the order they are given. X comes out of
# make_regression in arbitrary order, so plotting it directly draws the line as
# many overlapping back-and-forth segments (and would zigzag for any non-linear
# model). Sorting by the single feature column draws it once, left to right.
order = np.argsort(X[:, 0])
X_sorted = X[order]

plt.figure(figsize=(8, 6))
plt.scatter(X, y, color='blue', label='Data points')  # raw samples
plt.plot(X_sorted, model.predict(X_sorted), color='red', label='Fitted Line')  # regression line
plt.title('Generated Data Set with Fitted Line')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()

### 4.

In [None]:
# Predict the target for every sample in X with the fitted model,
# then show only the first ten predictions as the cell output.
y_pred = model.predict(X)
y_pred[:10]

### 5.

In [None]:
# Let's define the compute_mse function first
def compute_mse(y_true, y_pred):
    """Return the mean squared error between true and predicted values.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values (list, tuple, NumPy array, pandas Series, ...).
    y_pred : array-like or scalar
        Predicted values. Compared with ``y_true`` element by element, by
        position. A scalar is broadcast against every element of ``y_true``.

    Returns
    -------
    numpy.float64
        Mean of the squared element-wise differences (``nan`` for empty input).

    Raises
    ------
    ValueError
        If the two inputs have shapes that NumPy cannot broadcast together.
    """
    # Coerce to float arrays so plain Python lists work and integer (especially
    # unsigned) inputs cannot wrap around during the subtraction.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # A 1-D vector and a column/row vector of the same length, e.g. (n,) vs
    # (n, 1), would otherwise broadcast to an (n, n) matrix and yield a wrong
    # MSE without any error. Flatten both so they are compared element by element.
    same_length_vectors = (
        y_true.shape != y_pred.shape
        and y_true.size == y_pred.size
        and y_true.size == max(y_true.shape, default=0)
        and y_pred.size == max(y_pred.shape, default=0)
    )
    if same_length_vectors:
        y_true, y_pred = y_true.ravel(), y_pred.ravel()

    mse = ((y_true - y_pred) ** 2).mean()
    return mse


# Compute the MSE on the dataset
# (y holds the generated targets, y_pred the model's predictions for the same samples);
# the bare name on the last line displays the value as the cell output.
mse = compute_mse(y, y_pred)
mse


### 6.

In [None]:
# Repeat the experiment with the noise parameter raised to 50
# (all other generation settings, including the seed, are unchanged).
X, y, coef = make_regression(
    n_samples=100,
    n_features=1,
    n_informative=1,
    noise=50,
    bias=100.0,
    coef=True,
    random_state=0,
)

# Refit on the noisier samples and predict on the same inputs.
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)

# Slope and intercept of the new fit, plus its MSE from the custom helper.
fitted_coef_noisy, fitted_intercept_noisy = model.coef_[0], model.intercept_
mse_noisy = compute_mse(y, y_pred)

# Report the fitted line and the error for the noisy data set.
equation_noisy = f"y = {fitted_coef_noisy} * x + {fitted_intercept_noisy}"
print("Equation of the fitted line with noisy data:", equation_noisy)
print("MSE with noisy data:", mse_noisy)



## Exercise 3: Train test split

In [None]:

# Features: the integers 1..20 arranged as 10 rows of 2 columns; targets: 1..10.
X = np.arange(1, 21).reshape(10, 2)
y = np.arange(1, 11)

# Fraction of the rows held out for testing (20%).
test_size = 0.2

# shuffle=False keeps the rows in their original order when splitting.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    shuffle=False,
)

# Show each part of the split.
print(f"X_train:\n {X_train}\n")
print(f"y_train:\n {y_train}\n")
print(f"X_test:\n {X_test}\n")
print(f"y_test:\n {y_test}\n")


## Exercise 4: Forecast diabetes progression

In [None]:

# Load the diabetes dataset; as_frame=True requests pandas objects for data/target.
diabetes = load_diabetes(as_frame=True)

# Feature table and regression target used throughout this exercise.
X = diabetes.data
y = diabetes.target

In [None]:
print(diabetes.DESCR)  # Print the description text bundled with the dataset

In [None]:
# Count missing values in each feature column (isna is the same check as isnull).
X.isna().sum()

In [None]:
# Display the target values (a bare name on the last line is rendered as the cell output)
y

### 1.

In [None]:
# 80/20 train/test split; random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=43,
)

# Peek at the first ten target values of each partition.
print(y_train.to_numpy()[:10])
print(y_test.to_numpy()[:10])

### 2.

In [None]:
# Fit a linear regression on the training partition (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Learned parameters: the coefficient array and the intercept.
coefficients, intercept = model.coef_, model.intercept_

# Pair each feature name with its coefficient and display the pairs.
coefficients_with_names = list(zip(diabetes.feature_names, coefficients))
coefficients_with_names

### 3.

In [None]:
# Predict on the held-out test features; display the first ten predictions.
y_pred = model.predict(X_test)
y_pred[:10]

### 4.

In [None]:

# Training error: predict on the training features and compare with the training targets.
y_train_pred = model.predict(X_train)
mse_train = mean_squared_error(y_train, y_train_pred)

# Test error: compare the test targets with y_pred, the test-set predictions
# assigned just before this step.
mse_test = mean_squared_error(y_test, y_pred)

# Display both errors as a (train, test) pair.
mse_train, mse_test