In [None]:
import pandas as pd
import numpy as np
import pandas as pd


# import matplotlib package
import matplotlib as mpl
import matplotlib.pyplot as plt

# import ML packages
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score
import warnings
warnings.filterwarnings('ignore')

## 1. Exploring the dataset

In [None]:
# Location of the dataset inside the Kaggle input directory; change this path
# when running the notebook in a different environment.
income_csv_path = '/kaggle/input/canada-per-capita-income/Canada_per_capita_income.csv'
capita_GDP = pd.read_csv(income_csv_path)

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts)
# to spot missing values or wrongly parsed column types early.
capita_GDP.info()

In [None]:
# Preview the first rows (head() defaults to 5) to sanity-check the parsed values.
capita_GDP.head()

## 2. Data visualization

In [None]:
# Scatter the raw observations to see how income evolves over the years.
# The explicit figure/axes interface keeps every call tied to this one plot.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(capita_GDP["year"], capita_GDP["income"], 'b.')
ax.set_title('Canada per capita income over years')
ax.set_xlabel("Year")
ax.set_ylabel("Per Capita Income")
# Fixed axis limits in the order [xmin, xmax, ymin, ymax].
ax.axis([1970, 2025, 0, 50000])
ax.grid(True)
plt.show()

## 3. Model training
### 3.1 Choosing prediction target

In [None]:
# Prediction target: the income column. Selecting with a list of labels keeps
# the result a one-column DataFrame rather than collapsing it to a Series.
y = capita_GDP.loc[:, ['income']]

### 3.2 Choosing features

In [None]:
# Feature matrix: the year is the only predictor. Selecting with a list of
# labels keeps it a one-column DataFrame rather than a Series.
X = capita_GDP.loc[:, ['year']]

### 3.3 Data splitting

In [None]:
# Randomly hold out 30% of the rows for testing; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

### 3.4 Model training using Linear Regression and Polynomial Regression

In [None]:
# Fit a linear regression on the training split (fit() returns the estimator).
linear_reg_model = LinearRegression().fit(X_train, y_train)

# Show the fitted intercept and coefficient as the cell output.
(linear_reg_model.intercept_, linear_reg_model.coef_)

In [None]:
# Expand the training features with polynomial terms up to degree 2.
poly_features = PolynomialFeatures(degree=2)
X_poly_train = poly_features.fit(X_train).transform(X_train)

# A linear model fitted on the expanded features is a polynomial regression
# in the original feature.
poly_reg_model = LinearRegression()
poly_reg_model.fit(X_poly_train, y_train)


### 3.5 Model Validation

In [None]:
# Evaluate the Linear Regression model on the held-out test split.
y_linear_reg = linear_reg_model.predict(X_test)
linear_reg_mae = mean_absolute_error(y_test, y_linear_reg)

# Evaluate the Polynomial Regression model on the same test split.
# The feature expander was fitted on the training data, so the test features
# are only transformed here; a transformer must never be refitted on test data.
X_poly_test = poly_features.transform(X_test)
y_poly_reg = poly_reg_model.predict(X_poly_test)
poly_reg_mae = mean_absolute_error(y_test, y_poly_reg)

print("Evaluation of Linear Regression Model")
print("Mean Absolute Error:", linear_reg_mae)

# The leading "\n" separates the two reports with a blank line.
print("\nEvaluation of Polynomial Regression Model")
print("Mean Absolute Error:", poly_reg_mae)

### 3.6 Cross-Validation

In [None]:
# 5-fold cross-validation of the Linear Regression model.
# The 'neg_mean_squared_error' scorer yields negated MSE values, so the sign is
# flipped before taking the square root to get one RMSE value per fold.
# NOTE(review): an integer cv does not shuffle the rows; if the data is sorted
# by year, every fold is tested on a contiguous block of years -- confirm that
# this is the intended evaluation.
linear_reg_scores = cross_val_score(linear_reg_model, X, y,
                                    scoring='neg_mean_squared_error', cv=5)
linear_reg_rmse = np.sqrt(-linear_reg_scores)

# 5-fold cross-validation of the Polynomial Regression model.
# The feature expander is already fitted, so the full dataset is only
# transformed here instead of refitting the shared transformer.
# (The "ploy" spelling of these names is kept because the reporting cell
# reads `ploy_reg_rmse`.)
X_ploy = poly_features.transform(X)
poly_reg_scores = cross_val_score(poly_reg_model, X_ploy, y,
                                  scoring='neg_mean_squared_error', cv=5)
ploy_reg_rmse = np.sqrt(-poly_reg_scores)

In [None]:
def display_score(scores):
    """Print a set of scores followed by their mean and standard deviation.

    Args:
        scores: Object exposing ``mean()`` and ``std()`` methods, such as the
            array of per-fold scores produced by cross-validation.

    Returns:
        None. The three lines are written to standard output.
    """
    print("Scores:", scores)
    # Each statistic is computed right before it is printed, in this order.
    for label, stat_name in (('Mean:', 'mean'), ('Standard deviation:', 'std')):
        print(label, getattr(scores, stat_name)())

In [None]:
# Report the per-fold RMSE values of the linear model with their mean and spread.
print('Cross-validation scores of Linear Regression model:')
display_score(linear_reg_rmse)

# Same report for the polynomial model; the leading "\n" puts a blank line
# between the two reports.
print('\nCross-validation scores of Polynomial Regression model:')
display_score(ploy_reg_rmse)