# Tuning Hyperparameters of Lasso Regression

In [5]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error
import pandas as pd
import numpy as np

In [7]:
df_x = pd.read_csv('newzeland population.csv')
df_x = df_x.iloc[::-1].reset_index(drop=True)
df_x = df_x.iloc[30:-1]
df_x.reset_index(drop=True, inplace=True)
df_x.head()

Unnamed: 0,Year,Population
0,1980,3112900
1,1981,3124900
2,1982,3156100
3,1983,3199300
4,1984,3227100


In [8]:
df_x.tail()

Unnamed: 0,Year,Population
40,2020,5090200
41,2021,5111300
42,2022,5117200
43,2023,5223100
44,2024,5269939


In [9]:
df_x.shape

(45, 2)

In [11]:
x = np.array(df_x['Population'])
print(x)

[3112900 3124900 3156100 3199300 3227100 3247100 3246300 3274400 3283400
 3299200 3329800 3495100 3531700 3572200 3620000 3673400 3732000 3781300
 3815000 3835100 3857700 3880500 3948500 4027200 4087500 4133900 4184600
 4223800 4259800 4302600 4350700 4384000 4408100 4442100 4516500 4609400
 4714100 4813600 4900600 4979200 5090200 5111300 5117200 5223100 5269939]


In [12]:
df_y = pd.read_csv('cleaned_dataset.csv')
df_y = df_y.iloc[:,[0,-1]]
df_y.head()

Unnamed: 0,Year,Total
0,1980,331751
1,1981,294409
2,1982,252119
3,1983,254658
4,1984,274022


In [13]:
df_y.tail()

Unnamed: 0,Year,Total
40,2020,212064
41,2021,168188
42,2022,192313
43,2023,216533
44,2024,226317


In [15]:
df_y.shape

(45, 2)

In [16]:
y = np.array(df_y['Total'])
print(y)

[331751 294409 252119 254658 274022 272657 298086 292500 308817 270585
 241026 255878 258491 261371 271906 276802 269061 266665 272436 262059
 266422 255739 265322 282515 287754 297185 314727 333004 348638 346707
 308540 284899 272250 229410 218170 220185 225931 221757 212872 204566
 212064 168188 192313 216533 226317]


# Train_test_split

In [19]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    x, 
    y,
    test_size=0.2,
)

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(36,) (9,)
(36,) (9,)


In [20]:
# 1. Reshape to 2D arrays
X_train_2d = X_train.reshape(-1, 1)
X_test_2d = X_test.reshape(-1, 1)

# Train the model

In [24]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score


# 2. Scaling (important for Lasso)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_2d)
X_test_scaled = scaler.transform(X_test_2d)

# 3. Train default Lasso model
lasso = Lasso()  # default parameters
lasso.fit(X_train_scaled, y_train)

lasso.score(X_test_scaled, y_test)


0.1353491506088027

In [25]:
cross_val_score(lasso, X_test_scaled, y_test, cv=5).mean()



np.float64(nan)

# Hyper parameter tuning(Grid Search CV)

In [29]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],  # regularization strength
    'max_iter': [1000, 5000, 10000],                  # iterations for convergence
    'selection': ['cyclic', 'random']                # coefficient update strategy
}

grid_search = GridSearchCV(
    estimator=lasso,
    param_grid=param_grid,
)
grid_search.fit(X_train_scaled, y_train)

In [30]:
grid_search.best_params_

{'alpha': 100, 'max_iter': 1000, 'selection': 'cyclic'}

In [32]:
grid_search.score(X_test_scaled, y_test)


0.13627848154674393

In [33]:
from sklearn.linear_model import Lasso
import pickle

# Example dataset (replace X_train, y_train with your data)
# X_train -> feature matrix
# y_train -> target vector

params = {'alpha': 100, 'max_iter': 1000, 'selection': 'cyclic'}

# Train Lasso Regression Model
model = Lasso(**params)
model.fit(X_train_scaled, y_train)

# Save model as a .pkl file
with open("lasso_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("Model trained and saved as lasso_model.pkl")


Model trained and saved as lasso_model.pkl
